Skip to content

Commit

Permalink
- Philips formats 100-125: further improvements/fixes for decoding no…
Browse files Browse the repository at this point in the history
…n-latin characters

- Sony Bravia 7 (2024) sdb.xml: support to read it as a reference list (the TV will ignore any edits to this file)
  • Loading branch information
PredatH0r committed Oct 2, 2024
1 parent 8480fcf commit 1904e97
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 67 deletions.
66 changes: 41 additions & 25 deletions source/ChanSort.Loader.MediaTek/Serializer.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Collections.Generic;
using System.IO;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
Expand All @@ -11,8 +10,12 @@ namespace ChanSort.Loader.MediaTek;
public class Serializer : SerializerBase
{
/*
* Some Android based TVs export (in addition to the brand specific channel list files) a file named MtkChannelList.xml
* Examples are Philips channel list formats 120 and 125
* Some Android based TVs export an XML file with the format described below.
* Examples are Philips channel list formats 120 and 125 and Sony BRAVIA 7 (2024).
* However there are differences between Philips and Sony:
* - Sony lacks a number of XML elements
* - Sony seems to manage TV, Radio and Data channels internally in separate lists, all starting at 1, while Philips seems to use one combined list with no duplicate major_channel_numbers
*
* <service_list_transfer>
* <service_list_infos>
* <service_list_info service_list_id="...">
Expand All @@ -26,6 +29,9 @@ public class Serializer : SerializerBase
* <std_stream_component_type>
* <record_id>service://SERVICE_LIST_GENERAL_SATELLITE/[service_list_id]/[major_channel_number]
* <visible_service>
*
* The following elements exist in the Philips lists but not in Sony's sdb.xml
*
* <service_id> SID
* <transport_stream_id> TSID
* <network_id> NID
Expand All @@ -47,13 +53,13 @@ public class Serializer : SerializerBase
private byte[] content;
private string textContent;
private readonly StringBuilder fileInfo = new();

private readonly Dictionary<string, ChannelList> listsById = new();
private readonly bool splitTvRadioData;


#region ctor()
public Serializer(string inputFile) : base(inputFile)
public Serializer(string inputFile, bool separateTvRadioData = false) : base(inputFile)
{
this.splitTvRadioData = separateTvRadioData;
this.Features.ChannelNameEdit = ChannelNameEditMode.All;
this.Features.DeleteMode = DeleteMode.NotSupported;
this.Features.FavoritesMode = FavoritesMode.None;
Expand Down Expand Up @@ -134,7 +140,7 @@ private void ReadServiceListInfos(XmlNode serviceListInfosNode)
#region ReadServiceList()
private void ReadServiceList(XmlElement node)
{
SignalSource ss = SignalSource.Tv | SignalSource.Radio | SignalSource.Data | SignalSource.Dvb;
var ss = SignalSource.Dvb;
var slt = node.GetAttribute("service_list_type");
if (slt.Contains("SATELLITE"))
ss |= SignalSource.Sat;
Expand All @@ -143,29 +149,23 @@ private void ReadServiceList(XmlElement node)
else if (slt.Contains("TERR"))
ss |= SignalSource.Antenna;


// service_list_id example: SERVICE_LIST_GENERAL_SATELLITE/17
var serviceListId = node.GetAttribute("service_list_id");

var list = new ChannelList(ss, serviceListId);
this.listsById[serviceListId] = list;
//var serviceListId = node.GetAttribute("service_list_id");

int idx = 0;
foreach (var child in node.ChildNodes)
{
if (!(child is XmlElement si && si.LocalName == "service_info"))
continue;

ReadChannel(si, ss, idx++, list);
ReadChannel(si, ss, idx++);
}

this.DataRoot.AddChannelList(list);
}
#endregion

#region ReadChannel()

private ChannelInfo ReadChannel(XmlElement si, SignalSource ss, int idx, ChannelList list)
private void ReadChannel(XmlElement si, SignalSource ss, int idx)
{
// record_id example: service://SERVICE_LIST_GENERAL_SATELLITE/17/1
var recIdUri = si.GetElementString("record_id") ?? "";
Expand Down Expand Up @@ -197,13 +197,28 @@ private ChannelInfo ReadChannel(XmlElement si, SignalSource ss, int idx, Channel
else if ((ss & SignalSource.Cable) != 0)
chan.ChannelOrTransponder = LookupData.Instance.GetDvbcTransponder(chan.FreqInMhz).ToString();

var elements = si.GetElementsByTagName("major_channel_number", si.NamespaceURI);
list.ReadOnly |= elements.Count == 1 && elements[0].Attributes["editable", si.NamespaceURI].InnerText == "false";

list.AddChannel(chan);
if (splitTvRadioData)
ss |= LookupData.Instance.IsRadioTvOrData(chan.ServiceType);
else
ss |= SignalSource.Tv | SignalSource.Radio | SignalSource.Data;


return chan;
var list = DataRoot.GetChannelList(ss);
if (list == null)
{
var name = (ss & SignalSource.Antenna) != 0 ? "Antenna" : (ss & SignalSource.Cable) != 0 ? "Cable" : (ss & SignalSource.Sat) != 0 ? "Sat" : (ss & SignalSource.Ip) != 0 ? "IP" : "Other";
if (splitTvRadioData)
name += " " + ((ss & SignalSource.Tv) != 0 ? " TV" : (ss & SignalSource.Radio) != 0 ? " Radio" : " Data");

list = new ChannelList(ss, name);
this.DataRoot.AddChannelList(list);
}

var elements = si.GetElementsByTagName("major_channel_number", si.NamespaceURI);
list.ReadOnly |= elements.Count == 1 && elements[0].Attributes!["editable", si.NamespaceURI].InnerText == "false";

list.AddChannel(chan);
}
#endregion

Expand Down Expand Up @@ -231,10 +246,11 @@ public override void Save()
continue;

var si = ch.Xml;
si["major_channel_number"].InnerText = ch.NewProgramNr.ToString();
si["service_name"].InnerText = ch.Name;
si["lock"].InnerText = ch.Lock ? "1" : "0";
si["visible_service"].InnerText = ch.Hidden ? "1" : "3";
si["major_channel_number"]!.InnerText = ch.NewProgramNr.ToString();
si["service_name"]!.InnerText = ch.Name;
si["visible_service"]!.InnerText = ch.Hidden ? "1" : "3";
if (si["lock"] != null) // Sony lists don't have this elements
si["lock"].InnerText = ch.Lock ? "1" : "0";
}
}

Expand Down
95 changes: 60 additions & 35 deletions source/ChanSort.Loader.Philips/XmlSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -591,74 +591,99 @@ private string DecodeName(string input, NameType nameType)
return input;

// The way Philips encodes names is a complete mess.
// Each character is represented as two bytes, with the low byte first and the high second, but this isn't utf16.
// All observed files have the "high" byte always as 0x00
// If looking only at the odd bytes, this can either be encoded in some random locale, a valid utf8 sequence or 1 byte characters mixed with big-endian utf16 double-bytes characters.
// Two hex digits are combined to an integer with the low byte first and the high byte second.
// If the high byte is 0xFF, it is treated as if it were 0x00.
// If any of the high bytes is non-zero, the encoding is little-endian utf16.
// If all high bytes are 0, some guesswork is needed to decode the sequence of odd bytes. It can be encoded as
// - a valid utf8 sequence
// - 1 byte per character in some undetermined locale
// - 1 byte per character mixed with 2 bytes big-endian utf16 (v125)

// According to https://github.com/PredatH0r/ChanSort/issues/347 Philips seems to use a locale-dependent encoding for favorite list names,
// writing "0xAA 0x00" to the file for an 8 bit code point. Congratulations, well done!

// In version 120/125 umlauts in channel names are encoded as 1 byte CP-1252/UTF16 code point + 0xFF as the second byte (i.e. for "Ä" it is 0xC4 0xFF instead of 0xC4 0x00)
// Also: 0x62 0x00 0x65 0x00 0x49 0x00 0x4e 0x00 0x20 0x00 0x01 0x00 0x30 0x00 0x5a - here 0x01 0x00 0x30 0x00 refers to U+0130 (the upper case I with dot), in "beIN İZ"
// Version 100: CAN use little-endian UTF16: 0x11 0x04 0x35 0x04 0x3B 0x04 0x30 0x04 0x40 0x04 0x43 0x04 0x41 0x04 0x4C 0x04 0x20 0x00 0x31 0x00 0x20 0x00 0x48 0x00 0x44 0x00 for "Беларусь 1 HD"
// also 100: odd bytes contain UTF8, high are all 0: https://github.com/PredatH0r/ChanSort/issues/421:
// 0x38 0x00 0x20 0x00 0xD0 0x00 0xBA 0x00 0xD0 0x00 0xB0 0x00 0xD0 0x00 0xBD 0x00 0xD0 0x00 0xB0 0x00 0xD0 0x00 0xBB 0x00 0x20 0x00 0x48 0x00 0x44 0x00 for "8 канал HD"

// https://github.com/PredatH0r/ChanSort/issues/421: 0x38 0x00 0x20 0x00 0xD0 0x00 0xBA ... seems to contain cyrillic UTF-8 encoding in channel names instead of UTF-16
// Version 120/125: Umlauts in channel names are encoded as 1 byte CP-1252/UTF16 code point + 0xFF as the second byte (i.e. for "Ä" it is 0xC4 0xFF instead of 0xC4 0x00)
// Also 125: 0x62 0x00 0x65 0x00 0x49 0x00 0x4e 0x00 0x20 0x00 0x01 0x00 0x30 0x00 0x5a for "beIN İZ" where 0x01 0x00 0x30 0x00 refers to U+0130 "İ"


var hexParts = input.Split(' ');
var utf16 = new MemoryStream();
var utf8 = new MemoryStream();

bool highByte = false;
bool isHighByte = false;
int intValue = 0;
bool invalidUtf8 = false;
byte bigEndianUnicodeHighByte = 0;
int bigEndianUnicodeIndex = -1;
bool isBigEndianUtf16InOddBytes = false;
var hexParts = input.Split(' ');
foreach (var part in hexParts)
{
if (part == "")
continue;
var val = (byte)ParseInt(part);
invalidUtf8 |= highByte && val != 0;
if (highByte && val == 0xff) // hack-around for version 120
val = 0;
var curByte = (byte)ParseInt(part);

if (!isHighByte)
{
intValue = curByte;
isHighByte = true;
continue;
}

isHighByte = false;
if (curByte == 0xff) // hack-around for version 120 where 0xFFxx is actually a CP1252 code point xx
curByte = 0;

intValue += curByte << 8;
invalidUtf8 |= curByte != 0;

if (intValue == 0) // break when reaching a 0x00 0x00 sequence
break;

if (!invalidUtf8)
utf8.WriteByte((byte)intValue);

if (bigEndianUnicodeIndex >= 0) // special handling when a character < 32 was detected, which means we have a messed up "HI 00 LO 00" encoding for an UTF16 character (where HI is < 32)
if (isBigEndianUtf16InOddBytes) // special handling when a character < 32 was detected, which means we may have a "HI 00 LO 00" encoding for a UTF16 character
{
++bigEndianUnicodeIndex;
if (bigEndianUnicodeIndex == 2)
if (curByte == 0) // expected case where LO is followed by 00
{
utf16.WriteByte(val);
utf16.WriteByte((byte)intValue);
utf16.WriteByte(bigEndianUnicodeHighByte);
bigEndianUnicodeHighByte = 0;
}
else if (bigEndianUnicodeIndex == 3)
bigEndianUnicodeIndex = -1;
else // fallback to write full 4 byte sequence
{
utf16.WriteByte(bigEndianUnicodeHighByte);
utf16.WriteByte(0);
utf16.WriteByte((byte)(intValue >> 8));
utf16.WriteByte(curByte);
}

isBigEndianUtf16InOddBytes = false;
}
else
{
if (!highByte)
if (intValue < 32) // an int < 32 is likely the high byte of a "HI 00 LO 00" encoded UTF16 character
{
if (val < 32 && val != 0) // a char < 32 is likely the high byte of a "HI 00 LO 00" encoded UTF16 character
{
bigEndianUnicodeHighByte = val;
bigEndianUnicodeIndex = 0;
invalidUtf8 = true;
}
else if (!invalidUtf8)
utf8.WriteByte(val);
isBigEndianUtf16InOddBytes = true;
bigEndianUnicodeHighByte = (byte)intValue;
invalidUtf8 = true;
}
else
{
utf16.WriteByte((byte)(intValue & 0xFF));
utf16.WriteByte(curByte);
}
if (bigEndianUnicodeIndex < 0)
utf16.WriteByte(val);
}


highByte = !highByte;
}

// in the FavList the name can be a random locale based on the country setting (other than CP-1252 or U-0000-00FF)
// in the FavList the name can be in an arbitrary locale based on the country setting (other than CP-1252 or U+0000-U+00FF, e.g. Turkish)
if (nameType == NameType.FavList)
return this.DefaultEncoding.GetString(utf8.GetBuffer(), 0, (int)utf8.Length).TrimGarbage();

// e.g. for cyrillic names, where only the low-byte is used for an utf8 encoding while the high-byte is always 0
// best-effort utf8 decoding
if (!invalidUtf8 && Tools.IsUtf8(utf8.GetBuffer(), 0, (int)utf8.Length))
return Encoding.UTF8.GetString(utf8.GetBuffer(), 0, (int)utf8.Length).TrimGarbage();

Expand Down
1 change: 1 addition & 0 deletions source/ChanSort.Loader.Sony/ChanSort.Loader.Sony.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\ChanSort.Api\ChanSort.Api.csproj" />
<ProjectReference Include="..\ChanSort.Loader.MediaTek\ChanSort.Loader.MediaTek.csproj" />
</ItemGroup>
<ItemGroup>
<None Update="ChanSort.Loader.Sony.ini">
Expand Down
11 changes: 10 additions & 1 deletion source/ChanSort.Loader.Sony/SonyPlugin.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using ChanSort.Api;
using System.IO;
using ChanSort.Api;

namespace ChanSort.Loader.Sony
{
Expand All @@ -10,6 +11,14 @@ public class SonyPlugin : ISerializerPlugin

/// <summary>
/// Creates the serializer for <paramref name="inputFile"/>.
/// If the first or second line of the file contains the "&lt;service_list_transfer&gt;" tag,
/// a <c>MediaTek.Serializer</c> is returned (constructed with <c>true</c> as its second argument);
/// otherwise the regular Sony <c>Serializer</c> is used.
/// </summary>
/// <param name="inputFile">Path of the channel list file to open.</param>
/// <returns>The serializer instance chosen for the file.</returns>
public SerializerBase CreateSerializer(string inputFile)
{
  const string serviceListRootTag = "<service_list_transfer>";

  using (var reader = new StreamReader(inputFile))
  {
    // Only the first two lines are inspected; a missing line is treated as empty text.
    var firstLine = reader.ReadLine() ?? "";
    var secondLine = reader.ReadLine() ?? "";

    var isServiceListTransfer = firstLine.Contains(serviceListRootTag) || secondLine.Contains(serviceListRootTag);
    if (isServiceListTransfer)
      return new MediaTek.Serializer(inputFile, true);
  }

  // No service_list_transfer tag found: fall back to the Sony serializer.
  return new Serializer(inputFile);
}
}
Expand Down
11 changes: 6 additions & 5 deletions source/ChanSort.sln
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ VisualStudioVersion = 17.0.32112.339
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort", "ChanSort\ChanSort.csproj", "{5FAFDABC-A52F-498C-BD2F-AFFC4119797A}"
ProjectSection(ProjectDependencies) = postProject
{4D5AF0A3-1B96-42C8-910D-0C4852EA22F4} = {4D5AF0A3-1B96-42C8-910D-0C4852EA22F4}
{4E68F218-5135-4D92-8C17-14FAA5D4CBF3} = {4E68F218-5135-4D92-8C17-14FAA5D4CBF3}
{74A18C6F-09FF-413E-90D9-827066FA5B36} = {74A18C6F-09FF-413E-90D9-827066FA5B36}
{68DA8072-3A29-4076-9F64-D66F38349585} = {68DA8072-3A29-4076-9F64-D66F38349585}
{74A18C6F-09FF-413E-90D9-827066FA5B36} = {74A18C6F-09FF-413E-90D9-827066FA5B36}
{A1C9A98D-368A-44E8-9B7F-7EACA46C9EC5} = {A1C9A98D-368A-44E8-9B7F-7EACA46C9EC5}
{F6F02792-07F1-48D5-9AF3-F945CA5E3931} = {F6F02792-07F1-48D5-9AF3-F945CA5E3931}
{E972D8A1-2F5F-421C-AC91-CFF45E5191BE} = {E972D8A1-2F5F-421C-AC91-CFF45E5191BE}
{4D5AF0A3-1B96-42C8-910D-0C4852EA22F4} = {4D5AF0A3-1B96-42C8-910D-0C4852EA22F4}
{A5C22199-1C51-4265-89CA-A7183F1BDB8B} = {A5C22199-1C51-4265-89CA-A7183F1BDB8B}
{B594DDA4-7BD5-450E-B648-668E0F659813} = {B594DDA4-7BD5-450E-B648-668E0F659813}
{D093E7EE-D3AD-4E7B-AF82-C6918CA017FB} = {D093E7EE-D3AD-4E7B-AF82-C6918CA017FB}
{E972D8A1-2F5F-421C-AC91-CFF45E5191BE} = {E972D8A1-2F5F-421C-AC91-CFF45E5191BE}
{F6F02792-07F1-48D5-9AF3-F945CA5E3931} = {F6F02792-07F1-48D5-9AF3-F945CA5E3931}
EndProjectSection
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort.Api", "ChanSort.Api\ChanSort.Api.csproj", "{DCCFFA08-472B-4D17-BB90-8F513FC01392}"
Expand Down Expand Up @@ -160,7 +161,7 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChanSort.Loader.TechniSat",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort.Loader.Amdb", "ChanSort.Loader.Amdb\ChanSort.Loader.Amdb.csproj", "{30E9D084-6F3C-41A9-9B46-846178C91BDB}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChanSort.Loader.MediaTek", "ChanSort.Loader.MediaTek\ChanSort.Loader.MediaTek.csproj", "{5FC54726-B7EC-4A81-919F-F924110C723E}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ChanSort.Loader.MediaTek", "ChanSort.Loader.MediaTek\ChanSort.Loader.MediaTek.csproj", "{5FC54726-B7EC-4A81-919F-F924110C723E}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down
1 change: 1 addition & 0 deletions source/ChanSort/ChanSort.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
<ProjectReference Include="..\ChanSort.Loader.Sharp\ChanSort.Loader.Sharp.csproj" />
<ProjectReference Include="..\ChanSort.Loader.Sony\ChanSort.Loader.Sony.csproj" />
<ProjectReference Include="..\ChanSort.Loader.TCL\ChanSort.Loader.TCL.csproj" />
<ProjectReference Include="..\ChanSort.Loader.TechniSat\ChanSort.Loader.TechniSat.csproj" />
<ProjectReference Include="..\ChanSort.Loader.Toshiba\ChanSort.Loader.Toshiba.csproj" />
<ProjectReference Include="..\ChanSort.Loader.Unsupported\ChanSort.Loader.Unsupported.csproj" />
<ProjectReference Include="..\ChanSort.Loader.VDR\ChanSort.Loader.VDR.csproj" />
Expand Down
6 changes: 5 additions & 1 deletion source/changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
ChanSort Change Log
===================

2024-09-31
2024-10-02
- Philips formats 100-125: further improvements/fixes for decoding non-latin characters
- Sony Bravia 7 (2024) sdb.xml: support to read it as a reference list (the TV will ignore any edits to this file)

2024-10-01
- experimental support for Philips channel list format 125 (with automatic sync to MtkChannelList.xml)
- experimental support for MtkChannelList.xml (which is part of several MediaTek based Google TVs, e.g. Philips formats 120 and 125)
- Philips formats 100-125: improved decoding of non-latin characters (turkish, cyrillic, ...)
Expand Down

0 comments on commit 1904e97

Please sign in to comment.