Skip to content

Commit

Permalink
Add --unescape-entities
Browse files Browse the repository at this point in the history
  • Loading branch information
MiguelDomingues committed Feb 14, 2024
1 parent 8e7046f commit 10d8dec
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 14 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,8 @@ This results in a string containing markdown.
When parsing HTML, if the HTML code contains only the tags specified by this flag, then the HTML is not broken into smaller chunks.
This flag can be used multiple times to allow specifying multiple HTML tags.
To use this you must also enable HTML parsing with `--parse-html`.

### --unescape-entities ENTITY

Unescape/decode HTML entities.
When using `--unescape-entities "&quot;"`, the HTML entity `&quot;` will be replaced by `"` during the translation process, i.e., when generating the output markdown file.
5 changes: 4 additions & 1 deletion src/MarkdownLocalize.CLI/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ public static int Main(string[] args)
/// <summary>
/// Values supplied through the repeatable <c>--keep-html-together</c> option:
/// HTML tags to keep within text when extracting strings. Defaults to an empty array.
/// </summary>
[Option("--keep-html-together", "HTML tags to keep within text when extracting strings", CommandOptionType.MultipleValue)]
public string[] KeepHTMLTagsTogether { get; } = Array.Empty<string>();

/// <summary>
/// Values supplied through the repeatable <c>--unescape-entities</c> option:
/// HTML entities to be unescaped/decoded. Defaults to an empty array.
/// </summary>
[Option("--unescape-entities", "HTML entities to be unescaped/decoded", CommandOptionType.MultipleValue)]
public string[] UnescapeEntities { get; } = Array.Empty<string>();

private int OnExecute()
{
Console.OutputEncoding = System.Text.Encoding.UTF8;
Expand Down Expand Up @@ -213,7 +216,7 @@ private void Translate(string inputMarkdown, string outputMarkdown, string input
}
TranslationInfo info;
string relativeToSource = PathUtils.GetRelativePath(outputMarkdown, inputMarkdown, true);
string translatedMarkdown = POT.Translate(catalog, md, inputMarkdown, relativeToSource, KeepSourceStrings, out info);
string translatedMarkdown = POT.Translate(catalog, md, inputMarkdown, relativeToSource, KeepSourceStrings, UnescapeEntities, out info);
Log(string.Format(TRANSLATION_INFO, info.TranslatedCount, info.TotalCount));
int ratio = info.TotalCount > 0 ? (int)(info.TranslatedCount * 1.0 / info.TotalCount * 100) : 0;
if (ratio >= MinRatio)
Expand Down
18 changes: 16 additions & 2 deletions src/MarkdownLocalize.POT/POT.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using MarkdownLocalize.Utils;
using static MarkdownLocalize.Markdown.TranslateRenderer;
using System.Linq;
using System.Web;

namespace MarkdownLocalize;
public class POT
Expand Down Expand Up @@ -148,7 +149,7 @@ static POCatalog GenerateCatalog()
return catalog;
}

public static string Translate(POCatalog catalog, string markdown, string fileName, string pathToSource, bool keepSourceStrings, out TranslationInfo info)
public static string Translate(POCatalog catalog, string markdown, string fileName, string pathToSource, bool keepSourceStrings, IEnumerable<string> unescapeEntities, out TranslationInfo info)
{
string translatedMarkdown = MarkdownParser.Translate(markdown, (si) =>
{
Expand All @@ -158,7 +159,20 @@ public static string Translate(POCatalog catalog, string markdown, string fileNa
{
translation = si.String;
}
return translation != null ? translation.Trim() : "";
// No translation available for this string: emit an empty string.
if (translation == null)
{
    return "";
}

// Decode only the entities the caller explicitly listed. There is deliberately
// no unconditional "&quot;" replacement here: unescaping is opt-in, one entity
// at a time, through the unescapeEntities argument.
if (unescapeEntities != null)
{
    foreach (string entity in unescapeEntities)
    {
        // string.Replace throws ArgumentException for an empty search string,
        // so blank entries are ignored rather than aborting the translation.
        if (string.IsNullOrEmpty(entity))
        {
            continue;
        }

        translation = translation.Replace(entity, HttpUtility.HtmlDecode(entity));
    }
}

return translation.Trim();
}, fileName, pathToSource, catalog.Language, out info);

return translatedMarkdown;
Expand Down
41 changes: 30 additions & 11 deletions test/MarkdownLocalize.Tests/TranslateMarkdown.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public void TranslateSimple(string poFile, string originalMarkdown, string trans
});
var catalog = POT.Load(ReadPO(poFile));
TranslationInfo info;
string md = POT.Translate(catalog, originalMarkdown, null, null, false, out info);
string md = POT.Translate(catalog, originalMarkdown, null, null, false, new string[] { "&quot;" }, out info);

Assert.Equal(translatedMarkdown, md);
Assert.Equal(expectedTotalCount, info.TotalCount);
Expand All @@ -45,7 +45,7 @@ public void TranslateHtml(string poFile, string originalMarkdown, string transla
});
var catalog = POT.Load(ReadPO(poFile));
TranslationInfo info;
string md = POT.Translate(catalog, originalMarkdown, null, null, false, out info);
string md = POT.Translate(catalog, originalMarkdown, null, null, false, new string[] { "&quot;" }, out info);

Assert.Equal(translatedMarkdown, md);
Assert.Equal(expectedTotalCount, info.TotalCount);
Expand All @@ -57,7 +57,7 @@ public void TranslateKeepSource()
{
var catalog = POT.Load(ReadPO("headings.pt-PT.po"));
TranslationInfo info;
string md = POT.Translate(catalog, "# Heading\n\n##New Heading", null, null, true, out info);
string md = POT.Translate(catalog, "# Heading\n\n##New Heading", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal("# Título\n\n##New Heading", md);
Assert.Equal(2, info.TotalCount);
Expand All @@ -74,7 +74,7 @@ public void Quote()
> Hello
World
", null, null, true, out info);
", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"- Título
Expand All @@ -95,7 +95,7 @@ public void QuoteTwo()
> Hello
> World
", null, null, true, out info);
", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"- Título
Expand All @@ -122,7 +122,7 @@ public void ListWithIndentedText()
World
1. Heading
", null, null, true, out info);
", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"1. Título
Expand All @@ -148,7 +148,7 @@ public void MultipleLiteralsTogetherImage()
});
var catalog = POT.Load(ReadPO("headings.pt-PT.po"));
TranslationInfo info;
string md = POT.Translate(catalog, @"![Hello](image.png)", null, null, true, out info);
string md = POT.Translate(catalog, @"![Hello](image.png)", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"![Olá](image.png)", md);

Expand Down Expand Up @@ -177,7 +177,7 @@ public void KeepHtmlTagsTogether()
Hello
World
</p>", null, null, true, out info);
</p>", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"# Título
Expand Down Expand Up @@ -214,7 +214,7 @@ public void WhiteSpaceAfterHTML()
</div>
Hello", null, null, true, out info);
Hello", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"# Título
Expand Down Expand Up @@ -271,7 +271,7 @@ Text 7
<td>
</td></tr>
</tbody>
</table>", null, null, true, out info);
</table>", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"<table>
<tbody>
Expand Down Expand Up @@ -319,11 +319,30 @@ public void UpdateHTMLImagePath()
});
var catalog = POT.Load(ReadPO("headings.pt-PT.po"));
TranslationInfo info;
string md = POT.Translate(catalog, @"<img src=""images/img.png"">", null, null, true, out info);
string md = POT.Translate(catalog, @"<img src=""images/img.png"">", null, null, true, new string[] { "&quot;" }, out info);

Assert.Equal(@"<img src=""../../images/img.png"">".ReplaceLineEndings(), md.ReplaceLineEndings());

Assert.Equal(0, info.TotalCount);
Assert.Equal(0, info.TranslatedCount);
}

[Theory]
[InlineData("Hello \"Hey!\"")]
[InlineData("Some `\"code\"`")]
public void Quotes(string original)
{
    // Arrange: enable HTML parsing and load the catalog whose msgstr values use &quot;.
    var rendererOptions = new RendererOptions() { ParseHtml = true };
    MarkdownParser.SetParserOptions(rendererOptions);
    var quotesCatalog = POT.Load(ReadPO("quotes.po"));
    var entitiesToUnescape = new string[] { "&quot;" };

    // Act: translate while asking for &quot; to be unescaped.
    string translated = POT.Translate(quotesCatalog, original, null, null, true, entitiesToUnescape, out TranslationInfo stats);

    // Assert: the output equals the input, and the single string counts as translated.
    Assert.Equal(original, translated);

    Assert.Equal(1, stats.TotalCount);
    Assert.Equal(1, stats.TranslatedCount);
}
}
18 changes: 18 additions & 0 deletions test/MarkdownLocalize.Tests/resources/quotes.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
msgid ""
msgstr ""
"POT-Creation-Date: 1987-05-13 20:45+0000\n"
"Project-Id-Version: Markdown POT\n"
"X-Generator: Markdown POT\n"
"Content-Transfer-Encoding: 8bit\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Last-Translator: \n"
"Language-Team: Spanish\n"
"Language: pt-PT\n"

msgctxt "Text"
msgid "Hello \"Hey!\""
msgstr "Hello &quot;Hey!&quot;"

msgctxt "Text"
msgid "Some `\"code\"`"
msgstr "Some `&quot;code&quot;`"

0 comments on commit 10d8dec

Please sign in to comment.