Skip to content

Commit

Permalink
Added support for HTML fragments in Document Assembler Content tag (#86)
Browse files Browse the repository at this point in the history
* Added inline HTML support in Document Assembler with associated tests and a Tutorial under docs.

* Ran csharpier!
Updated DA Multiline content test because the HtmlConverter creates paragraphs rather than inserting soft line breaks.

* Fixed behaviour of HtmlConverter so that a Content select can return multiple elements (they are joined with a new line).
Ran csharpier on all files.
Added HtmlAgilityPack to paket references.

* Fixed issue with duplicate package references, caused by initially adding the package via NuGet rather than Paket.

* fix: no more paket

* Removed HTML Agility Pack.
Added tests for HTML content (not supported) and for XHTML that is not well-formed.
Looking at the possibility of replacing the XmlReader approach with something closer to HtmlToWmlConverter.

* Work in progress.  Builds!

* Updated to use an approach modelled on HtmlToWmlConverter.

* Csharpier fixes.

---------

Co-authored-by: Sergey Tihon <[email protected]>
  • Loading branch information
MalcolmJohnston and sergey-tihon authored Dec 8, 2024
1 parent 3eb4277 commit ab3be1f
Show file tree
Hide file tree
Showing 13 changed files with 602 additions and 119 deletions.
13 changes: 5 additions & 8 deletions Clippit.Tests/Word/DocumentAssemblerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ public DocumentAssemblerTests(ITestOutputHelper log)
[InlineData("DA285-ImageSelectNoParagraphFollowedAfterMetadata.docx", "DA-Data-WithImages.xml", true)]
[InlineData("DA285A-ImageSelectNoParagraphFollowedAfterMetadata.docx", "DA-Data-WithImages.xml", true)]
[InlineData("DA-I0038-TemplateWithMultipleXPathResults.docx", "DA-I0038-Data.xml", false)]
[InlineData("DA289A-xhtml-formatting.docx", "DA-html-input.xml", false)]
[InlineData("DA289B-html-not-supported.docx", "DA-html-input.xml", true)]
[InlineData("DA289C-not-well-formed-xhtml.docx", "DA-html-input.xml", true)]
public void DA101(string name, string data, bool err)
{
var templateDocx = new FileInfo(Path.Combine(_sourceDir.FullName, name));
Expand Down Expand Up @@ -185,14 +188,8 @@ public void DA259(string name, string data, bool err)
Path.Combine(TempDir, name.Replace(".docx", "-processed-by-DocumentAssembler.docx"))
);
var afterAssembling = new WmlDocument(assembledDocx.FullName);
var brCount = afterAssembling
.MainDocumentPart.Element(W.body)
.Elements(W.p)
.ElementAt(1)
.Elements(W.r)
.Elements(W.br)
.Count();
Assert.Equal(4, brCount);
var brCount = afterAssembling.MainDocumentPart.Element(W.body).Elements(W.p).Count();
Assert.Equal(6, brCount);
}

[Theory]
Expand Down
42 changes: 23 additions & 19 deletions Clippit/Html/HtmlToWmlConverterCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ private static object NormalizeTransform(XNode node)
return node;
}

private enum NextExpected
internal enum NextExpected
{
Paragraph,
Run,
Expand Down Expand Up @@ -2830,7 +2830,7 @@ string pictureDescription
</a:graphicData>
</a:graphic>
#endif
private static XElement GetParagraphProperties(
internal static XElement GetParagraphProperties(
XElement blockLevelElement,
string styleName,
HtmlToWmlConverterSettings settings
Expand Down Expand Up @@ -3041,14 +3041,18 @@ private static XElement[] GetSpacingProperties(XElement paragraph, HtmlToWmlConv
return new XElement[] { spacing, ind, contextualSpacing };
}

private static XElement GetRunProperties(XText textNode, HtmlToWmlConverterSettings settings)
internal static XElement GetRunProperties(XText textNode, HtmlToWmlConverterSettings settings)
{
var parent = textNode.Parent;
var rPr = GetRunProperties(parent, settings);
return rPr;
if (parent != null)
{
return GetRunProperties(parent, settings);
}

return new XElement(W.rPr);
}

private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSettings settings)
internal static XElement GetRunProperties(XElement element, HtmlToWmlConverterSettings settings)
{
var colorProperty = element.GetProp("color");
var fontFamilyProperty = element.GetProp("font-family");
Expand All @@ -3060,15 +3064,15 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet
var letterSpacingProperty = element.GetProp("letter-spacing");
var directionProp = element.GetProp("direction");

var colorPropertyString = colorProperty.ToString();
var colorPropertyString = colorProperty?.ToString();
var fontFamilyString = GetUsedFontFromFontFamilyProperty(fontFamilyProperty);
var fontSizeTPoint = GetUsedSizeFromFontSizeProperty(fontSizeProperty);
var textDecorationString = textDecorationProperty.ToString();
var fontStyleString = fontStyleProperty.ToString();
var fontWeightString = fontWeightProperty.ToString().ToLower();
var backgroundColorString = backgroundColorProperty.ToString().ToLower();
var letterSpacingString = letterSpacingProperty.ToString().ToLower();
var directionString = directionProp.ToString().ToLower();
var textDecorationString = textDecorationProperty?.ToString();
var fontStyleString = fontStyleProperty?.ToString();
var fontWeightString = fontWeightProperty?.ToString().ToLower();
var backgroundColorString = backgroundColorProperty?.ToString().ToLower();
var letterSpacingString = letterSpacingProperty?.ToString().ToLower();
var directionString = directionProp?.ToString().ToLower();

var subAncestor = element.AncestorsAndSelf(XhtmlNoNamespace.sub).Any();
var supAncestor = element.AncestorsAndSelf(XhtmlNoNamespace.sup).Any();
Expand All @@ -3085,7 +3089,7 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet
dirAttributeString = dirAttribute.Value.ToLower();

XElement shd = null;
if (backgroundColorString != "transparent")
if (backgroundColorString != null && backgroundColorString != "transparent")
shd = new XElement(
W.shd,
new XAttribute(W.val, "clear"),
Expand Down Expand Up @@ -3155,7 +3159,7 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet
rStyle = new XElement(W.rStyle, new XAttribute(W.val, "Hyperlink"));

XElement spacing = null;
if (letterSpacingProperty.IsNotNormal)
if (letterSpacingProperty != null && letterSpacingProperty.IsNotNormal)
spacing = new XElement(W.spacing, new XAttribute(W.val, (long)(Twip)letterSpacingProperty));

XElement rtl = null;
Expand Down Expand Up @@ -3191,9 +3195,9 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet
// todo this is not right - needs to be rationalized for all characters in an entire paragraph.
// if there is text like <p>abc <em> def </em> ghi</p> then there needs to be just one space between abc and def, and between
// def and ghi.
private static string GetDisplayText(XText node, bool preserveWhiteSpace)
internal static string GetDisplayText(XText node, bool preserveWhiteSpace)
{
var textTransform = node.Parent.GetProp("text-transform").ToString();
var textTransform = node.Parent.GetProp("text-transform")?.ToString();
var isFirst = node.Parent.Name == XhtmlNoNamespace.p && node == node.Parent.FirstNode;
var isLast = node.Parent.Name == XhtmlNoNamespace.p && node == node.Parent.LastNode;

Expand Down Expand Up @@ -3884,7 +3888,7 @@ private static XElement GetTableRowProperties(XElement element)
return trPr;
}

private static XAttribute GetXmlSpaceAttribute(string value)
internal static XAttribute GetXmlSpaceAttribute(string value)
{
if (value.StartsWith(" ") || value.EndsWith(" "))
return new XAttribute(XNamespace.Xml + "space", "preserve");
Expand Down Expand Up @@ -4331,7 +4335,7 @@ private static XElement GetBackgroundProperty(XElement element)
var color = element.GetProp("background-color");

// todo this really should test against default background color
if (color.ToString() != "transparent")
if (color != null && color.ToString() != "transparent")
{
var hexString = color.ToString();
var shd = new XElement(
Expand Down
Loading

0 comments on commit ab3be1f

Please sign in to comment.