diff --git a/Clippit.Tests/Word/DocumentAssemblerTests.cs b/Clippit.Tests/Word/DocumentAssemblerTests.cs index da5c92f7..770075fc 100644 --- a/Clippit.Tests/Word/DocumentAssemblerTests.cs +++ b/Clippit.Tests/Word/DocumentAssemblerTests.cs @@ -154,6 +154,9 @@ public DocumentAssemblerTests(ITestOutputHelper log) [InlineData("DA285-ImageSelectNoParagraphFollowedAfterMetadata.docx", "DA-Data-WithImages.xml", true)] [InlineData("DA285A-ImageSelectNoParagraphFollowedAfterMetadata.docx", "DA-Data-WithImages.xml", true)] [InlineData("DA-I0038-TemplateWithMultipleXPathResults.docx", "DA-I0038-Data.xml", false)] + [InlineData("DA289A-xhtml-formatting.docx", "DA-html-input.xml", false)] + [InlineData("DA289B-html-not-supported.docx", "DA-html-input.xml", true)] + [InlineData("DA289C-not-well-formed-xhtml.docx", "DA-html-input.xml", true)] public void DA101(string name, string data, bool err) { var templateDocx = new FileInfo(Path.Combine(_sourceDir.FullName, name)); @@ -185,14 +188,8 @@ public void DA259(string name, string data, bool err) Path.Combine(TempDir, name.Replace(".docx", "-processed-by-DocumentAssembler.docx")) ); var afterAssembling = new WmlDocument(assembledDocx.FullName); - var brCount = afterAssembling - .MainDocumentPart.Element(W.body) - .Elements(W.p) - .ElementAt(1) - .Elements(W.r) - .Elements(W.br) - .Count(); - Assert.Equal(4, brCount); + var brCount = afterAssembling.MainDocumentPart.Element(W.body).Elements(W.p).Count(); + Assert.Equal(6, brCount); } [Theory] diff --git a/Clippit/Html/HtmlToWmlConverterCore.cs b/Clippit/Html/HtmlToWmlConverterCore.cs index 6c785a27..47d333af 100644 --- a/Clippit/Html/HtmlToWmlConverterCore.cs +++ b/Clippit/Html/HtmlToWmlConverterCore.cs @@ -824,7 +824,7 @@ private static object NormalizeTransform(XNode node) return node; } - private enum NextExpected + internal enum NextExpected { Paragraph, Run, @@ -2830,7 +2830,7 @@ string pictureDescription #endif - private static XElement GetParagraphProperties( + internal static 
XElement GetParagraphProperties( XElement blockLevelElement, string styleName, HtmlToWmlConverterSettings settings @@ -3041,14 +3041,18 @@ private static XElement[] GetSpacingProperties(XElement paragraph, HtmlToWmlConv return new XElement[] { spacing, ind, contextualSpacing }; } - private static XElement GetRunProperties(XText textNode, HtmlToWmlConverterSettings settings) + internal static XElement GetRunProperties(XText textNode, HtmlToWmlConverterSettings settings) { var parent = textNode.Parent; - var rPr = GetRunProperties(parent, settings); - return rPr; + if (parent != null) + { + return GetRunProperties(parent, settings); + } + + return new XElement(W.rPr); } - private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSettings settings) + internal static XElement GetRunProperties(XElement element, HtmlToWmlConverterSettings settings) { var colorProperty = element.GetProp("color"); var fontFamilyProperty = element.GetProp("font-family"); @@ -3060,15 +3064,15 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet var letterSpacingProperty = element.GetProp("letter-spacing"); var directionProp = element.GetProp("direction"); - var colorPropertyString = colorProperty.ToString(); + var colorPropertyString = colorProperty?.ToString(); var fontFamilyString = GetUsedFontFromFontFamilyProperty(fontFamilyProperty); var fontSizeTPoint = GetUsedSizeFromFontSizeProperty(fontSizeProperty); - var textDecorationString = textDecorationProperty.ToString(); - var fontStyleString = fontStyleProperty.ToString(); - var fontWeightString = fontWeightProperty.ToString().ToLower(); - var backgroundColorString = backgroundColorProperty.ToString().ToLower(); - var letterSpacingString = letterSpacingProperty.ToString().ToLower(); - var directionString = directionProp.ToString().ToLower(); + var textDecorationString = textDecorationProperty?.ToString(); + var fontStyleString = fontStyleProperty?.ToString(); + var fontWeightString = 
fontWeightProperty?.ToString().ToLower(); + var backgroundColorString = backgroundColorProperty?.ToString().ToLower(); + var letterSpacingString = letterSpacingProperty?.ToString().ToLower(); + var directionString = directionProp?.ToString().ToLower(); var subAncestor = element.AncestorsAndSelf(XhtmlNoNamespace.sub).Any(); var supAncestor = element.AncestorsAndSelf(XhtmlNoNamespace.sup).Any(); @@ -3085,7 +3089,7 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet dirAttributeString = dirAttribute.Value.ToLower(); XElement shd = null; - if (backgroundColorString != "transparent") + if (backgroundColorString != null && backgroundColorString != "transparent") shd = new XElement( W.shd, new XAttribute(W.val, "clear"), @@ -3155,7 +3159,7 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet rStyle = new XElement(W.rStyle, new XAttribute(W.val, "Hyperlink")); XElement spacing = null; - if (letterSpacingProperty.IsNotNormal) + if (letterSpacingProperty != null && letterSpacingProperty.IsNotNormal) spacing = new XElement(W.spacing, new XAttribute(W.val, (long)(Twip)letterSpacingProperty)); XElement rtl = null; @@ -3191,9 +3195,9 @@ private static XElement GetRunProperties(XElement element, HtmlToWmlConverterSet // todo this is not right - needs to be rationalized for all characters in an entire paragraph. // if there is text like

abc def ghi

then there needs to be just one space between abc and def, and between // def and ghi. - private static string GetDisplayText(XText node, bool preserveWhiteSpace) + internal static string GetDisplayText(XText node, bool preserveWhiteSpace) { - var textTransform = node.Parent.GetProp("text-transform").ToString(); + var textTransform = node.Parent.GetProp("text-transform")?.ToString(); var isFirst = node.Parent.Name == XhtmlNoNamespace.p && node == node.Parent.FirstNode; var isLast = node.Parent.Name == XhtmlNoNamespace.p && node == node.Parent.LastNode; @@ -3884,7 +3888,7 @@ private static XElement GetTableRowProperties(XElement element) return trPr; } - private static XAttribute GetXmlSpaceAttribute(string value) + internal static XAttribute GetXmlSpaceAttribute(string value) { if (value.StartsWith(" ") || value.EndsWith(" ")) return new XAttribute(XNamespace.Xml + "space", "preserve"); @@ -4331,7 +4335,7 @@ private static XElement GetBackgroundProperty(XElement element) var color = element.GetProp("background-color"); // todo this really should test against default background color - if (color.ToString() != "transparent") + if (color != null && color.ToString() != "transparent") { var hexString = color.ToString(); var shd = new XElement( diff --git a/Clippit/Word/Assembler/HtmlConverter.cs b/Clippit/Word/Assembler/HtmlConverter.cs new file mode 100644 index 00000000..4061e6c9 --- /dev/null +++ b/Clippit/Word/Assembler/HtmlConverter.cs @@ -0,0 +1,395 @@ +using System.Collections; +using System.Text.RegularExpressions; +using System.Xml; +using System.Xml.Linq; +using System.Xml.XPath; +using Clippit.Html; +using Clippit.Internal; +using DocumentFormat.OpenXml.Packaging; +using NextExpected = Clippit.Html.HtmlToWmlConverterCore.NextExpected; + +namespace Clippit.Word.Assembler +{ + internal static class HtmlConverter + { + private static readonly HtmlToWmlConverterSettings htmlConverterSettings = + HtmlToWmlConverter.GetDefaultSettings(); + + private static readonly 
Regex detectEntityRegEx = new Regex("^&(?:#([0-9]+)|#x([0-9a-fA-F]+)|([0-9a-zA-Z]+));"); + + /// + /// Method processes a string that contains inline html tags and generates a run with the necessary properties + /// Supported inline html tags: b, i, em, strong, u, br, a + /// Supported block tags: p, div + /// TODO: add support for the following html tags: big, small, sub, sup, span. + /// + /// Source element. + /// Data element with content. + /// The paragraph properties. + /// Error indicator. + internal static IEnumerable ProcessContentElement( + this XElement element, + XElement data, + TemplateError templateError, + ref OpenXmlPart part + ) + { + var xPath = (string)element.Attribute(PA.Select); + var optionalString = (string)element.Attribute(PA.Optional); + bool optional = (optionalString != null && optionalString.ToLower() == "true"); + + string[] values = data.EvaluateXPath(xPath, optional); + + // if we no data returned then just return an empty run + if (values.Length == 0) + { + return new[] { new XElement(W.r, W.t) }; + } + + // otherwise split the values if there are new line characters + values = values + .SelectMany(x => x.Replace("\r\n", "\n", StringComparison.OrdinalIgnoreCase).Split('\n')) + .ToArray(); + + List results = new List(); + for (int i = 0; i < values.Length; i++) + { + // try processing as XML + XElement parsedElement = XElement.Parse($"{EscapeAmpersands(values[i])}"); + + results.Add( + Transform( + parsedElement, + htmlConverterSettings, + part, + i == 0 ? 
NextExpected.Run : NextExpected.Paragraph, + true + ) + ); + } + + results = FlattenResults(results); + + if (results.Count == 0) + { + return new[] { new XElement(W.r, W.t) }; + } + + return results; + } + + private static List FlattenResults(IEnumerable content) + { + // flatten the returned content + List results = new List(); + foreach (object obj in content) + { + if (obj is IEnumerable) + { + results.AddRange(FlattenResults(obj as IEnumerable)); + } + else + { + results.Add(obj); + } + } + + return results; + } + + private static object Transform( + XNode node, + HtmlToWmlConverterSettings settings, + OpenXmlPart part, + NextExpected nextExpected, + bool preserveWhiteSpace + ) + { + var element = node as XElement; + if (element != null) + { + if (element.Name == XhtmlNoNamespace.a) + { + var rId = Relationships.GetNewRelationshipId(); + var href = (string)element.Attribute(NoNamespace.href); + if (href != null) + { + Uri uri = null; + try + { + uri = href.GetUri(); + } + catch (UriFormatException) + { + var rPr = HtmlToWmlConverterCore.GetRunProperties(element, settings); + var run = new XElement(W.r, rPr, new XElement(W.t, element.Value)); + return new[] { run }; + } + + if (uri != null) + { + part.AddHyperlinkRelationship(uri, true, rId); + if (element.Element(XhtmlNoNamespace.img) != null) + { + var imageTransformed = element + .Nodes() + .Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)) + .OfType(); + var newImageTransformed = imageTransformed + .Select(i => + { + if (i.Elements(W.drawing).Any()) + { + var newRun = new XElement(i); + var docPr = newRun + .Elements(W.drawing) + .Elements(WP.inline) + .Elements(WP.docPr) + .FirstOrDefault(); + if (docPr != null) + { + var hlinkClick = new XElement( + A.hlinkClick, + new XAttribute(R.id, rId), + new XAttribute(XNamespace.Xmlns + "a", A.a.NamespaceName) + ); + docPr.Add(hlinkClick); + } + return newRun; + } + return i; + }) + .ToList(); + return newImageTransformed; + } + + var rPr 
= HtmlToWmlConverterCore.GetRunProperties(element, settings); + + var hyperlink = new XElement( + W.hyperlink, + new XAttribute(R.id, rId), + new XElement(W.r, rPr, new XElement(W.t, element.Value)) + ); + + if (nextExpected == NextExpected.Paragraph) + { + return new XElement(W.p, hyperlink); + } + + return new[] { hyperlink }; + } + } + return null; + } + + if (element.Name == XhtmlNoNamespace.b) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.div) + { + if (nextExpected == NextExpected.Paragraph) + { + if ( + element + .Descendants() + .Any(d => d.Name == XhtmlNoNamespace.li || d.Name == XhtmlNoNamespace.p) + ) + { + return element + .Nodes() + .Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + } + else + { + return GenerateNextExpected(element, settings, part, null, nextExpected, false); + } + } + else + { + return element + .Nodes() + .Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + } + } + + if (element.Name == XhtmlNoNamespace.em) + return element + .Nodes() + .Select(n => Transform(n, settings, part, NextExpected.Run, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.html) + return element + .Nodes() + .Select(n => Transform(n, settings, part, NextExpected.Paragraph, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.i) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.li) + { + return GenerateNextExpected(element, settings, part, null, NextExpected.Paragraph, false); + } + + if (element.Name == XhtmlNoNamespace.ol) + return element + .Nodes() + .Select(n => Transform(n, settings, part, NextExpected.Paragraph, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.p) + { + return GenerateNextExpected(element, settings, part, null, NextExpected.Paragraph, false); + } + + if 
(element.Name == XhtmlNoNamespace.strong) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.sub) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.sup) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.u) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.ul) + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + + if (element.Name == XhtmlNoNamespace.br) + if (nextExpected == NextExpected.Paragraph) + { + return new XElement(W.p, new XElement(W.r, new XElement(W.t))); + } + else + { + return new XElement(W.r); + } + + // if no match up to this point, then just recursively process descendants + return element.Nodes().Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)); + } + + // process text nodes unless their parent is a title tag + if (node.Parent.Name != XhtmlNoNamespace.title) + return GenerateNextExpected(node, settings, part, null, nextExpected, preserveWhiteSpace); + + return null; + } + + private static object GenerateNextExpected( + XNode node, + HtmlToWmlConverterSettings settings, + OpenXmlPart part, + string styleName, + NextExpected nextExpected, + bool preserveWhiteSpace + ) + { + if (nextExpected == NextExpected.Paragraph) + { + var element = node as XElement; + if (element != null) + { + return new XElement( + W.p, + element.Nodes().Select(n => Transform(n, settings, part, NextExpected.Run, preserveWhiteSpace)) + ); + } + else + { + var xTextNode = node as XText; + if (xTextNode != null) + { + var textNodeString = HtmlToWmlConverterCore.GetDisplayText(xTextNode, preserveWhiteSpace); + var p = new XElement( + W.p, 
+ new XElement( + W.r, + HtmlToWmlConverterCore.GetRunProperties(xTextNode, settings), + new XElement( + W.t, + HtmlToWmlConverterCore.GetXmlSpaceAttribute(textNodeString), + textNodeString + ) + ) + ); + return p; + } + return null; + } + } + else + { + var element = node as XElement; + if (element != null) + { + return element + .Nodes() + .Select(n => Transform(n, settings, part, nextExpected, preserveWhiteSpace)) + .AsEnumerable(); + } + else + { + var textNodeString = HtmlToWmlConverterCore.GetDisplayText((XText)node, preserveWhiteSpace); + var rPr = HtmlToWmlConverterCore.GetRunProperties((XText)node, settings); + var r = new XElement( + W.r, + rPr, + new XElement(W.t, HtmlToWmlConverterCore.GetXmlSpaceAttribute(textNodeString), textNodeString) + ); + return r; + } + } + } + + private static string EscapeAmpersands(string value) + { + // check whether we have any processing to do + if (!string.IsNullOrWhiteSpace(value) && value.Contains('&', StringComparison.OrdinalIgnoreCase)) + { + string result = string.Empty; + + int ampIndex = value.IndexOf('&', StringComparison.OrdinalIgnoreCase); + while (ampIndex >= 0) + { + // put everything before the ampersand into the result + result += value.Substring(0, ampIndex); + + // then trim the value back + value = value.Substring(ampIndex); + + // now check whether ampersand we have found is the start of an entity + Match m = detectEntityRegEx.Match(value); + if (m.Success) + { + // if this is an entity then add to result + result += value.Substring(0, m.Length); + + // then remove entity from input + value = value.Substring(m.Length); + } + else + { + // add escaped ampersand to result + result += "&"; + + // then remove ampersand from input + value = value.Substring(1); + } + + ampIndex = value.IndexOf('&', StringComparison.OrdinalIgnoreCase); + } + + // add any remaining string + if (!string.IsNullOrEmpty(value)) + { + result += value; + } + + return result; + } + + return value; + } + } +} diff --git 
a/Clippit/Word/Assembler/UriExtensions.cs b/Clippit/Word/Assembler/UriExtensions.cs new file mode 100644 index 00000000..ec27feaa --- /dev/null +++ b/Clippit/Word/Assembler/UriExtensions.cs @@ -0,0 +1,12 @@ +using System; + +namespace Clippit.Word.Assembler +{ + internal static class UriExtensions + { + internal static Uri GetUri(this string s) + { + return new UriBuilder(s).Uri; + } + } +} diff --git a/Clippit/Word/Assembler/XPathExtensions.cs b/Clippit/Word/Assembler/XPathExtensions.cs index ee275082..9234d290 100644 --- a/Clippit/Word/Assembler/XPathExtensions.cs +++ b/Clippit/Word/Assembler/XPathExtensions.cs @@ -1,11 +1,6 @@ using System; using System.Collections; -using System.Collections.Generic; -using System.IO; using System.Linq; -using System.Text; -using System.Threading.Tasks; -using System.Xml; using System.Xml.Linq; using System.Xml.XPath; diff --git a/Clippit/Word/DocumentAssembler.cs b/Clippit/Word/DocumentAssembler.cs index 9aac9a8a..b4d4c08a 100644 --- a/Clippit/Word/DocumentAssembler.cs +++ b/Clippit/Word/DocumentAssembler.cs @@ -6,13 +6,16 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Runtime.Remoting; using System.Text.RegularExpressions; using System.Xml; using System.Xml.Linq; using System.Xml.Schema; using System.Xml.XPath; +using Clippit.Internal; using Clippit.Word.Assembler; using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; using SixLabors.ImageSharp; using SixLabors.ImageSharp.PixelFormats; using Path = System.IO.Path; @@ -843,47 +846,6 @@ private static string ValidatePerSchema(XElement element) private static Dictionary s_paSchemaSets; - /// - /// Gets the next image relationship identifier of given part. The - /// parts can be either header, footer or main document part. The method - /// scans for already present relationship identifiers, then increments and - /// returns the next available value. - /// - /// The part. - /// System.String. 
- private static string GetNextImageRelationshipId(OpenXmlPart part) - { - switch (part) - { - case MainDocumentPart mainDocumentPart: - { - var imageId = mainDocumentPart - .Parts.Select(p => Regex.Match(p.RelationshipId, @"rId(?\d+)").Groups["rId"].Value) - .Max(Convert.ToDecimal); - - return $"rId{++imageId}"; - } - case HeaderPart headerPart: - { - var imageId = headerPart - .Parts.Select(p => Regex.Match(p.RelationshipId, @"rId(?\d+)").Groups["rId"].Value) - .Max(Convert.ToDecimal); - - return $"rId{++imageId}"; - } - case FooterPart footerPart: - { - var imageId = footerPart - .Parts.Select(p => Regex.Match(p.RelationshipId, @"rId(?\d+)").Groups["rId"].Value) - .Max(Convert.ToDecimal); - - return $"rId{++imageId}"; - } - default: - return null; - } - } - /// /// Calculates the maximum docPr id. The identifier is /// unique throughout the document. This method @@ -1003,7 +965,7 @@ OpenXmlPart part // assign unique image and paragraph ids. Image id is document property Id (wp:docPr) // and relationship id is rId. Their numbering is different. 
const string imageId = InvalidImageId; // Ids will be replaced with real ones later, after transform is done - var relationshipId = GetNextImageRelationshipId(part); + var relationshipId = Relationships.GetNewRelationshipId(); var inline = para.Descendants(W.drawing).Descendants(WP.inline).FirstOrDefault(); if (inline == null) @@ -1303,63 +1265,80 @@ OpenXmlPart part } if (element.Name == PA.Content) { - if (element.Descendants(A.r).FirstOrDefault() is not null) + XElement parentPara = element.Ancestors(W.p).FirstOrDefault(); // is the Content element in a paragraph + XElement embeddedPara = element.Descendants(W.p).FirstOrDefault(); // does the Content element contain a paragraph + + // if so create a new paragraph to add our content to + if (embeddedPara != null) { - return ProcessAParagraph(element, data, templateError); - } + // get the current paragraph properties + XElement pProps = embeddedPara.Descendants(W.pPr).FirstOrDefault(); - var para = element.Descendants(W.p).FirstOrDefault(); - var run = element.Descendants(W.r).FirstOrDefault(); + // create a new paragraph to return + embeddedPara = new XElement(W.p); - var xPath = (string)element.Attribute(PA.Select); - var optionalString = (string)element.Attribute(PA.Optional); - var optional = (optionalString != null && optionalString.ToLower() == "true"); + // add the paragraph properties + if (pProps != null) + { + embeddedPara.Add(pProps); + } + } - string[] newValues; + XElement currentPara = embeddedPara ?? 
parentPara; + XElement currentParaProps = currentPara.Descendants(W.pPr).FirstOrDefault(); + + // get the list of created elements, could be all paragraphs or a run followed by paragraphs + IList content; try { - newValues = data.EvaluateXPath(xPath, optional); + content = element.ProcessContentElement(data, templateError, ref part).ToList(); } - catch (XPathException e) + catch (Exception ex) { - return element.CreateContextErrorMessage("XPathException: " + e.Message, templateError); + return element.CreateContextErrorMessage($"Content: {ex.Message}", templateError); } - var lines = newValues.SelectMany(x => x.Split('\n')); - if (para is not null) + // get XElements and ensure all but the first element is in a + List elements = new List(); + for (int i = 0; i < content.Count; i++) { - var p = new XElement(W.p, para.Elements(W.pPr)); - var rPr = para.Elements(W.r).Elements(W.rPr).FirstOrDefault(); - foreach (var line in lines) + object obj = content[i]; + if (obj is XElement) { - p.Add( - new XElement( - W.r, - rPr, - (p.Elements().Count() > 1) ? new XElement(W.br) : null, - new XElement(W.t, line) - ) - ); + var objEl = obj as XElement; + if (i > 0 && objEl.Name == W.r || objEl.Name == W.hyperlink) + { + elements.Add(new XElement(W.p, currentParaProps, content[i])); + } + else + { + elements.Add(objEl); + } } - return p; } - else + + // add all but the first element after the current paragraph + for (int i = elements.Count - 1; i > 0; i--) { - var list = new List(); - var rPr = run.Elements().Where(e => e.Name != W.t); - foreach (var line in lines) + if (embeddedPara == null && parentPara != null) { - list.Add( - new XElement( - W.r, - rPr, - (list.Count > 0) ? 
new XElement(W.br) : null, - new XElement(W.t, line) - ) - ); + parentPara.AddAfterSelf(elements[i]); + } + else + { + element.AddAfterSelf(elements[i]); } - return list; } + + // return first element wrapped in the embedded para if we do not have a paragraph + if (elements[0].Name != W.p && embeddedPara != null) + { + embeddedPara.Add(elements[0]); + return embeddedPara; + } + + // or simply return the first element + return elements[0]; } if (element.Name == PA.Repeat) { @@ -1382,11 +1361,6 @@ OpenXmlPart part if (optional) { return null; - //XElement para = element.Descendants(W.p).FirstOrDefault(); - //if (para != null) - // return new XElement(W.p, new XElement(W.r)); - //else - // return new XElement(W.r); } return element.CreateContextErrorMessage("Repeat: Select returned no data", templateError); } diff --git a/TestFiles/DA/DA-html-input.xml b/TestFiles/DA/DA-html-input.xml new file mode 100644 index 00000000..a64e4f64 --- /dev/null +++ b/TestFiles/DA/DA-html-input.xml @@ -0,0 +1,40 @@ + + + <p><b>Some Bold Text</b></p> + <i>Some Italic Text</i> + <u>Some Underline Text</u> + <a href="https://www.google.co.uk">Google</a> + <i><b>Some Bold Italic Text</b></i> + <u><b>Some Bold Underline Text</b></u> + <i><u>Some Italic Underline Text</u></i> + <b><i><u>Some Bold Italic Underline Text</u></i></b> + <b><a href="https://www.google.co.uk">Bold Google</a></b> + <i><a href="https://www.google.co.uk">Italic Google</a></i> + <u><a href="https://www.google.co.uk">Underline Google</a></u> + <i><b><a href="https://www.google.co.uk">Bold Italic Google</a></b></i> + <u><b><a href="https://www.google.co.uk">Bold Underline Google</a></b></u> + <u><i><a href="https://www.google.co.uk">Italic Underline Google</a></i></u> + <b><u><i><a href="https://www.google.co.uk">Bold Italic Underline Google</a></i></u></b> + <a href="www.google.co.uk">Google</a> + This content comes from CDATA

+ ]]> +
+ What follows is Subscript!

+

What follows is more Subscript! followed by a break
and some more text.

+ ]]> +
+ What follows is Superscript!

+ ]]> +
+ What follows is Struckout!

+ ]]> +
+ + This is NOT valid XHTML.<br>But is Valid HTML. + + <b><i>This is NOT well formed XHTML.</b></i> +
\ No newline at end of file diff --git a/TestFiles/DA/DA289A-xhtml-formatting.docx b/TestFiles/DA/DA289A-xhtml-formatting.docx new file mode 100644 index 00000000..3a2fa70d Binary files /dev/null and b/TestFiles/DA/DA289A-xhtml-formatting.docx differ diff --git a/TestFiles/DA/DA289B-html-not-supported.docx b/TestFiles/DA/DA289B-html-not-supported.docx new file mode 100644 index 00000000..3d8a62a3 Binary files /dev/null and b/TestFiles/DA/DA289B-html-not-supported.docx differ diff --git a/TestFiles/DA/DA289C-not-well-formed-xhtml.docx b/TestFiles/DA/DA289C-not-well-formed-xhtml.docx new file mode 100644 index 00000000..642a9c69 Binary files /dev/null and b/TestFiles/DA/DA289C-not-well-formed-xhtml.docx differ diff --git a/docs/images/word/documentassembler/inline-HTML-in-Word-example.png b/docs/images/word/documentassembler/inline-HTML-in-Word-example.png new file mode 100644 index 00000000..422f0dd2 Binary files /dev/null and b/docs/images/word/documentassembler/inline-HTML-in-Word-example.png differ diff --git a/docs/tutorials/word/DocumentAssembler_InlineHtmlSupport.md b/docs/tutorials/word/DocumentAssembler_InlineHtmlSupport.md new file mode 100644 index 00000000..5adeb096 --- /dev/null +++ b/docs/tutorials/word/DocumentAssembler_InlineHtmlSupport.md @@ -0,0 +1,64 @@ +--- +uid: Tutorial.Word.DocumentAssembler.InlineHtmlSupport +--- + +# Inline HTML Support + +## Introduction + +Document Assembler now supports basic inline HTML tags and if these are found in your Content select then formatting will be placed directly on the run. + +For example assuming you had an XML file with an HTML fragment such as: + +```xml + + +

Document Assembler is AWESOME!

+

It now supports simple inline HTML.

+
+
+``` + +And you had a `Content` tag in your template: + +```xml + +``` + +Then Document Assembler would render this in Word as: + +![Example Word output for Inline HTML fragment](../../images/word/documentassembler/inline-HTML-in-Word-example.png) + +## Supported HTML tags + +Currently the following HTML tags are supported. + +### Block Tags + +Both `div` and `p` tags are supported for block level content. When Document Assembler finds either of these then it will treat them as an encapsulating paragraph. + +### Inline Tags + +* Either `b` or `strong` are supported for Bold +* Either `i` or `em` are supported for Italic +* `u` is translated to Underline +* `a` will create a clickable Hyperlink in Word +* `br` forces a new line + +## HTML Parsing + +HTML parsing is provided using the HTML Agility Pack and is fairly forgiving. If you have HTML elements in use that are not supported Document Assembler will simply ignore them and process the rest of your content. + +## Usage in Templates + +Inline HTML formatting is supported by default, you do not need to change your `Content` elements, you just need to pass valid HTML rather than text to them in the `Select` attribute. + +## Future Developments + +Inline HTML support is in its infancy but it would make sense to add support for: + +* Ordered lists `ol` +* Unordered lists `ul` +* Superscript `sup` +* Subscript `sub` +* Strike-through `s` \ No newline at end of file diff --git a/docs/tutorials/word/toc.yml b/docs/tutorials/word/toc.yml index e013d9a4..b223bd8f 100644 --- a/docs/tutorials/word/toc.yml +++ b/docs/tutorials/word/toc.yml @@ -7,4 +7,6 @@ - name: DocumentAssembler items: - name: Images Support - href: DocumentAssembler_ImagesSupport.md \ No newline at end of file + href: DocumentAssembler_ImagesSupport.md + - name: Inline HTML Support + href: DocumentAssembler_InlineHtmlSupport.md \ No newline at end of file