From 687e328608ca02cabd0d600aad6932ab2de56c55 Mon Sep 17 00:00:00 2001 From: Cristina Matonte Date: Fri, 14 Aug 2015 14:54:20 +0200 Subject: [PATCH 1/6] Add xslt to group inline elements in mixed content As a preparation for later transformations --- lib/htmltoword/xslt/inline_elements.xslt | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 lib/htmltoword/xslt/inline_elements.xslt diff --git a/lib/htmltoword/xslt/inline_elements.xslt b/lib/htmltoword/xslt/inline_elements.xslt new file mode 100644 index 0000000..74b823a --- /dev/null +++ b/lib/htmltoword/xslt/inline_elements.xslt @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + +
+ +
+ +
+ + + + + + + + +
From 9cec88576777053d50760e6962e64377f02c606b Mon Sep 17 00:00:00 2001 From: Cristina Matonte Date: Tue, 10 Nov 2015 15:21:34 +0100 Subject: [PATCH 2/6] Handle a bit more complex structures in li tags --- lib/htmltoword/document.rb | 3 +- lib/htmltoword/xslt/base.xslt | 77 ++++++++++++++++++------ lib/htmltoword/xslt/inline_elements.xslt | 4 +- 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/lib/htmltoword/document.rb b/lib/htmltoword/document.rb index 93095b4..1c29bb7 100644 --- a/lib/htmltoword/document.rb +++ b/lib/htmltoword/document.rb @@ -85,7 +85,8 @@ def replace_file(html, file_name = Document.doc_xml_file, extras = false) source = Nokogiri::HTML(html.gsub(/>\s+<')) transform_and_replace(source, Document.numbering_xslt, Document.numbering_xml_file) transform_and_replace(source, Document.relations_xslt, Document.relations_xml_file) - transform_and_replace(source, Document.xslt_template(extras), file_name, extras) + cleaned_source = Nokogiri::XSLT(File.open(File.join(Htmltoword.config.default_xslt_path, 'inline_elements.xslt'))).transform(source) + transform_and_replace(cleaned_source, Document.xslt_template(extras), file_name, extras) end private diff --git a/lib/htmltoword/xslt/base.xslt b/lib/htmltoword/xslt/base.xslt index 018f251..c96a4f3 100644 --- a/lib/htmltoword/xslt/base.xslt +++ b/lib/htmltoword/xslt/base.xslt @@ -110,31 +110,72 @@ - - - - + - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
- +
From f7b4026c29b086fc0c83089727a9d4d2d63c3859 Mon Sep 17 00:00:00 2001 From: Cristina Matonte Date: Wed, 11 Nov 2015 13:14:40 +0100 Subject: [PATCH 3/6] Check for inline elem in td and pass class/style --- lib/htmltoword/xslt/base.xslt | 4 ++-- lib/htmltoword/xslt/inline_elements.xslt | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/htmltoword/xslt/base.xslt b/lib/htmltoword/xslt/base.xslt index c96a4f3..11244e9 100644 --- a/lib/htmltoword/xslt/base.xslt +++ b/lib/htmltoword/xslt/base.xslt @@ -51,14 +51,14 @@
- - diff --git a/lib/htmltoword/xslt/inline_elements.xslt b/lib/htmltoword/xslt/inline_elements.xslt index a6cd095..77a5c08 100644 --- a/lib/htmltoword/xslt/inline_elements.xslt +++ b/lib/htmltoword/xslt/inline_elements.xslt @@ -11,11 +11,13 @@ - +
+ +
- +
From bd9f06d51a5b4a6884c90ca0a095ba1762e6eeee Mon Sep 17 00:00:00 2001 From: Cristina Matonte Date: Wed, 11 Nov 2015 13:15:11 +0100 Subject: [PATCH 4/6] Update tests --- .../fixtures/wordml/lists_inline_elements.xml | 161 +++++++++++++++--- spec/spec_helper.rb | 3 +- spec/xslt_breaks_spec.rb | 85 +++++++-- spec/xslt_simple_text_style_spec.rb | 14 -- 4 files changed, 212 insertions(+), 51 deletions(-) diff --git a/spec/fixtures/wordml/lists_inline_elements.xml b/spec/fixtures/wordml/lists_inline_elements.xml index 9303ecf..eaf0356 100644 --- a/spec/fixtures/wordml/lists_inline_elements.xml +++ b/spec/fixtures/wordml/lists_inline_elements.xml @@ -15,9 +15,26 @@ and some more text - - - + + + + + + + + + + + + + + + + + + + + Text in a new line in div @@ -39,9 +56,26 @@ ) and some more text - - - + + + + + + + + + + + + + + + + + + + + Text in a new line in div @@ -84,9 +118,26 @@ and again normal - - - + + + + + + + + + + + + + + + + + + + + New paragraph @@ -111,9 +162,26 @@ and normal text - - - + + + + + + + + + + + + + + + + + + + + New paragraph @@ -138,9 +206,26 @@ and normal text - - - + + + + + + + + + + + + + + + + + + + + New paragraph @@ -165,9 +250,26 @@ and normal text - - - + + + + + + + + + + + + + + + + + + + + New paragraph @@ -192,9 +294,26 @@ and normal text - - - + + + + + + + + + + + + + + + + + + + + New paragraph diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index b46ecc2..ba64514 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -10,8 +10,9 @@ def compare_transformed_files(test_file_name, extras: false) def compare_resulting_wordml_with_expected(html, resulting_wordml, extras: false) source = Nokogiri::HTML(html.gsub(/>\s+<")) + cleaned_source = Nokogiri::XSLT(File.open(File.join(Htmltoword.config.default_xslt_path, 'inline_elements.xslt'))).transform(source) xslt = Nokogiri::XSLT(File.open(Htmltoword::Document.xslt_template(extras))) - result = xslt.transform(source) + result = xslt.transform(cleaned_source) 
result.xpath('//comment()').remove result = remove_declaration(result.to_s) expect(remove_whitespace(result.to_s)).to eq(remove_whitespace(resulting_wordml)) diff --git a/spec/xslt_breaks_spec.rb b/spec/xslt_breaks_spec.rb index 775ee8f..beb746b 100644 --- a/spec/xslt_breaks_spec.rb +++ b/spec/xslt_breaks_spec.rb @@ -99,9 +99,11 @@ Lorem ipsum 4 - - - + + + + + Lorem ipsum 5 @@ -110,9 +112,11 @@ Lorem ipsum 6 - - - + + + + + Lorem ipsum 7 @@ -184,9 +188,26 @@ Text - - - + + + + + + + + + + + + + + + + + + + + new line @@ -238,9 +259,26 @@ Some text - - - + + + + + + + + + + + + + + + + + + + + Text in div @@ -338,9 +376,26 @@ Text - - - + + + + + + + + + + + + + + + + + + + + new line diff --git a/spec/xslt_simple_text_style_spec.rb b/spec/xslt_simple_text_style_spec.rb index 2c0720f..764815f 100644 --- a/spec/xslt_simple_text_style_spec.rb +++ b/spec/xslt_simple_text_style_spec.rb @@ -625,8 +625,6 @@ Text: - - @@ -648,8 +646,6 @@ Strong - - @@ -671,8 +667,6 @@ More bold. - - End @@ -684,8 +678,6 @@ Text: - - @@ -707,8 +699,6 @@ More em text - - @@ -730,8 +720,6 @@ More italic. 
- - End @@ -844,8 +832,6 @@ Td - - Span From da014df3647788b3f9de69ac1efa4b65f3d17560 Mon Sep 17 00:00:00 2001 From: Cristina Matonte Date: Wed, 11 Nov 2015 18:03:47 +0100 Subject: [PATCH 5/6] Fix indentation on li/divs --- lib/htmltoword/xslt/base.xslt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/htmltoword/xslt/base.xslt b/lib/htmltoword/xslt/base.xslt index 11244e9..7ca870f 100644 --- a/lib/htmltoword/xslt/base.xslt +++ b/lib/htmltoword/xslt/base.xslt @@ -149,7 +149,7 @@ - + From 3dc9999e8db1eeb36c3b5812ea4e38f3fd9fbde4 Mon Sep 17 00:00:00 2001 From: Cristina Matonte Date: Wed, 11 Nov 2015 18:06:04 +0100 Subject: [PATCH 6/6] Update numbering generation Lists can be more complex and the numbering transformation needs to be able to handle these cases and generate all necessary ilvls NOTE: Generates extra definitions for the same level, Word picks the first one --- lib/htmltoword/xslt/numbering.xslt | 63 +++++++++---------- .../fixtures/wordml/lists_inline_elements.xml | 14 ++--- spec/xslt_breaks_spec.rb | 6 +- spec/xslt_lists_spec.rb | 57 +++++------------ 4 files changed, 54 insertions(+), 86 deletions(-) diff --git a/lib/htmltoword/xslt/numbering.xslt b/lib/htmltoword/xslt/numbering.xslt index b713165..6ca03cf 100644 --- a/lib/htmltoword/xslt/numbering.xslt +++ b/lib/htmltoword/xslt/numbering.xslt @@ -72,26 +72,22 @@
- - - + + - + - + - - - - - + + - + @@ -99,31 +95,25 @@ - + - - - - - - - - - - - + - - - - - - - + + + + + + + + + + + @@ -165,13 +155,19 @@ - + + + 1 + + + + - + - + @@ -190,5 +186,4 @@ - diff --git a/spec/fixtures/wordml/lists_inline_elements.xml b/spec/fixtures/wordml/lists_inline_elements.xml index eaf0356..331f291 100644 --- a/spec/fixtures/wordml/lists_inline_elements.xml +++ b/spec/fixtures/wordml/lists_inline_elements.xml @@ -33,7 +33,7 @@ - + Text in a new line in div @@ -74,7 +74,7 @@ - + Text in a new line in div @@ -136,7 +136,7 @@ - + New paragraph @@ -180,7 +180,7 @@ - + New paragraph @@ -224,7 +224,7 @@ - + New paragraph @@ -268,7 +268,7 @@ - + New paragraph @@ -312,7 +312,7 @@ - + New paragraph diff --git a/spec/xslt_breaks_spec.rb b/spec/xslt_breaks_spec.rb index beb746b..f12a219 100644 --- a/spec/xslt_breaks_spec.rb +++ b/spec/xslt_breaks_spec.rb @@ -206,7 +206,7 @@ - + new line @@ -277,7 +277,7 @@ - + Text in div @@ -394,7 +394,7 @@ - + new line diff --git a/spec/xslt_lists_spec.rb b/spec/xslt_lists_spec.rb index 0f5d06d..f7fb682 100644 --- a/spec/xslt_lists_spec.rb +++ b/spec/xslt_lists_spec.rb @@ -387,42 +387,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -443,30 +407,39 @@ - - + + + + + - - + + + + + - - + + + + +