From cf6a6c8a730afa2724c0c257ccae923f48b0c581 Mon Sep 17 00:00:00 2001 From: Ross Gibb Date: Sun, 16 Jul 2017 14:13:14 -0600 Subject: [PATCH] Adding escape-cell function to escape Excel unicode sequences if the user desires --- src/dk/ative/docjure/spreadsheet.clj | 27 ++++++++++++++++++++++ test/dk/ative/docjure/spreadsheet_test.clj | 20 ++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/dk/ative/docjure/spreadsheet.clj b/src/dk/ative/docjure/spreadsheet.clj index 6235ebb..cb1e36d 100644 --- a/src/dk/ative/docjure/spreadsheet.clj +++ b/src/dk/ative/docjure/spreadsheet.clj @@ -252,6 +252,33 @@ (if (= (.getCellType cell) Cell/CELL_TYPE_FORMULA) (.setCellType cell Cell/CELL_TYPE_BLANK)) (.setCellValue cell null)))) +(defn escape-cell + "When inputting a cell's value in an xlsx workbook, Excel allows 16-bit + unicode characters to be input in an escaped form matching this regular + expression: _x[0-9A-F]{4}_. + + For example, the character 'A' can be input as '_x0041_', the + lowercase greek character pi can be input as '_x03C0_'. + + If your data contains cell values that match this form that you do not + want to be interpreted as 16-bit unicode values, the string '_x005F' + needs to be prepended to each occurrence. For example, to stop + '_x03C0_' from being interpreted as the character pi you must input + '_x005F_x03C0_'. + + This function will escape all occurrences of the Excel 16-bit unicode + escape sequence (upper- or lowercase hex, including adjacent sequences that share an underscore) in the specified cell value. Non-string values are returned unchanged. + + (escape-cell \"foo _x0041_ bar _x03C0_\") + => \"foo _x005F_x0041_ bar _x005F_x03C0_\"" + [value] + (if (string? value) + (clojure.string/replace + value + #"_(?=x[0-9A-Fa-f]{4}_)" + "_x005F_") + value)) + (defn add-row! 
[^Sheet sheet values] (assert-type sheet Sheet) (let [row-num (if (= 0 (.getPhysicalNumberOfRows sheet)) diff --git a/test/dk/ative/docjure/spreadsheet_test.clj b/test/dk/ative/docjure/spreadsheet_test.clj index bfd5873..bd043e0 100644 --- a/test/dk/ative/docjure/spreadsheet_test.clj +++ b/test/dk/ative/docjure/spreadsheet_test.clj @@ -260,8 +260,24 @@ (is (= 1.0 (.getNumericCellValue a1)))) (testing "should set double" (set-cell! a1 (double 1.2)) - (is (= 1.2 (.getNumericCellValue a1))))))) - + (is (= 1.2 (.getNumericCellValue a1))))) + (testing "set-cell! for unicode" + (testing "regular unicode" + (set-cell! a1 "foo\u220Fbar") + (is (= "foo\u220Fbar" + (.getStringCellValue a1)))) + (testing "unescaped" + (set-cell! a1 "foo_x220F_bar") + (is (= "foo\u220Fbar" + (.getStringCellValue a1)))) + (testing "escaped" + (set-cell! a1 (escape-cell "foo_x220F_bar")) + (is (= "foo_x220F_bar" + (.getStringCellValue a1)))) + (testing "multiple escaped" + (set-cell! a1 (escape-cell "foo _x0041_ bar _x03C0_")) + (is (= "foo _x0041_ bar _x03C0_" + (.getStringCellValue a1))))))) (deftest sheet-seq-test (let [sheet-name "Sheet 1"