Skip to content

Commit

Permalink
Merge pull request #65 from rossgibb/issue-61
Browse files Browse the repository at this point in the history
Add escape-cell function to escape Excel unicode sequences if desired
  • Loading branch information
mjul authored Jun 17, 2020
2 parents 3cfdbf7 + cf6a6c8 commit 5f43246
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
27 changes: 27 additions & 0 deletions src/dk/ative/docjure/spreadsheet.clj
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,33 @@
(if (= (.getCellType cell) Cell/CELL_TYPE_FORMULA) (.setCellType cell Cell/CELL_TYPE_BLANK))
(.setCellValue cell null)))

(defn escape-cell
  "Escape Excel 16-bit unicode escape sequences in a cell value so they are
  kept as literal text.

  When inputting a cell's value in an xlsx workbook, Excel allows 16-bit
  unicode characters to be input in an escaped form matching this regular
  expression: _x[0-9A-F]{4}_.
  For example, the character 'A' can be input as '_x0041_', the
  lowercase greek character pi can be input as '_x03C0_'.

  If your data contains cell values that match this form that you do not
  want to be interpreted as 16-bit unicode values, the string '_x005F'
  needs to be prepended to each occurrence. For example, to stop
  '_x03C0_' from being interpreted as the character pi you must input
  '_x005F_x03C0_'.

  This function escapes every occurrence of the escape sequence in the
  specified cell value, including occurrences that share an underscore
  with the preceding one (e.g. '_x0041_x0042_'). Values that are not
  strings (including nil) are returned unchanged.

  (escape-cell \"foo _x0041_ bar _x03C0_\")
  => \"foo _x005F_x0041_ bar _x005F_x03C0_\"
  (escape-cell \"_x0041_x0042_\")
  => \"_x005F_x0041_x005F_x0042_\""
  [value]
  (if (string? value)
    ;; A zero-width lookahead is used instead of a consuming match: the
    ;; trailing '_' of one sequence may also be the leading '_' of the
    ;; next, and a consuming match would leave that second sequence
    ;; unescaped. The replacement is inserted at each position where a
    ;; sequence begins.
    (clojure.string/replace
      value
      #"(?=_x[0-9A-F]{4}_)"
      "_x005F")
    value))

(defn add-row! [^Sheet sheet values]
(assert-type sheet Sheet)
(let [row-num (if (= 0 (.getPhysicalNumberOfRows sheet))
Expand Down
20 changes: 18 additions & 2 deletions test/dk/ative/docjure/spreadsheet_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,24 @@
(is (= 1.0 (.getNumericCellValue a1))))
(testing "should set double"
(set-cell! a1 (double 1.2))
(is (= 1.2 (.getNumericCellValue a1)))))))

(is (= 1.2 (.getNumericCellValue a1)))))
(testing "set-cell! for unicode"
(testing "regular unicode"
(set-cell! a1 "foo\u220Fbar")
(is (= "foo\u220Fbar"
(.getStringCellValue a1))))
(testing "unescaped"
(set-cell! a1 "foo_x220F_bar")
(is (= "foo\u220Fbar"
(.getStringCellValue a1))))
(testing "escaped"
(set-cell! a1 (escape-cell "foo_x220F_bar"))
(is (= "foo_x220F_bar"
(.getStringCellValue a1))))
(testing "multiple escaped"
(set-cell! a1 (escape-cell "foo _x0041_ bar _x03C0_"))
(is (= "foo _x0041_ bar _x03C0_"
(.getStringCellValue a1)))))))

(deftest sheet-seq-test
(let [sheet-name "Sheet 1"
Expand Down

0 comments on commit 5f43246

Please sign in to comment.