forked from tealeg/xlsx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlib.go
352 lines (329 loc) · 9.55 KB
/
lib.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
package xlsx
import (
"archive/zip"
"encoding/xml"
"errors"
"fmt"
"io"
"math"
"strconv"
"strings"
)
// XLSXReaderError is the standard error type for otherwise undefined
// errors in the XSLX reading process.
type XLSXReaderError struct {
Err string
}
// String() returns a string value from an XLSXReaderError struct in
// order that it might comply with the os.Error interface.
func (e *XLSXReaderError) Error() string {
return e.Err
}
// Cell is a high level structure intended to provide user access to
// the contents of Cell within an xlsx.Row.
type Cell struct {
data string
}
// CellInterface defines the public API of the Cell.
type CellInterface interface {
String() string
}
func (c *Cell) String() string {
return c.data
}
// Row is a high level structure indended to provide user access to a
// row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
type Row struct {
Cells []*Cell
}
// Sheet is a high level structure intended to provide user access to
// the contents of a particular sheet within an XLSX file.
type Sheet struct {
Rows []*Row
}
// File is a high level structure providing a slice of Sheet structs
// to the user.
type File struct {
worksheets map[string]*zip.File
referenceTable []string
Sheets []*Sheet
}
// getRangeFromString is an internal helper function that converts
// XLSX internal range syntax to a pair of integers. For example,
// the range string "1:3" yield the upper and lower intergers 1 and 3.
func getRangeFromString(rangeString string) (lower int, upper int, error error) {
var parts []string
parts = strings.SplitN(rangeString, ":", 2)
if parts[0] == "" {
error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
}
if parts[1] == "" {
error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
}
lower, error = strconv.Atoi(parts[0])
if error != nil {
error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
}
upper, error = strconv.Atoi(parts[1])
if error != nil {
error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
}
return lower, upper, error
}
// positionalLetterMultiplier gives an integer multiplier to use for a
// position in a letter based column identifer. For example, the
// column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
// 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
// ((26**2)*1)+((26**1)*2)+((26**0))*1
func positionalLetterMultiplier(extent, pos int) int {
var result float64
var power float64
var offset int
offset = pos + 1
power = float64(extent - offset)
result = math.Pow(26, power)
return int(result)
}
// lettersToNumeric is used to convert a character based column
// reference to a zero based numeric column identifier.
func lettersToNumeric(letters string) int {
var sum int = 0
var shift int
extent := len(letters)
for i, c := range letters {
// Just to make life akward. If we think of this base
// 26 notation as being like HEX or binary we hit a
// nasty little problem. The issue is that we have no
// 0s and therefore A can be both a 1 and a 0. The
// value range of a letter is different in the most
// significant position if (and only if) there is more
// than one positions. For example:
// "A" = 0
// 676 | 26 | 0
// ----+----+----
// 0 | 0 | 0
//
// "Z" = 25
// 676 | 26 | 0
// ----+----+----
// 0 | 0 | 25
// "AA" = 26
// 676 | 26 | 0
// ----+----+----
// 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
if i == 0 && extent > 1 {
shift = 1
} else {
shift = 0
}
multiplier := positionalLetterMultiplier(extent, i)
switch {
case 'A' <= c && c <= 'Z':
sum += multiplier * (int((c - 'A')) + shift)
case 'a' <= c && c <= 'z':
sum += multiplier * (int((c - 'a')) + shift)
}
}
return sum
}
// letterOnlyMapF is used in conjunction with strings.Map to return
// only the characters A-Z and a-z in a string
func letterOnlyMapF(rune rune) rune {
switch {
case 'A' <= rune && rune <= 'Z':
return rune
case 'a' <= rune && rune <= 'z':
return rune - 32
}
return -1
}
// intOnlyMapF is used in conjunction with strings.Map to return only
// the numeric portions of a string.
func intOnlyMapF(rune rune) rune {
if rune >= 48 && rune < 58 {
return rune
}
return -1
}
// getCoordsFromCellIDString returns the zero based cartesian
// coordinates from a cell name in Excel format, e.g. the cellIDString
// "A1" returns 0, 0 and the "B3" return 1, 2.
func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
if error != nil {
return x, y, error
}
y -= 1 // Zero based
x = lettersToNumeric(letterPart)
return x, y, error
}
// makeRowFromSpan will, when given a span expressed as a string,
// return an empty Row large enough to encompass that span and
// populate it with empty cells. All rows start from cell 1 -
// regardless of the lower bound of the span.
func makeRowFromSpan(spans string) *Row {
var error error
var upper int
var row *Row
var cell *Cell
row = new(Row)
_, upper, error = getRangeFromString(spans)
if error != nil {
panic(error)
}
error = nil
row.Cells = make([]*Cell, upper)
for i := 0; i < upper; i++ {
cell = new(Cell)
cell.data = ""
row.Cells[i] = cell
}
return row
}
// getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
// Note - this is not actually general enough - we should support retaining tabs and newlines.
func getValueFromCellData(rawcell xlsxC, reftable []string) string {
var value string = ""
var data string = rawcell.V
if len(data) > 0 {
vval := strings.Trim(data, " \t\n\r")
if rawcell.T == "s" {
ref, error := strconv.Atoi(vval)
if error != nil {
panic(error)
}
value = reftable[ref]
} else {
value = vval
}
}
return value
}
// readRowsFromSheet is an internal helper function that extracts the
// rows from a XSLXWorksheet, poulates them with Cells and resolves
// the value references from the reference table and stores them in
func readRowsFromSheet(Worksheet *xlsxWorksheet, reftable []string) []*Row {
var rows []*Row
var row *Row
rows = make([]*Row, len(Worksheet.SheetData.Row))
for i, rawrow := range Worksheet.SheetData.Row {
row = makeRowFromSpan(rawrow.Spans)
for _, rawcell := range rawrow.C {
x, _, error := getCoordsFromCellIDString(rawcell.R)
if error != nil {
panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
}
row.Cells[x].data = getValueFromCellData(rawcell, reftable)
}
rows[i] = row
}
return rows
}
// readSheetsFromZipFile is an internal helper function that loops
// over the Worksheets defined in the XSLXWorkbook and loads them into
// Sheet objects stored in the Sheets slice of a xlsx.File struct.
func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, error) {
var workbook *xlsxWorkbook
var error error
var rc io.ReadCloser
var decoder *xml.Decoder
workbook = new(xlsxWorkbook)
rc, error = f.Open()
if error != nil {
return nil, error
}
decoder = xml.NewDecoder(rc)
error = decoder.Decode(workbook)
if error != nil {
return nil, error
}
sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
for i, rawsheet := range workbook.Sheets.Sheet {
worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
if error != nil {
return nil, error
}
sheet := new(Sheet)
sheet.Rows = readRowsFromSheet(worksheet, file.referenceTable)
sheets[i] = sheet
}
return sheets, nil
}
// readSharedStringsFromZipFile() is an internal helper function to
// extract a reference table from the sharedStrings.xml file within
// the XLSX zip file.
func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
var sst *xlsxSST
var error error
var rc io.ReadCloser
var decoder *xml.Decoder
var reftable []string
rc, error = f.Open()
if error != nil {
return nil, error
}
sst = new(xlsxSST)
decoder = xml.NewDecoder(rc)
error = decoder.Decode(sst)
if error != nil {
return nil, error
}
reftable = MakeSharedStringRefTable(sst)
return reftable, nil
}
// OpenFile() take the name of an XLSX file and returns a populated
// xlsx.File struct for it.
func OpenFile(filename string) (x *File, e error) {
var f *zip.ReadCloser
var error error
var file *File
var v *zip.File
var workbook *zip.File
var sharedStrings *zip.File
var reftable []string
var worksheets map[string]*zip.File
f, error = zip.OpenReader(filename)
if error != nil {
return nil, error
}
file = new(File)
worksheets = make(map[string]*zip.File, len(f.File))
for _, v = range f.File {
switch v.Name {
case "xl/sharedStrings.xml":
sharedStrings = v
case "xl/workbook.xml":
workbook = v
default:
if len(v.Name) > 12 {
if v.Name[0:13] == "xl/worksheets" {
worksheets[v.Name[14:len(v.Name)-4]] = v
}
}
}
}
file.worksheets = worksheets
reftable, error = readSharedStringsFromZipFile(sharedStrings)
if error != nil {
return nil, error
}
if reftable == nil {
error := new(XLSXReaderError)
error.Err = "No valid sharedStrings.xml found in XLSX file"
return nil, error
}
file.referenceTable = reftable
sheets, error := readSheetsFromZipFile(workbook, file)
if error != nil {
return nil, error
}
if sheets == nil {
error := new(XLSXReaderError)
error.Err = "No sheets found in XLSX File"
return nil, error
}
file.Sheets = sheets
f.Close()
return file, nil
}