From 6daee2d9c0440f9ff42a773e4e346bbbcde2f25b Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Fri, 27 Oct 2023 20:18:03 +0200 Subject: [PATCH] Fixes #567 - Implement ods import --- HISTORY.md | 2 + docs/formats.rst | 11 +++++- src/tablib/formats/_ods.py | 76 +++++++++++++++++++++++++++++++++++++ tests/files/book.ods | Bin 0 -> 8833 bytes tests/test_tablib.py | 25 +++++++++--- 5 files changed, 106 insertions(+), 8 deletions(-) create mode 100644 tests/files/book.ods diff --git a/HISTORY.md b/HISTORY.md index 2c1e59e8..5677b9a1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,8 @@ ### Improvements - The html format now supports importing from HTML content (#243) +- The ods format now supports importing from .ods files (#567). The support is + still a bit experimental. ### Changes diff --git a/docs/formats.rst b/docs/formats.rst index 636f7309..178edbda 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -145,12 +145,19 @@ If a title has been set, it will be exported as the table caption. ods === -Export data in OpenDocument Spreadsheet format. The ``ods`` format is currently -export-only. +Import/export data in OpenDocument Spreadsheet format. + +.. versionadded:: 3.6.0 + + Import functionality was added. This format is optional, install Tablib with ``pip install "tablib[ods]"`` to make the format available. +The ``import_set()`` method also supports a ``skip_lines`` parameter that you +can set to a number of lines that should be skipped before starting to read +data. + .. admonition:: Binary Warning :class:`Dataset.ods` contains binary data, so make sure to write in binary mode:: diff --git a/src/tablib/formats/_ods.py b/src/tablib/formats/_ods.py index ec618f67..853f8846 100644 --- a/src/tablib/formats/_ods.py +++ b/src/tablib/formats/_ods.py @@ -2,10 +2,13 @@ """ import numbers +from datetime import date, datetime from io import BytesIO from odf import opendocument, style, table, text +import tablib + bold = style.Style(name="bold", family="paragraph") bold.addElement(style.TextProperties( fontweight="bold", @@ -49,6 +52,73 @@ def export_book(cls, databook): wb.save(stream) return stream.getvalue() + @classmethod + def import_sheet(cls, dset, sheet, headers=True, skip_lines=0): + """Populate dataset `dset` with sheet data.""" + + dset.title = sheet.getAttribute('name') + + for i, row in enumerate(sheet.childNodes): + if i < skip_lines: + continue + row_vals = [cls.read_cell(cell) for cell in row.childNodes] + if i == skip_lines and headers: + dset.headers = row_vals + else: + if i > skip_lines and len(row_vals) < dset.width: + row_vals += [''] * (dset.width - len(row_vals)) + dset.append(row_vals) + + @classmethod + def read_cell(cls, cell): + if not cell.childNodes: + if data := getattr(cell, 'data', None): + return data + value_type = cell.getAttribute('valuetype') + value = cell.getAttribute('value') + if value_type == 'float': + return float(value) + if value_type == 'date': + if 'T' in value: + return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S") + else: + return datetime.strptime(value, "%Y-%m-%d").date() + else: + msg = f"value_type {value_type} not handled" + raise ValueError(msg) + + for subnode in cell.childNodes: + value = cls.read_cell(subnode) + if value: + return value + + @classmethod + def import_set(cls, dset, in_stream, headers=True, skip_lines=0): + """Populate dataset `dset` from ODS stream.""" + + dset.wipe() + + ods_book = opendocument.load(in_stream) + for sheet in ods_book.spreadsheet.childNodes: + if sheet.qname[1] == 'table': + sheet = ods_book.spreadsheet.childNodes[0] + cls.import_sheet(dset, sheet, headers, skip_lines) + + @classmethod + def import_book(cls, dbook, in_stream, headers=True): + """Populate databook `dbook` from ODS stream.""" + + dbook.wipe() + + ods_book = opendocument.load(in_stream) + + for sheet in ods_book.spreadsheet.childNodes: + if sheet.qname[1] != 'table': + continue + dset = tablib.Dataset() + cls.import_sheet(dset, sheet, headers) + dbook.add_sheet(dset) + @classmethod def dset_sheet(cls, dataset, ws): """Completes given worksheet from given Dataset.""" @@ -66,6 +136,12 @@ def dset_sheet(cls, dataset, ws): for j, col in enumerate(row): if isinstance(col, numbers.Number): cell = table.TableCell(valuetype="float", value=col) + elif isinstance(col, datetime): + cell = table.TableCell( + valuetype="date", value=col.strftime('%Y-%m-%dT%H:%M:%S') + ) + elif isinstance(col, date): + cell = table.TableCell(valuetype="date", value=col.strftime('%Y-%m-%d')) else: cell = table.TableCell(valuetype="string") cell.addElement(text.P(text=str(col), stylename=style)) diff --git a/tests/files/book.ods b/tests/files/book.ods new file mode 100644 index 0000000000000000000000000000000000000000..a26976805478cf9a8dbd28f51acd15fe94a43162 GIT binary patch literal 8833 zcmcIq2Q-{p*B%nldlw~o5)49!5`-v;UPAObj5ZjfMhJqa5u!wIK?o9M)DS&-8NEjd zW0dHff84wBxw&6*|Nmd>`?mG2Iq$p9e)c(MzwbW#?1!4-rOOlm01f~k`$AtQz)mCt z2mk=iFZ3gTy|q0E=HUo3b#%0|HaCS?J3x3{Ar?RfQ)g>upo1d_V&P!!Vh@7AfXWCud~)8U6ZaBy(`c%9Ie{v`zHmc1#& z+7jdp141nFl^ZPzfHYa9Ke1 zZs?@wyR>3FQ@F;bAo#UczEa`1O6|{K{`6HUip>JCTk0b9>8rD?CZ7=g8R912xgvT- zrd|+aFBr{y`mko;KkXF*_FOafbU`$4OtZc=NxHSBx?C;hakh90>^Y;IEh=@rZzt8A zX>}O6Sh~sdNWMr-5gVs;58RK10RZ@40s#K}27a}Ce|(J~m?_ZR-Y#6T12WD>QnSZ( zSx6_CxGh#zfuL7L^@d&=QEwu1^wk(1WjYGVN+099$ypoe!mEy!R#p#U_Tr+0Q|imd zM)CTdr{TPZJsi)LetFchK{Odk{Pu}wKUoE|Sj?Aj16P`XP-A?vg$s0ZTj04sYsLCe zmMZz;)lQrTPX3P=miKjK&PaJe)D6Dnh^Y>LBFiTLYf%3|YPnUCK%Bs&GVQ)nj3!9umMAqq88=aqmR0q%G9=i$$Jgzn7 z5tlWaT#JA7TGI@fPIQTt5svLg8r$-^iF+Espeme1`5>iVa6Aj9IeRrk`m-@Y)qee^75k4}yqDQ)S4Vdh?u)XwdPK2%G^{a}t^$LYs#i@DWs7ivO5EzCt|YyxR!tVVX{2m)*mY9GTT0mY z!oK%b!gn<$!lr%~A*~nbw69G%LsqFe8RRo5*>OV2>MEJMP{t%a80Q|v`06K}g!brz zfAlCG`h#$WdDwxR&wbjgtM4!_MBzPM#7AM`5MoiiO2+H-p$sLOh8rj}&WN9oVNv+{ zdL%4qd-$cBVlkV5O0~~W^z<@HY&$xVj zu4KvPpb@LfnV^p)(@`_yVUvM4NfW0=T=Tel`t`VihGaH=e5SV$_<+5H##hY(*2|M1d7axKqmnRV*1#AXk^(qb7rd4T8Jwxon#qQbG&`&5!o(5cHBDJl zoky3W4maq;R0VU}OmcYxVsi8adc(R(9Ctj120&#r!CNj3YRYfETTD>aYfVsIN(Kx+ z!-~$R=aCz1!P3MNtyN@`$}MgXa>2cuQL&0deCQR_wQr|1tDk5auQv)S`n2)P(V#+Z{BSq1j*h4BiE8dV%)mur6{a$$ZBi&oc0%jE43lKnQlwQ-}fvx zX|*Av9LiN?E1og1~$LHFw<%kt%3m_ZF_y-2!FFK+;llAf|-R4`kjtXjZdv; zPVZJ|{AVbfpCvd}8_Zx8kFWNwI$QjmQ6se6czI-8XTX=p&QK*he$JuYNDu#$oYW&b zDdsq6Hg(zxLA^z%gXDd~l8^6Q)m&^rLF#aZ7I!}B#iq8}g5)MoXTNgcBZ)ze`@JU^ zMT~=}Va4GJ)M)5wMPJfhWMvH>kAqlU^M}#ZM8V}}9J_f6!O%b<$SejOxbnT&}M zlTWeNmBjlT4cQypnMl-1c3x~Y4QDZx&cWx>8xWK-x#C2s8Yf8Yv6Ywt!4k_W{tt33AyTaA}*apufNqbpIXuc)y()Rf7Hu2-VP17UBjJqo~OMthrF}x+) z9RqJ-2#v|Vla!N0Bh@=ahUDXI8F8#8w-Y~2a7~mYkugQ@*@>@@D$d`%+EJ}j(eY$A zzQzGWuWD2Oil{CflL4fUz2Mev3){8Dni=+c_v(ggS4s!*bFovYdt6epl)+em{0v_eS@tIT>RS z^ns17!)X&biL%f4M*YLO#i0`BRm)%Gz2$GK#Rj{E8LTCYVtk>pTE2@pf+PBD$sMoz ztOQ`zn3_N5og*mL4ktv#r}(Sw87SChArpnI>1uU_!K{zxc6X&|M>W~lGPkx`LEBvJ zPTpET-%+VttG6?Drx@==U7w>Yw@)$-!};5dYVi#svA5cGI88dm`EE$q_a0$Mzd3bQ zo<4hG#(L>J2E9n#n7vr`{gvo1tPkg3Bw+{YByKsCpva-#O%pJ$?ybJ0Mc!oRY{{>| zUeaF2%!kP25wW$qbC0u%B`m&>&*ZwH&wo2=KF;$vpuXZ!;XbZ{>O-H-A{eG z;|#Gcfy-q{MpxcPlKDZ)ri?78CXEc$nFofSzVj)MBapX{@i@4SRq#aGE+~4%=>w}r z>Lae}7<(Mz!>;PObWhF(Evj>9I$OATx8r2WPCABhXty_)mzP zZ4-NqVopw05JwRr|AR%Q#OS8Nt-!Ndu7gXae0O-?Y)iL{O|PAnjuGF;FFI0Ea#gYh zZnYxKgi=)=Gb*-kZ95&V?hwNyPpVF>*`uGga%bEE3dCNe=$QCCjSF5By-9)iE*mDG zktZH?-f=zlM-n9b21{6eUc#fVN_iA@rvU{7v~b}H z*_;p!CtQZkMP?O#_P{g-tHGXv7H`|)P`c2dAncVa2CXd&1paZ)7DBNGM0-zsIB=d{ z%%*Q#kKxX%n}|#MfZ^WJP-(4+}3Vv?agQqYMz9d#q=vhD^ ztmGok<9_<|&c^#vF|cf2zP?`P$7Xk>*?c84mf7xoQoYv??~sJ^lXpT4=9=yLV&Vq< zp|%Zg`wKDHiaV_ht6h#qYE44)Nl8_BL_f~7Dku8WJRKrhu}x%6O>ECz)a97dB-r-r z=Jd_1OED)N@jWJ|M|GH`PafWXhlh0Ei9D>zW_8bmytb(jx*}2?Tp|3C#@r;a2;Jz;R zw&x>qR4nIC`NGDXljIY{Qy?cVTM)*T;J$`iK%$ojV+H0N2gYopcoh{TJ?@9PL|6S7 zbBM=2tX$S`ByThrBW*VPIEzY3xpRMtVcf;&?gR~la`=$ywm3zrW%6sehO`BKsB0CV zdOAA13EC%O8(&=-v`h|)Rn=GaA*GDORHE(PW_#5q$G^#7PxD%grnFE$l(1%Yp!?W_ zYGShW&D?rlWed-~IHV59G}WL?*sR=^Cr_a5rCs&~2%%Li?}ilZ&wvcaQ;YL8t|mNR`t4vIxo+;vzNTr?P7Z2koPvhB#30zTnWxZR~HFoX1xCAW`dUt);OcW8I5h9#lj;#gz&;Z*qoAOKgHpJ>dzE6uHt66>hMdF0b*G+oMTELa&ffysN9 zQc9)(LSd)%BsNZs}!NqqB<$7a+TRMSOKhA!Y(Vjy*MLxXR6|`L53>GFAt7W~x4=a3c5B%$!ZuS{9*}j50NW8Q#lh)n{CaW$L|Q zK7e?X8%MtPgmC9Y~NJ;`aAfUyHH(QTU(x+uyG2~ z)T=Ec@2nooX=gfVs1~K_bLz-4jECKFvfDbV?k!DT6CxV_>c%T%{=mm(LW-pI8`xP) z$Q2pck<3Zdv6w5Odt$WfF_WN*wN3E>H?Vs*T{mbuQgIG;m3oKi-qd9;d;xRp1A)CR z7W~8EZ==YmrAsqgve}EAIRzO@WzPa`1D`(jt+U0A9WAd%$StxR7zi=sriR_^VS9jxPR6)n=9 zUWQz+pcN1anjGO(BX$3J*#o!MrD8T=v>{^amZ$23O{ICLXF!XI8uNFZCKKuFqeXVa zz1SQXh67D~mYH@0BlNRl-jpZr*($0(FRX{G_qR}}1EN)ib8#M01#MrCpHVddt${TX zaSf&#k~~o2`T`uVh>u~3MVfQ@UITf7FCPZh0GKE(moG6f>@R(mKNFK^f06{w%Wz4` zEua^PYtMuF(xlB!tI=^7sHT%>WnyAlm)#hY)WOuQ8(cK?YMi)|*VbjlF^C?)-0l`hFZ+jpL_}xVqtA#3Dq{G>J7rM zme(Wflp@tJei=v!VJFzx`4-vs7Oy<(`RQf$=we3MegXA{9&Lh8p)b6O+D5DVY8Ey( zB(|+<_wi5YIe0z7prM^inq|zSL0CP}m2?A~RbQsIa;aNrK9UTCH#{q5ADcYM?;Tch z&;GhHAEKB&Bx@j{aLV#sGIQQ-lw65Qoi8R#zc+>dSmyBE=j0Xq0U>DX)KJ*M30tDR z0K9xm<22&H7LyIk)qm|E; z2yCcM*AA1!mAu8L9D^s==UJa*N?fn`WdH!o{v$a3D9Y+;9M83)^TG4$yi}+KcCj~u zm|EL8^TK{KfsPQXU^NwaLOicT;nApnd+8-nN`OEo1eSxih! z0s;auGP0{zud=eT-n@BJKtMoTTwGdOT2WC^OH0eZz`)$x+}76C+1c6C)6?JIKP)UP zIyyQrF)iwc{5D~z zS?__O?K}B<`A_gRtMWLIFSA9HQJ(G~qwK~924NAV5_D7zH^rXIG7^shSBjtb!s+AU zGDXJ;eb%K7O)>h$Fl)D;>~=y? zUBdtm-h`{SOoA)=_A+;!B$^j?33%hIwNUuTlQu&M?&ZSA(=X{{1cF=Rp4ZR^^??@M znRd7)yOz}^3fC&XFhh9pm*OE~r4-R^+||qh?)3JE9_5s~2DU|#y^zDRLSg9_E@PyU zDn$G>N=NL57Km}KVRh8XLa`!KXrbbluZY~~XI{yV%08V>JP5hFWk3`voRxT+IaNML zaJ{jnYAxc(>#^@*W48OTHXWqZV~q8%EJ`AG#cHcqA%AynUUNe_YFX}U|*NLZ8W*@#rthK|?^jn9^?s;>2j&VO}UEb6gz`VA>gd$Mdt_3CWcxbJ)RhyQ$Rn(E%ML6O))7Q~ zogHyB2s!>X<5uO#lu#RWNw2M!nuHzVQ{?<3QI9jK@zatx zYx1#fs@2Kd4ZhSI=OA@N`5ds|5CNsx37zc8JNtgJ@$O0SDdqfkPM@>t^|O=VM&j15 zamo_M<&jvl#n)s4xxYj*PU-FvnUMIGz&wq~L1IKFoS!HZu7 zoy<8oBR6QvGBlpz`e{A`2k|Uq3AeznSv}{suFiw#@D1HFY@a*eFe!F+QG1E2fnLVmEq zf7iO`@n@oPaj%7L{+{>!+4hCj-%&z-fpWnE{~qO>DZP--=;q(Y@>gE?|BAE!3!K05 z#J|TmCs8ki5AB108_!R?@$Zq&)4>a|`32GikNkU-pVuetHz+@N<=^9+Gs_n;@(Y}Q zJUf2r^OUGcBSg`X$5Px5p9 z;)mw%juRIqW6%NicZZ7~C_k3zuhHW?E?-C#+H=2*(rSuW=o1V8K#KkYp@Ys9#`C-X E0fVp6TmS$7 literal 0 HcmV?d00001 diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 47659f84..ab3356c7 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1107,13 +1107,26 @@ def test_tsv_export(self): class ODSTests(BaseTestCase): - def test_ods_export_datatypes(self): + def test_ods_export_import_set(self): + date = datetime.date(2019, 10, 4) date_time = datetime.datetime(2019, 10, 4, 12, 30, 8) - data.append(('string', '004', 42, 21.55, Decimal('34.5'), date_time)) - data.headers = ('string', 'start0', 'integer', 'float', 'decimal', 'date/time') - # ODS is currently write-only, just test that output doesn't crash. - assert data.ods is not None - assert len(data.ods) + data.append(('string', '004', 42, 21.55, Decimal('34.5'), date, date_time)) + data.headers = ('string', 'start0', 'integer', 'float', 'decimal', 'date', 'date/time') + _ods = data.ods + data.ods = _ods + self.assertEqual(data.dict[0]['string'], 'string') + self.assertEqual(data.dict[0]['start0'], '004') + self.assertEqual(data.dict[0]['integer'], 42) + self.assertEqual(data.dict[0]['float'], 21.55) + self.assertEqual(data.dict[0]['decimal'], 34.5) + self.assertEqual(data.dict[0]['date'], date) + self.assertEqual(data.dict[0]['date/time'], date_time) + + def test_ods_import_book(self): + ods_source = Path(__file__).parent / 'files' / 'book.ods' + with ods_source.open('rb') as fh: + dbook = tablib.Databook().load(fh, 'ods') + self.assertEqual(len(dbook.sheets()), 2) class XLSTests(BaseTestCase):