diff --git a/pydifact/parser.py b/pydifact/parser.py index f1c7c93..9d2f74d 100644 --- a/pydifact/parser.py +++ b/pydifact/parser.py @@ -52,13 +52,25 @@ def parse( # If there is a UNA, take the following 6 characters # unconditionally, strip them, and make control Characters() # for further parsing - una_found = message[0:3] == "UNA" + + # If it starts by UNA + una_pattern = "UNA" + if message.startswith(una_pattern): + idx_una = 0 + # Otherwise we look for UNA, so to avoid finding "lorem ipsum UNA lorem ipsum" we look for the segment separator following by UNA. + else: + una_pattern = "'UNA" + idx_una = message.find(una_pattern) + una_found = idx_una != -1 if una_found: - characters = Characters.from_str("UNA" + message[3:9]) + idx_begin = idx_una + len(una_pattern) + idx_end = idx_begin + 6 + characters = Characters.from_str(f"UNA{message[idx_begin: idx_end]}") - # remove the UNA segment from the string - message = message[9:].lstrip("\r\n") + # remove the UNA segment from the string, + # ignore everything before UNA because it should be the first segment if una_found. + message = message[idx_end:].lstrip("\r\n") else: # if no UNA header present, use default control characters diff --git a/tests/data/sage_coala.ped b/tests/data/sage_coala.ped new file mode 100644 index 0000000..e92f70a --- /dev/null +++ b/tests/data/sage_coala.ped @@ -0,0 +1 @@ +DOC+01=serveur\grps\dossiers\BLABLApatr+02=9215001+03=04/07/2024+04=FD2401+05=24+06=DGFiP+07=EDITDFC+08=32489+09=99999999800028+10=BLABLApatr+11=BLABLA PATRIMOINE+12=99999999900017++14=FR96999999999+15=corinne+16=isrn+17=01/01/2023+18=31/12/2023+19=240315+++++24=103200+25=JDC+26=contact@BLABLADEXPERT.com+27=++29=++++++35={55D57981-DA87-426D-911E-464844992C9C}'UNA:+,? 
'UNB+UNOL:3+99999999800028:5:I+9215001:146+240704:1032+20241861032cor+++++TDT-PED-IN-TD2401'UNG+INFENT+NON_SECURISE_NON_SIGNE+MULTI_DISTRIBUTION+240704:1032+1+UN+D:00B:FD2401'UNH+00001+INFENT:D:00B:UN:FD2401'BGM+IDF:71:211+INFENT BLABLApatr BI RN IS'DTM+242:20240704:102'RFF+AUM:SAGE Experts Comptables'RFF+AUN:COALA EXPORT EDI::1.0:240315'RFF+AUO:2012.01.0310'NAD+DT+999999999:100:107++BLABLA PATRIMOINE+0000 55 Boulevard grbbaf+Bibor++75016'RFF+AWR:20241861032cor'RFF+ACD:IS1'NAD+FR+99999999800028:100:107++CEC_EDI_TDFC:BLABLADEXPERT::::3+0000 323, rue Saint-Martin+Bibor++75003'RFF+ACD:BLABLApatr BLABLA PATRIMOINE SAGE COALA'NAD+MS+99999999800028:100:107++BLABLADEXPERT+0000 323, rue Saint-Martin+Bibor++75003'RFF+ACD:BLABLApatr BLABLA PATRIMOINE'NAD+MR+9215001:100:268++SAGE+0000 10 traoe de transpor+La petropa taratata++92250'RFF+CU:BLABLApatr BLABLA PATRIMOINE'NAD+HP+++DGI_EDI_TDFC:::::3'SEQ++1'IND++F-IDENTIF 2400000000AANAD'NAD+ZZZ+999999999:100:ZZZ++BLABLA PATRIMOINE+0000 55 Boulevard grbbaf+Bibor++75016'SEQ++2'IND++F-IDENTIF 2400000000ABRFF'RFF+ZZZ:7022Z'SEQ++3'IND++F-IDENTIF 2400000000BACCI'CCI+++TCF:LIS:211'CAV+BI:TCF:211'SEQ++4'IND++F-IDENTIF 2400000000BBCCI'CCI+++TRF:LIS:211'CAV+RN:TRF:211'SEQ++5'IND++F-IDENTIF 2400000000BCCCI'CCI+++TBS:LIS:211'CAV+IS:TBS:211'SEQ++6'IND++F-IDENTIF 2400000000BFCCI'CCI+++TDP:LIS:211'CAV+NOR:TDP:211'SEQ++7'IND++F-IDENTIF 2400000000CADTM'DTM+ZZZ:20230101:102'SEQ++8'IND++F-IDENTIF 2400000000CBDTM'DTM+ZZZ:20231231:102'SEQ++9'IND++F-IDENTIF 2400000000CCDTM'DTM+ZZZ:12:802'SEQ++10'IND++F-IDENTIF 2400000000CDDTM'DTM+ZZZ:20221231:102'SEQ++11'IND++F-IDENTIF 2400000000CEDTM'DTM+ZZZ:12:802'SEQ++12'IND++F-IDENTIF 2400000000DACUX'CUX+3:EUR'SEQ++13'IND++F-IDENTIF 2400000000KDRFF'RFF+ZZZ:IS1'SEQ++14'IND++2050 2400000000ATMOA'MOA+ZZZ:1387'SEQ++15'IND++2050 2400000000AUMOA'MOA+ZZZ:870'SEQ++16'IND++2050 2400000000BJMOA'MOA+ZZZ:265563'SEQ++17'IND++2050 2400000000BKMOA'MOA+ZZZ:870'SEQ++18'IND++2050 2400000000BXMOA'MOA+ZZZ:85899'SEQ++19'IND++2050 
2400000000CFMOA'MOA+ZZZ:18439'SEQ++20'IND++2050 2400000000CJMOA'MOA+ZZZ:104338'SEQ++21'IND++2050 2400000000COMOA'MOA+ZZZ:369901'SEQ++22'IND++2050 2400000000CUMOA'MOA+ZZZ:264176'SEQ++23'IND++2050 2400000000DSMOA'MOA+ZZZ:517'SEQ++24'IND++2050 2400000000DWMOA'MOA+ZZZ:264176'SEQ++25'IND++2050 2400000000EBMOA'IND++2069RCI 2400010001BBMOA'MOA+ZZZ:2112'UNT+542+00001'UNE+1+1'UNZ+1+20241861032cor' diff --git a/tests/test_sage_coala.py b/tests/test_sage_coala.py new file mode 100644 index 0000000..daddc41 --- /dev/null +++ b/tests/test_sage_coala.py @@ -0,0 +1,47 @@ +# pydifact - a python edifact library +# Copyright (C) 2017-2024 Christian González +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
import os
from pydifact.segmentcollection import Interchange
from pydifact.segments import Segment


# Directory holding the fixture files, resolved relative to this test module
# so the test does not depend on the current working directory. Built with
# os.path.join rather than a hard-coded "/" so it is portable across platforms.
path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")


def test_sage_coala_file():
    """Test parsing a file generated by SAGE COALA.

    The fixture does not begin with the UNA service string advice: a
    ``DOC+...`` segment precedes it, so the UNA only appears after the first
    segment terminator. The interchange must still load, expose the UNB
    header, and return the expected RFF segment.
    """
    interchange = Interchange.from_file(os.path.join(path, "sage_coala.ped"))
    assert interchange

    # The UNB header that directly follows the UNA in the fixture.
    assert interchange.get_header_segment() == Segment(
        "UNB",
        ["UNOL", "3"],
        ["99999999800028", "5", "I"],
        ["9215001", "146"],
        ["240704", "1032"],
        "20241861032cor",
        "",
        "",
        "",
        "",
        "TDT-PED-IN-TD2401",
    )

    # "RFF+AUM:SAGE Experts Comptables" is the first RFF segment in the fixture.
    assert interchange.get_segment("RFF") == Segment(
        "RFF", ["AUM", "SAGE Experts Comptables"]
    )


if __name__ == "__main__":
    test_sage_coala_file()