From e88c3cf800df0ee4e6693532accdfb66767d7cba Mon Sep 17 00:00:00 2001 From: stefantrapp Date: Wed, 3 Aug 2022 14:37:01 +0200 Subject: [PATCH 1/4] Update parse_xml_v4_file.py Bugfix for ipcr classification --- parsers/parse_xml_v4_file.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parsers/parse_xml_v4_file.py b/parsers/parse_xml_v4_file.py index fe3c35e..b1db94a 100644 --- a/parsers/parse_xml_v4_file.py +++ b/parsers/parse_xml_v4_file.py @@ -379,11 +379,11 @@ def get_patent_classifications(root_tree): classifications_ipcr_list = [] classification_ipcr_tags = ['ipc-version-indicator/date', 'classification-level', 'section', 'class', 'subclass', 'main-group', 'subgroup', 'symbol-position', 'classification-value', 'action-date/date', 'generating-office/country', 'classification-status','classification-data-source'] for classification in classifications_ipcr: - classification_data = {} + classification_data_ipcr = {} for tag in classification_ipcr_tags: if classification.find(tag) != None: - classification_data[tag] = classification.find(tag).text - classifications_ipcr_list.append(classification_data) + classification_data_ipcr[tag] = classification.find(tag).text + classifications_ipcr_list.append(classification_data_ipcr) classification_data['classifications_ipcr_list'] = classifications_ipcr_list if classifications_ipc: classifications_ipc_list = [] From ec6f6052a452be0f034450b37222f8d921c46c5d Mon Sep 17 00:00:00 2001 From: stefantrapp Date: Wed, 3 Aug 2022 19:51:07 +0200 Subject: [PATCH 2/4] Update parse_xml_v4_file.py I believe this is the desired behavior --- parsers/parse_xml_v4_file.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parsers/parse_xml_v4_file.py b/parsers/parse_xml_v4_file.py index b1db94a..e619e1d 100644 --- a/parsers/parse_xml_v4_file.py +++ b/parsers/parse_xml_v4_file.py @@ -101,8 +101,7 @@ def get_patent_identification_data(root_tree): if application_info !=None: application_reference_info = 
{element.tag: element.text for element in list(application_info)} if application_info.attrib and application_info.attrib['appl-type']: - application_reference_info['application_type'] = application_info.attrib['appl-type'] - document_data = {**document_data,**application_reference_info} + document_data['application_type'] = application_info.attrib['appl-type'] if term_of_grant_info != None: term_of_grant = {} if term_of_grant_length != None: From b73a0a73853c49616e92cb4bd65da3b119e002f0 Mon Sep 17 00:00:00 2001 From: stefantrapp Date: Mon, 8 Aug 2022 12:42:55 +0200 Subject: [PATCH 3/4] remove line --- parsers/parse_xml_v4_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/parsers/parse_xml_v4_file.py b/parsers/parse_xml_v4_file.py index e619e1d..e965def 100644 --- a/parsers/parse_xml_v4_file.py +++ b/parsers/parse_xml_v4_file.py @@ -99,7 +99,6 @@ def get_patent_identification_data(root_tree): publication_reference_info = {element.tag: element.text for element in list(publication_info)} document_data = {**document_data,**publication_reference_info} if application_info !=None: - application_reference_info = {element.tag: element.text for element in list(application_info)} if application_info.attrib and application_info.attrib['appl-type']: document_data['application_type'] = application_info.attrib['appl-type'] if term_of_grant_info != None: From 5140a312be9cb5a649667b36f788b3e9939c7de2 Mon Sep 17 00:00:00 2001 From: stefantrapp Date: Wed, 10 Aug 2022 09:45:44 +0200 Subject: [PATCH 4/4] remove line --- .idea/uspto-patent-data-parser.iml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .idea/uspto-patent-data-parser.iml diff --git a/.idea/uspto-patent-data-parser.iml b/.idea/uspto-patent-data-parser.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/uspto-patent-data-parser.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file