From 434719fe7b3e0614c7268925c9a1cacda809a73a Mon Sep 17 00:00:00 2001
From: ajstanley
Date: Fri, 25 Feb 2022 15:01:26 -0400
Subject: [PATCH 1/6] Added additional mapping

---
 workbench_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workbench_utils.py b/workbench_utils.py
index 24769616..73eedd11 100644
--- a/workbench_utils.py
+++ b/workbench_utils.py
@@ -4040,6 +4040,7 @@ def get_extension_from_mimetype(mimetype):
            'image/jp2': '.jp2',
            'image/png': '.png',
            'audio/mpeg': '.mp3',
+           'audio/mp3': '.mp3',
            'text/plain': '.txt',
            'application/octet-stream': '.bin'
            }

From 0b91f47dbae5cb87c5ed447c1a1a7a5c12598a18 Mon Sep 17 00:00:00 2001
From: ajstanley
Date: Sun, 27 Feb 2022 12:59:09 -0400
Subject: [PATCH 2/6] Allow additions and overrides for mimetype/extension mapping.

---
 WorkbenchConfig.py | 18 ++++++++++++++++++
 workbench_utils.py | 21 +++++++++------------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py
index 3ae59bd7..642b9930 100644
--- a/WorkbenchConfig.py
+++ b/WorkbenchConfig.py
@@ -26,6 +26,12 @@ def __init__(self, args):
     def get_config(self):
         config = self.get_default_config()
         user_mods = self.get_user_config()
+        # Allow extension additions and overrides.
+        if user_mods.get('mimetype_extensions'):
+            for candidate in user_mods.get('mimetype_extensions'):
+                for mimetype, extension in candidate.items():
+                    config['mimetype_extensions'][mimetype] = extension
+            del user_mods['mimetype_extensions']
         # Blend defaults with user mods
         for key, value in user_mods.items():
             config[key] = value
@@ -87,6 +93,17 @@ def get_media_types(self):
             {'extracted_text': ['txt']}
         ]
 
+    # Returns default file extensions for mimetypes
+    def get_default_extensions(self):
+        return {'image/jpeg': '.jpg',
+                'image/jp2': '.jp2',
+                'image/png': '.png',
+                'audio/mpeg': '.mp3',
+                'audio/mp3': '.mp3',
+                'text/plain': '.txt',
+                'application/octet-stream': '.bin'
+                }
+
     # Returns default configs, to be updated by user-supplied config.
     def get_default_config(self):
         return {
@@ -143,6 +160,7 @@ def get_default_config(self):
             'paged_content_sequence_separator': '-',
             'media_bundle_file_fields': self.get_media_fields(),
             'media_fields': self.get_media_fields(),
+            'mimetype_extensions': self.get_default_extensions(),
         }
 
     # Tests validity and existence of path.
diff --git a/workbench_utils.py b/workbench_utils.py
index 73eedd11..9a05be04 100644
--- a/workbench_utils.py
+++ b/workbench_utils.py
@@ -2003,7 +2003,7 @@ def create_media(config, filename, file_fieldname, node_id, node_csv_row, media_
     media_type = set_media_type(config, filename, file_fieldname, node_csv_row)
     media_bundle_response_code = ping_media_bundle(config, media_type)
     if media_bundle_response_code == 404:
-        message = 'File "' + file_check_row[filename_field] + '" identified in CSV row ' + file_check_row[config['id_field']] + \
+        message = 'File "' + node_csv_row[file_fieldname] + '" identified in CSV row ' + node_csv_row[config['id_field']] + \
             ' will create a media of type (' + media_type + '), but that media type is not configured in the destination Drupal.'
         logging.error(message)
         return False
@@ -4032,22 +4032,19 @@ def get_csv_from_excel(config):
     csv_writer_file_handle.close()
 
 
-def get_extension_from_mimetype(mimetype):
+def get_extension_from_mimetype(config, mimetype):
     # mimetypes.add_type() is not working, e.g. mimetypes.add_type('image/jpeg', '.jpg')
     # Maybe related to https://bugs.python.org/issue4963?
     # In the meantime, provide our own MIMETYPE to extension mapping for common
     # types, then let mimetypes guess at others.
-    map = {'image/jpeg': '.jpg',
-           'image/jp2': '.jp2',
-           'image/png': '.png',
-           'audio/mpeg': '.mp3',
-           'audio/mp3': '.mp3',
-           'text/plain': '.txt',
-           'application/octet-stream': '.bin'
-           }
+    map = config['mimetype_extensions']
     if mimetype in map:
         return map[mimetype]
     else:
-        return mimetypes.guess_extension(mimetype)
+        extension = mimetypes.guess_extension(mimetype)
+        if (extension):
+            return mimetypes.guess_extension(mimetype)
+        else:
+            print (f"There is no mapping available for {mimetype}")
 
 
 def get_deduped_file_path(path):
@@ -4194,7 +4191,7 @@ def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = No
     except KeyError:
         mimetype = 'application/octet-stream'
 
-    extension_with_dot = get_extension_from_mimetype(mimetype)
+    extension_with_dot = get_extension_from_mimetype(config, mimetype)
     downloaded_file_path = os.path.join(subdir, filename + extension_with_dot)
 
     # Check to see if a file with this path already exists; if so, insert an

From b9357de13e487c41ce02da3a5bd6b823ab7c3a57 Mon Sep 17 00:00:00 2001
From: ajstanley
Date: Sun, 27 Feb 2022 13:02:18 -0400
Subject: [PATCH 3/6] Coding standards

---
 workbench_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workbench_utils.py b/workbench_utils.py
index 9a05be04..c88793d0 100644
--- a/workbench_utils.py
+++ b/workbench_utils.py
@@ -4044,7 +4044,7 @@ def get_extension_from_mimetype(config, mimetype):
         if (extension):
             return mimetypes.guess_extension(mimetype)
         else:
-            print (f"There is no mapping available for {mimetype}")
+            print(f"There is no mapping available for {mimetype}")
 
 
 def get_deduped_file_path(path):
@@ -4119,7 +4119,7 @@ def check_file_exists(config, filename):
     return True
 
 
-def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = None):
+def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id=None):
     """For remote/downloaded files, generates the path to the local temporary copy
        and returns that path. For local files, just returns the value of node_csv_row['file'].

From cd016c530228c2895781a9789f8a272aeedbda93 Mon Sep 17 00:00:00 2001
From: ajstanley
Date: Sun, 6 Mar 2022 15:50:48 -0400
Subject: [PATCH 4/6] Made dictionary treatment more consistent

---
 i7Import/i7ImportUtilities.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py
index 51d5f970..6f40d7de 100644
--- a/i7Import/i7ImportUtilities.py
+++ b/i7Import/i7ImportUtilities.py
@@ -144,9 +144,8 @@ def get_default_metadata_solr_request(self):
             model = self.config['content_model']
             query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info:fedora/{model}"'
         if self.config['solr_filters']:
-            for filter in self.config['solr_filters']:
-                for key, value in filter.items():
-                    query = f'{query}&fq={key}:"{value}"'
+            for key, value in self.config['solr_filters'].items():
+                query = f'{query}&fq={key}:"{value}"'
 
         # Get the populated CSV from Solr, with the object namespace and field list filters applied.
         return query
@@ -196,6 +195,18 @@ def print_config(self):
         table.add_column("Parameter", justify="left")
         table.add_column("Value", justify="left")
         for key, value in self.config.items():
+            test = str(type(value))
+            if test == "<class 'dict'>":
+                new_value = ''
+                for k, v in value.items():
+                    new_value += f"{k}: {v}\n"
+                value = new_value
+            if test == "<class 'list'>":
+                new_value = ''
+                for candidate in value:
+                    for k, v in candidate.items():
+                        new_value += f"{k}: {v}\n"
+                value = new_value
             table.add_row(key, str(value))
         console = Console()
         console.print(table)

From 60d0baab945823ab3c0400fa92e1b2759744f355 Mon Sep 17 00:00:00 2001
From: ajstanley
Date: Mon, 7 Mar 2022 11:15:47 -0400
Subject: [PATCH 5/6] Same logic for workbench config

---
 WorkbenchConfig.py            | 5 +++++
 i7Import/i7ImportUtilities.py | 9 +--------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py
index 642b9930..c22f28be 100644
--- a/WorkbenchConfig.py
+++ b/WorkbenchConfig.py
@@ -204,6 +204,11 @@ def print_config(self):
         table.add_column("Parameter", justify="left")
         table.add_column("Value", justify="left")
         for key, value in self.config.items():
+            if str(type(value)) == "<class 'dict'>":
+                new_value = ''
+                for k, v in value.items():
+                    new_value += f"{k}: {v}\n"
+                value = new_value
             table.add_row(key, str(value))
         console = Console()
         console.print(table)
diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py
index 6f40d7de..e15a66a0 100644
--- a/i7Import/i7ImportUtilities.py
+++ b/i7Import/i7ImportUtilities.py
@@ -195,18 +195,11 @@ def print_config(self):
         table.add_column("Parameter", justify="left")
         table.add_column("Value", justify="left")
         for key, value in self.config.items():
-            test = str(type(value))
-            if test == "<class 'dict'>":
+            if str(type(value)) == "<class 'dict'>":
                 new_value = ''
                 for k, v in value.items():
                     new_value += f"{k}: {v}\n"
                 value = new_value
-            if test == "<class 'list'>":
-                new_value = ''
-                for candidate in value:
-                    for k, v in candidate.items():
-                        new_value += f"{k}: {v}\n"
-                value = new_value
             table.add_row(key, str(value))
         console = Console()
         console.print(table)

From c5e696220dcae04ac919d5633852614ce38440d3 Mon Sep 17 00:00:00 2001
From: ajstanley
Date: Sat, 19 Mar 2022 11:37:12 -0300
Subject: [PATCH 6/6] Avoided duplicated file and media names

---
 workbench_utils.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/workbench_utils.py b/workbench_utils.py
index c88793d0..1b1a2c3d 100644
--- a/workbench_utils.py
+++ b/workbench_utils.py
@@ -2020,7 +2020,11 @@ def create_media(config, filename, file_fieldname, node_id, node_csv_row, media_
         logging.warning(message)
     media_name = os.path.basename(filename)
     if config['use_nid_in_media_title']:
-        media_name = f"{node_id}-Original File"
+        if file_fieldname == 'file':
+            identifier = 'Original File'
+        else:
+            identifier = file_fieldname
+        media_name = f"{node_id}-{identifier}"
     if config['field_for_media_title']:
         media_name = node_csv_row[config['field_for_media_title']].replace(':', '_')
     media_json = {
@@ -4175,7 +4179,11 @@ def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id=None
         downloaded_file_path = os.path.join(subdir, filename)
 
     if config['use_nid_in_media_title']:
-        filename = f"{node_id}-Original File"
+        if file_fieldname == 'file':
+            file_identifier = 'Original File'
+        else:
+            file_identifier = file_fieldname
+        filename = f"{node_id}-{file_identifier}"
         downloaded_file_path = os.path.join(subdir, filename)
 
     if extension == '':
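
Note on PATCH 2/6: the sketch below is not part of the patch series; it is a minimal, standalone illustration of how the mimetype_extensions merge in WorkbenchConfig.get_config() is expected to behave. The shape of the user-supplied value (a list of single-entry mappings) is inferred from the nested loop in that hunk, and the extra mimetypes used here ('image/tiff', 'audio/x-wav') are hypothetical examples, not defaults added by the patch.

# Standalone sketch of the mimetype_extensions merge from PATCH 2/6.
# Defaults, as returned by WorkbenchConfig.get_default_extensions().
config = {
    'mimetype_extensions': {
        'image/jpeg': '.jpg',
        'image/jp2': '.jp2',
        'image/png': '.png',
        'audio/mpeg': '.mp3',
        'audio/mp3': '.mp3',
        'text/plain': '.txt',
        'application/octet-stream': '.bin',
    }
}

# Hypothetical user-supplied additions/overrides, as they might look after
# parsing a Workbench config file; the mimetypes here are examples only.
user_mods = {
    'mimetype_extensions': [
        {'image/tiff': '.tif'},    # addition
        {'audio/x-wav': '.wav'},   # addition
        {'image/jpeg': '.jpeg'},   # override of a default
    ]
}

# Merge logic mirroring the hunk added to WorkbenchConfig.get_config():
# each single-entry mapping is folded into the default dictionary, then the
# key is removed from user_mods so the later blend loop does not clobber it.
if user_mods.get('mimetype_extensions'):
    for candidate in user_mods.get('mimetype_extensions'):
        for mimetype, extension in candidate.items():
            config['mimetype_extensions'][mimetype] = extension
    del user_mods['mimetype_extensions']

assert config['mimetype_extensions']['image/tiff'] == '.tif'
assert config['mimetype_extensions']['image/jpeg'] == '.jpeg'

With this merge in place, get_extension_from_mimetype(config, mimetype) consults config['mimetype_extensions'] first, falls back to mimetypes.guess_extension() for unmapped types, and prints a message when neither source yields an extension.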