diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py index fa612cbd..a5f50035 100644 --- a/WorkbenchConfig.py +++ b/WorkbenchConfig.py @@ -26,6 +26,12 @@ def __init__(self, args): def get_config(self): config = self.get_default_config() user_mods = self.get_user_config() + # Allow extension additions and overrides. + if user_mods.get('mimetype_extensions'): + for candidate in user_mods.get('mimetype_extensions'): + for mimetype, extension in candidate.items(): + config['mimetype_extensions'][mimetype] = extension + del user_mods['mimetype_extensions'] # Blend defaults with user mods for key, value in user_mods.items(): config[key] = value @@ -89,6 +95,17 @@ def get_media_types(self): {'extracted_text': ['txt']} ] + # Returns default file extensions for mimetypes + def get_default_extensions(self): + return {'image/jpeg': '.jpg', + 'image/jp2': '.jp2', + 'image/png': '.png', + 'audio/mpeg': '.mp3', + 'audio/mp3': '.mp3', + 'text/plain': '.txt', + 'application/octet-stream': '.bin' + } + # Returns default configs, to be updated by user-supplied config. def get_default_config(self): return { @@ -145,6 +162,7 @@ def get_default_config(self): 'paged_content_sequence_separator': '-', 'media_bundle_file_fields': self.get_media_fields(), 'media_fields': self.get_media_fields(), + 'mimetype_extensions': self.get_default_extensions(), } # Tests validity and existence of path. @@ -188,6 +206,11 @@ def print_config(self): table.add_column("Parameter", justify="left") table.add_column("Value", justify="left") for key, value in self.config.items(): + if str(type(value)) == '': + new_value = '' + for k, v in value.items(): + new_value += f"{k}: {v}\n" + value = new_value table.add_row(key, str(value)) console = Console() console.print(table) diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 51d5f970..e15a66a0 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -144,9 +144,8 @@ def get_default_metadata_solr_request(self): model = self.config['content_model'] query = f'{query}&fq=RELS_EXT_hasModel_uri_s:"info:fedora/{model}"' if self.config['solr_filters']: - for filter in self.config['solr_filters']: - for key, value in filter.items(): - query = f'{query}&fq={key}:"{value}"' + for key, value in self.config['solr_filters'].items(): + query = f'{query}&fq={key}:"{value}"' # Get the populated CSV from Solr, with the object namespace and field list filters applied. return query @@ -196,6 +195,11 @@ def print_config(self): table.add_column("Parameter", justify="left") table.add_column("Value", justify="left") for key, value in self.config.items(): + if str(type(value)) == '': + new_value = '' + for k, v in value.items(): + new_value += f"{k}: {v}\n" + value = new_value table.add_row(key, str(value)) console = Console() console.print(table) diff --git a/workbench_utils.py b/workbench_utils.py index 24769616..1b1a2c3d 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -2003,7 +2003,7 @@ def create_media(config, filename, file_fieldname, node_id, node_csv_row, media_ media_type = set_media_type(config, filename, file_fieldname, node_csv_row) media_bundle_response_code = ping_media_bundle(config, media_type) if media_bundle_response_code == 404: - message = 'File "' + file_check_row[filename_field] + '" identified in CSV row ' + file_check_row[config['id_field']] + \ + message = 'File "' + node_csv_row[file_fieldname] + '" identified in CSV row ' + node_csv_row[config['id_field']] + \ ' will create a media of type (' + media_type + '), but that media type is not configured in the destination Drupal.' logging.error(message) return False @@ -2020,7 +2020,11 @@ def create_media(config, filename, file_fieldname, node_id, node_csv_row, media_ logging.warning(message) media_name = os.path.basename(filename) if config['use_nid_in_media_title']: - media_name = f"{node_id}-Original File" + if file_fieldname == 'file': + identifier = 'Original File' + else: + identifier = file_fieldname + media_name = f"{node_id}-{identifier}" if config['field_for_media_title']: media_name = node_csv_row[config['field_for_media_title']].replace(':', '_') media_json = { @@ -4032,21 +4036,19 @@ def get_csv_from_excel(config): csv_writer_file_handle.close() -def get_extension_from_mimetype(mimetype): +def get_extension_from_mimetype(config, mimetype): # mimetypes.add_type() is not working, e.g. mimetypes.add_type('image/jpeg', '.jpg') # Maybe related to https://bugs.python.org/issue4963? In the meantime, provide our own # MIMETYPE to extension mapping for common types, then let mimetypes guess at others. - map = {'image/jpeg': '.jpg', - 'image/jp2': '.jp2', - 'image/png': '.png', - 'audio/mpeg': '.mp3', - 'text/plain': '.txt', - 'application/octet-stream': '.bin' - } + map = config['mimetype_extensions'] if mimetype in map: return map[mimetype] else: - return mimetypes.guess_extension(mimetype) + extension = mimetypes.guess_extension(mimetype) + if (extension): + return mimetypes.guess_extension(mimetype) + else: + print(f"There is no mapping available for {mimetype}") def get_deduped_file_path(path): @@ -4121,7 +4123,7 @@ def check_file_exists(config, filename): return True -def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = None): +def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id=None): """For remote/downloaded files, generates the path to the local temporary copy and returns that path. For local files, just returns the value of node_csv_row['file']. @@ -4177,7 +4179,11 @@ def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = No downloaded_file_path = os.path.join(subdir, filename) if config['use_nid_in_media_title']: - filename = f"{node_id}-Original File" + if file_fieldname == 'file': + file_identifier = 'Original File' + else: + file_identifier = file_fieldname + filename = f"{node_id}-{file_identifier}" downloaded_file_path = os.path.join(subdir, filename) if extension == '': @@ -4193,7 +4199,7 @@ def get_prepocessed_file_path(config, file_fieldname, node_csv_row, node_id = No except KeyError: mimetype = 'application/octet-stream' - extension_with_dot = get_extension_from_mimetype(mimetype) + extension_with_dot = get_extension_from_mimetype(config, mimetype) downloaded_file_path = os.path.join(subdir, filename + extension_with_dot) # Check to see if a file with this path already exists; if so, insert an