Case insensitive changes #708

Open · wants to merge 5 commits into main
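This PR lowercases values before testing them with startswith('http') and endswith('.xlsx') / endswith('xlsx'), so mixed-case URLs and Excel extensions are detected when deciding whether the input CSV is a Google Sheet or an Excel file, whether a file named in a CSV row is remote, and whether a term value is a Linked Data URI. Python's str.startswith() and str.endswith() compare case-sensitively, which this minimal snippet illustrates (the sample values are illustrative, not taken from the diff):

```python
# str.startswith() and str.endswith() are case-sensitive, so mixed-case
# values slip past the old checks. Sample values below are illustrative.
input_csv = 'HTTPS://docs.google.com/spreadsheets/d/abc123/edit'
print(input_csv.startswith('http'))          # False: uppercase scheme missed
print(input_csv.lower().startswith('http'))  # True

filename = 'metadata.XLSX'
print(filename.endswith('.xlsx'))            # False: uppercase extension missed
print(filename.lower().endswith('.xlsx'))    # True
```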
workbench (4 changes: 2 additions & 2 deletions)
```diff
@@ -2120,9 +2120,9 @@ if config['check'] is False and 'bootstrap' in config and len(config['bootstrap'
         else:
             logging.error(f"Bootstrap script {command} failed with exit code {str(return_code)}.")
 
-if config['task'] != 'create_from_files' and config['input_csv'].startswith('http') is True:
+if config['task'] != 'create_from_files' and config['input_csv'].lower().startswith('http') is True:
     get_csv_from_google_sheet(config)
-if config['task'] != 'create_from_files' and config['input_csv'].endswith('.xlsx') is True:
+if config['task'] != 'create_from_files' and config['input_csv'].lower().endswith('.xlsx') is True:
     get_csv_from_excel(config)
 
 validate_input_dir(config)
```
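The same one-line change recurs 28 times in workbench_utils.py below. A hypothetical follow-up refactor (not part of this PR) could centralize the two checks in helpers so any future tightening happens in one place:

```python
# Hypothetical helpers (not in this PR) that centralize the two checks.
def is_remote_path(value: str) -> bool:
    """True if the value looks like an http(s) URL, regardless of case."""
    return value.strip().lower().startswith('http')

def has_extension(value: str, extension: str) -> bool:
    """Case-insensitive extension test, e.g. has_extension(path, '.xlsx')."""
    return value.strip().lower().endswith(extension.lower())
```

Such a helper would also be the natural place to tighten the prefix test to startswith(('http://', 'https://')), since the bare 'http' prefix also matches strings like 'httpfoo://'.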
workbench_utils.py (56 changes: 28 additions & 28 deletions)
```diff
@@ -79,7 +79,7 @@ def set_media_type(config, filepath, file_fieldname, csv_row):
     if oembed_media_type is not None:
         return oembed_media_type
 
-    if filepath.strip().startswith('http'):
+    if filepath.strip().lower().startswith('http'):
         preprocessed_file_path = get_preprocessed_file_path(config, file_fieldname, csv_row)
         filename = preprocessed_file_path.split('/')[-1]
         extension = filename.split('.')[-1]
```
```diff
@@ -185,7 +185,7 @@ def set_model_from_extension(file_name, config):
     normalized_extension = extension.lower()
     for model_tids in config['models']:
         for tid, extensions in model_tids.items():
-            if str(tid).startswith('http'):
+            if str(tid).lower().startswith('http'):
                 tid = get_term_id_from_uri(config, tid)
             if normalized_extension in extensions:
                 return tid
```
```diff
@@ -2214,7 +2214,7 @@ def check_input(config, args):
                 rows_with_missing_files.append(file_check_row[config['id_field']])
                 logging.warning(message)
             # Check for URLs.
-            elif file_check_row['file'].startswith('http'):
+            elif file_check_row['file'].lower().startswith('http'):
                 http_response_code = ping_remote_file(config, file_check_row['file'])
                 if http_response_code != 200 or ping_remote_file(config, file_check_row['file']) is False:
                     message = 'Remote file "' + file_check_row['file'] + '" identified in CSV "file" column for record with ID "' \
```
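An aside on the surrounding, pre-existing condition: ping_remote_file() is called a second time inside the if test, so each remote file is pinged twice, and because False != 200 evaluates to True, the second clause can never change the outcome. A hypothetical simplification, outside this PR's scope:

```python
# Hypothetical simplification (not part of this PR): ping once; a failed ping
# returns False, and False != 200 is True, so one comparison covers both cases.
http_response_code = ping_remote_file(config, file_check_row['file'])
if http_response_code != 200:
    pass  # build and log the same warning message as the existing code
```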
```diff
@@ -2273,7 +2273,7 @@ def check_input(config, args):
 
             # Check that each file's extension is allowed for the current media type. 'file' is the only
             # CSV field to check here. Files added using the 'additional_files' setting are checked below.
-            if file_check_row['file'].startswith('http'):
+            if file_check_row['file'].lower().startswith('http'):
                 # First check to see if the file has an extension.
                 extension = os.path.splitext(file_check_row['file'])[1]
                 if len(extension) > 0:
```
```diff
@@ -2325,7 +2325,7 @@ def check_input(config, args):
                 else:
                     logging.warning(message)
 
-                if file_check_row[additional_file_field].startswith('http'):
+                if file_check_row[additional_file_field].lower().startswith('http'):
                     http_response_code = ping_remote_file(config, file_check_row[additional_file_field])
                     if http_response_code != 200 or ping_remote_file(config, file_check_row[additional_file_field]) is False:
                         message = 'Remote file ' + file_check_row[additional_file_field] + ' identified in CSV "' + additional_file_field + '" column for record with ID ' \
```
```diff
@@ -2372,7 +2372,7 @@ def check_input(config, args):
                 additional_filenames = file_check_row[additional_file_field].split(config['subdelimiter'])
                 media_type_file_field = config['media_type_file_fields'][media_type]
                 for additional_filename in additional_filenames:
-                    if additional_filename.startswith('http'):
+                    if additional_filename.lower().startswith('http'):
                         # First check to see if the file has an extension.
                         extension = os.path.splitext(additional_filename)[1]
                         if len(extension) > 0:
```
```diff
@@ -2967,9 +2967,9 @@ def validate_media_use_tid_in_additional_files_setting(config, media_use_tid_val
         media_use_tids.append(media_use_tid_value)
 
     for media_use_tid in media_use_tids:
-        if not value_is_numeric(media_use_tid) and media_use_tid.strip().startswith('http'):
+        if not value_is_numeric(media_use_tid) and media_use_tid.strip().lower().startswith('http'):
             media_use_tid = get_term_id_from_uri(config, media_use_tid.strip())
-        if not value_is_numeric(media_use_tid) and not media_use_tid.strip().startswith('http'):
+        if not value_is_numeric(media_use_tid) and not media_use_tid.strip().lower().startswith('http'):
             media_use_tid = find_term_in_vocab(config, 'islandora_media_use', media_use_tid.strip())
 
         term_endpoint = config['host'] + '/taxonomy/term/' + str(media_use_tid).strip() + '?_format=json'
```
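The two rewritten conditions in this hunk implement a three-step resolution: numeric values pass through as term IDs, values that look like URIs are resolved with get_term_id_from_uri(), and anything else is looked up by name in the islandora_media_use vocabulary. Condensed into a hypothetical helper (value_is_numeric(), get_term_id_from_uri(), and find_term_in_vocab() are Workbench's own functions, visible in the diff):

```python
# Condensed sketch of the resolution order above (the helper functions are
# assumed to behave as they do in workbench_utils.py).
def resolve_media_use_tid(config, value):
    value = value.strip()
    if value_is_numeric(value):           # already a term ID, e.g. '17'
        return value
    if value.lower().startswith('http'):  # a Linked Data URI, in any case
        return get_term_id_from_uri(config, value)
    # otherwise treat the value as a term name, e.g. 'Service File'
    return find_term_in_vocab(config, 'islandora_media_use', value)
```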
```diff
@@ -3016,7 +3016,7 @@ def validate_media_use_tid(config, media_use_tid_value_from_csv=None, csv_row_id
 
     media_use_terms = str(media_use_tid_value).split(config['subdelimiter'])
     for media_use_term in media_use_terms:
-        if value_is_numeric(media_use_term) is not True and media_use_term.strip().startswith('http'):
+        if value_is_numeric(media_use_term) is not True and media_use_term.strip().lower().startswith('http'):
             media_use_tid = get_term_id_from_uri(config, media_use_term.strip())
             if csv_row_id is None:
                 if media_use_tid is False:
```
```diff
@@ -3041,7 +3041,7 @@ def validate_media_use_tid(config, media_use_tid_value_from_csv=None, csv_row_id
                       "derivative media. You should temporarily disable the Context or Action that generates those derivatives."
             logging.warning(message)
 
-        elif value_is_numeric(media_use_term) is not True and media_use_term.strip().startswith('http') is not True:
+        elif value_is_numeric(media_use_term) is not True and media_use_term.strip().lower().startswith('http') is not True:
             media_use_tid = find_term_in_vocab(config, 'islandora_media_use', media_use_term.strip())
             if csv_row_id is None:
                 if media_use_tid is False:
```
```diff
@@ -3235,7 +3235,7 @@ def create_file(config, filename, file_fieldname, node_csv_row, node_id):
     is_remote = False
     filename = filename.strip()
 
-    if filename.startswith('http'):
+    if filename.lower().startswith('http'):
         remote_file_http_response_code = ping_remote_file(config, filename)
         if remote_file_http_response_code != 200:
             return False
```
```diff
@@ -3371,7 +3371,7 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t
     else:
         file_result = create_file(config, filename, file_fieldname, csv_row, node_id)
 
-    if filename.startswith('http'):
+    if filename.lower().startswith('http'):
         if file_result > 0:
             filename = get_preprocessed_file_path(config, file_fieldname, csv_row, node_id, False)
 
```
```diff
@@ -3389,9 +3389,9 @@ def create_media(config, filename, file_fieldname, node_id, csv_row, media_use_t
         for media_use_term in media_use_terms:
             if value_is_numeric(media_use_term):
                 media_use_tids.append(media_use_term)
-            if not value_is_numeric(media_use_term) and media_use_term.strip().startswith('http'):
+            if not value_is_numeric(media_use_term) and media_use_term.strip().lower().startswith('http'):
                 media_use_tids.append(get_term_id_from_uri(config, media_use_term))
-            if not value_is_numeric(media_use_term) and not media_use_term.strip().startswith('http'):
+            if not value_is_numeric(media_use_term) and not media_use_term.strip().lower().startswith('http'):
                 media_use_tids.append(find_term_in_vocab(config, 'islandora_media_use', media_use_term.strip()))
 
     media_bundle_response_code = ping_media_bundle(config, media_type)
```
```diff
@@ -3882,12 +3882,12 @@ def get_csv_data(config, csv_file_target='node_fields', file_path=None):
 
     if os.path.isabs(file_path):
         input_csv_path = file_path
-    elif file_path.startswith('http') is True:
+    elif file_path.lower().startswith('http') is True:
         input_csv_path = get_extracted_csv_file_path(config)
         if os.path.exists(input_csv_path):
             os.remove(input_csv_path)
         get_csv_from_google_sheet(config)
-    elif file_path.endswith('.xlsx') is True:
+    elif file_path.lower().endswith('.xlsx') is True:
         input_csv_path = get_extracted_csv_file_path(config)
         if os.path.exists(input_csv_path):
             os.remove(input_csv_path)
```
```diff
@@ -4580,7 +4580,7 @@ def prepare_term_id(config, vocab_ids, field_name, term):
         return None
     # Special case: if the term starts with 'http', assume it's a Linked Data URI
     # and get its term ID from the URI.
-    elif term.startswith('http'):
+    elif term.lower().startswith('http'):
        # Note: get_term_id_from_uri() will return False if the URI doesn't match a term.
        tid_from_uri = get_term_id_from_uri(config, term)
        if value_is_numeric(tid_from_uri):
```
```diff
@@ -4949,7 +4949,7 @@ def validate_media_track_fields(config, csv_data):
 
         # Confirm that config['media_use_tid'] and row-level media_use_term is for Service File (http://pcdm.org/use#ServiceFile).
         if 'media_use_tid' in row:
-            if row['media_use_tid'].startswith('http') and row['media_use_tid'] != 'http://pcdm.org/use#ServiceFile':
+            if row['media_use_tid'].lower().startswith('http') and row['media_use_tid'] != 'http://pcdm.org/use#ServiceFile':
                 message = f"{row['media_use_tid']} cannot be used as a \"media_use_tid\" value in your CSV when creating media tracks."
                 logging.error(message)
                 sys.exit('Error: ' + message)
```
```diff
@@ -5545,7 +5545,7 @@ def validate_taxonomy_reference_value(config, field_definitions, csv_field_name,
     # If this is a multi-taxonomy field, all term names (not IDs or URIs) must be namespaced using the vocab_id:term_name pattern,
     # regardless of whether config['allow_adding_terms'] is True. Also, we need to accommodate terms that are namespaced
     # and also contain a ':'.
-    if len(this_fields_vocabularies) > 1 and value_is_numeric(field_value) is False and not field_value.startswith('http'):
+    if len(this_fields_vocabularies) > 1 and value_is_numeric(field_value) is False and not field_value.lower().startswith('http'):
         split_field_values = field_value.split(config['subdelimiter'])
         for split_field_value in split_field_values:
             if ':' in field_value:
```
```diff
@@ -5583,7 +5583,7 @@ def validate_taxonomy_reference_value(config, field_definitions, csv_field_name,
                     logging.error(message + message_2)
                     sys.exit('Error: ' + message + message_2)
     # Then check values that are URIs.
-    elif field_value.strip().startswith('http'):
+    elif field_value.strip().lower().startswith('http'):
         field_value = field_value.strip()
         tid_from_uri = get_term_id_from_uri(config, field_value)
         if value_is_numeric(tid_from_uri):
```
```diff
@@ -5775,7 +5775,7 @@ def create_children_from_directory(config, parent_csv_record, parent_node_id):
         # Add field_model if that field exists in the child's content type.
         entity_fields = get_entity_fields(config, 'node', config['paged_content_page_content_type'])
         if 'field_model' in entity_fields:
-            if not value_is_numeric(config['paged_content_page_model_tid'].strip()) and config['paged_content_page_model_tid'].strip().startswith('http'):
+            if not value_is_numeric(config['paged_content_page_model_tid'].strip()) and config['paged_content_page_model_tid'].strip().lower().startswith('http'):
                 paged_content_model_tid = get_term_id_from_uri(config, config['paged_content_page_model_tid'].strip())
             else:
                 paged_content_model_tid = config['paged_content_page_model_tid'].strip()
```
```diff
@@ -6035,9 +6035,9 @@ def get_extracted_csv_file_path(config):
         A file path with the current config file's unique ID appended to it.
         False if config['input_csv'] is not a Google Sheet or Excel file.
     """
-    if config['input_csv'].startswith('http'):
+    if config['input_csv'].lower().startswith('http'):
         exported_csv_filename = config['google_sheets_csv_filename']
-    elif config['input_csv'].endswith('xlsx'):
+    elif config['input_csv'].lower().endswith('xlsx'):
         exported_csv_filename = config['excel_csv_filename']
     else:
         return False
```
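Note that this function tests endswith('xlsx') without the leading dot, while the workbench script above tests endswith('.xlsx'); both now compare case-insensitively. An alternative spelling (not what the PR does) reaches the same result through pathlib:

```python
# Alternative spelling (not what the PR does): pathlib extracts the suffix,
# and lowercasing it makes the comparison case-insensitive.
from pathlib import Path

def is_excel_file(path: str) -> bool:
    return Path(path).suffix.lower() == '.xlsx'

print(is_excel_file('metadata.XLSX'))  # True
print(is_excel_file('metadata.xlsx'))  # True
print(is_excel_file('metadata.csv'))   # False
```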
```diff
@@ -6135,7 +6135,7 @@ def check_file_exists(config, filename):
         True if the file exists, false if not.
     """
     # It's a remote file.
-    if filename.startswith('http'):
+    if filename.lower().startswith('http'):
         try:
             head_response = requests.head(filename, allow_redirects=True, verify=config['secure_ssl_only'])
             if head_response.status_code == 200:
```
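URL schemes are case-insensitive under RFC 3986, which is why 'HTTP://...' is a legal spelling that libraries like requests accept but the old prefix check rejected. urllib.parse lowercases the scheme while parsing, so a stricter hypothetical test could be:

```python
# Hypothetical stricter remote-file test: urlparse() lowercases the scheme,
# and the explicit tuple rejects lookalike schemes such as 'httpfoo'.
from urllib.parse import urlparse

def is_http_url(value: str) -> bool:
    return urlparse(value.strip()).scheme in ('http', 'https')

print(is_http_url('HTTP://example.com/file.jpg'))   # True
print(is_http_url('https://example.com/file.jpg'))  # True
print(is_http_url('input/file.jpg'))                # False
```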
```diff
@@ -6201,7 +6201,7 @@ def get_preprocessed_file_path(config, file_fieldname, node_csv_row, node_id=Non
         return file_path_from_csv
 
     # It's a remote file.
-    if file_path_from_csv.startswith('http'):
+    if file_path_from_csv.lower().startswith('http'):
         if config['task'] == 'add_media':
             subdir = os.path.join(config['temp_dir'], re.sub('[^A-Za-z0-9]+', '_', str(node_csv_row['node_id'])))
         elif config['task'] == 'update_media':
```
```diff
@@ -6418,7 +6418,7 @@ def download_file_from_drupal(config, node_id):
         logging.warning(f'Node {node_id} has no media.')
         return False
 
-    if str(config['export_file_media_use_term_id']).startswith('http'):
+    if str(config['export_file_media_use_term_id']).lower().startswith('http'):
         config['export_file_media_use_term_id'] = get_term_id_from_uri(config, config['export_file_media_use_term_id'])
 
     if config['export_file_media_use_term_id'] is False:
```
```diff
@@ -6558,12 +6558,12 @@ def check_csv_file_exists(config, csv_file_target, file_path=None):
     if os.path.isabs(config['input_csv']):
         input_csv = config['input_csv']
     # For Google Sheets, the "extraction" is fired over in workbench.
-    elif config['input_csv'].startswith('http'):
+    elif config['input_csv'].lower().startswith('http'):
         input_csv = get_extracted_csv_file_path(config)
         message = "Extracting CSV data from " + config['input_csv'] + " (worksheet gid " + str(config['google_sheets_gid']) + ") to " + input_csv + '.'
         print(message)
         logging.info(message)
-    elif config['input_csv'].endswith('xlsx'):
+    elif config['input_csv'].lower().endswith('xlsx'):
         input_csv = get_extracted_csv_file_path(config)
         message = "Extracting CSV data from " + config['input_csv'] + " to " + input_csv + '.'
         print(message)
```
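The PR itself adds no tests; a minimal pytest sketch of the new behavior could look like this (sample values are illustrative):

```python
# Minimal pytest sketch (not included in the PR) for the case-insensitive
# checks the diff introduces; sample values are illustrative.
import pytest

@pytest.mark.parametrize('value, expected', [
    ('https://docs.google.com/spreadsheets/d/abc', True),
    ('HTTPS://docs.google.com/spreadsheets/d/abc', True),
    ('input_data/metadata.csv', False),
])
def test_remote_input_detection(value, expected):
    assert value.lower().startswith('http') is expected

@pytest.mark.parametrize('value, expected', [
    ('metadata.xlsx', True),
    ('METADATA.XLSX', True),
    ('metadata.csv', False),
])
def test_excel_input_detection(value, expected):
    assert value.lower().endswith('.xlsx') is expected
```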