From a45226dec1288749e48eca2d13893bfa786bd580 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 4 Jun 2024 15:51:03 -0500 Subject: [PATCH 1/4] Fix tests on windows/with new 8.3 --- tests/schema/test_ontology_util.py | 2 +- tests/scripts/test_script_util.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/schema/test_ontology_util.py b/tests/schema/test_ontology_util.py index bcdd2b1d..de6111c8 100644 --- a/tests/schema/test_ontology_util.py +++ b/tests/schema/test_ontology_util.py @@ -150,7 +150,7 @@ def test_update_dataframes_from_schema(self): try: updated_dataframes = update_dataframes_from_schema(schema_dataframes_new, schema) except HedFileError as e: - self.assertEqual(len(e.issues), 111) + self.assertEqual(len(e.issues), 115) breakHere = 3 diff --git a/tests/scripts/test_script_util.py b/tests/scripts/test_script_util.py index 694a61bf..d4270360 100644 --- a/tests/scripts/test_script_util.py +++ b/tests/scripts/test_script_util.py @@ -15,9 +15,9 @@ def test_regular_extension(self): def test_tsv_extension(self): """Test that .tsv extensions are handled differently.""" # Assuming the function correctly handles paths with directories - self.assertEqual(add_extension("path/to/filename", ".tsv"), "path/to/hedtsv/filename") + self.assertEqual(add_extension(os.path.normpath("path/to/filename"), ".tsv"), os.path.normpath("path/to/hedtsv/filename")) # Testing with a basename only - self.assertEqual(add_extension("filename", ".tsv"), "hedtsv/filename") + self.assertEqual(add_extension("filename", ".tsv"), os.path.normpath("hedtsv/filename")) def test_empty_extension(self): """Test adding an empty extension.""" @@ -33,7 +33,7 @@ class TestSortBaseSchemas(unittest.TestCase): def test_mixed_file_types(self): filenames = [ "test_schema.mediawiki", - "hedtsv/test_schema/test_schema_Tag.tsv", + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), "other_schema.xml" ] expected = { @@ -45,9 +45,9 @@ def 
test_mixed_file_types(self): def test_tsv_in_correct_subfolder(self): filenames = [ - "hedtsv/test_schema/test_schema_Tag.tsv", - "hedtsv/test_schema/test_schema_Tag.tsv", - "hedtsv/wrong_folder/wrong_name_Tag.tsv" # Should be ignored + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("hedtsv/wrong_folder/wrong_name_Tag.tsv") # Should be ignored ] expected = { "test_schema": {".tsv"} @@ -57,12 +57,12 @@ def test_tsv_in_correct_subfolder(self): def test_tsv_in_correct_subfolder2(self): filenames = [ - "prerelease/hedtsv/test_schema/test_schema_Tag.tsv", - "prerelease/hedtsv/test_schema/test_schema_Tag.tsv", - "prerelease/hedtsv/wrong_folder/wrong_name_Tag.tsv" # Should be ignored + os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("prerelease/hedtsv/test_schema/test_schema_Tag.tsv"), + os.path.normpath("prerelease/hedtsv/wrong_folder/wrong_name_Tag.tsv") # Should be ignored ] expected = { - "prerelease/test_schema": {".tsv"} + os.path.normpath("prerelease/test_schema"): {".tsv"} } result = sort_base_schemas(filenames) self.assertEqual(dict(result), expected) @@ -70,7 +70,7 @@ def test_tsv_in_correct_subfolder2(self): def test_ignored_files(self): filenames = [ "test_schema.mediawiki", - "not_hedtsv/test_schema/test_schema_Tag.tsv" # Should be ignored + os.path.normpath("not_hedtsv/test_schema/test_schema_Tag.tsv") # Should be ignored ] expected = { "test_schema": {".mediawiki"} From 6ce29e28dbc5e1dc1f682bf75bb47edbf2343e5f Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 4 Jun 2024 18:26:20 -0500 Subject: [PATCH 2/4] First pass create_ontology script --- hed/scripts/convert_and_update_schema.py | 2 +- hed/scripts/create_ontology.py | 47 ++++++++++++++++++++++++ hed/scripts/script_util.py | 24 ++++++++++++ pyproject.toml | 1 + 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 hed/scripts/create_ontology.py diff --git 
a/hed/scripts/convert_and_update_schema.py b/hed/scripts/convert_and_update_schema.py index 4117985d..1883c499 100644 --- a/hed/scripts/convert_and_update_schema.py +++ b/hed/scripts/convert_and_update_schema.py @@ -72,7 +72,7 @@ def convert_and_update(filenames, set_ids): def main(): parser = argparse.ArgumentParser(description='Update other schema formats based on the changed one.') parser.add_argument('filenames', nargs='*', help='List of files to process') - parser.add_argument('--set-ids', action='store_true', help='Set IDs for each file') + parser.add_argument('--set-ids', action='store_true', help='Add missing hed ids') args = parser.parse_args() diff --git a/hed/scripts/create_ontology.py b/hed/scripts/create_ontology.py new file mode 100644 index 00000000..0725a691 --- /dev/null +++ b/hed/scripts/create_ontology.py @@ -0,0 +1,47 @@ +from hed.schema import load_schema_version +from hed.schema.schema_io.df2schema import load_dataframes +from hed.schema.schema_io.ontology_util import convert_df_to_omn +from hed.scripts.script_util import get_prerelease_path, get_schema_filename +import argparse +import os + + +def create_ontology(repo_path, schema_name, schema_version, dest): + final_source = get_prerelease_path(repo_path, schema_name, schema_version) + + dataframes = load_dataframes(final_source) + _, omn_dict = convert_df_to_omn(dataframes) + + base = get_schema_filename(schema_name, schema_version) + output_dest = os.path.join(dest, base) + os.makedirs(output_dest, exist_ok=True) + for suffix, omn_text in omn_dict.items(): + filename = os.path.join(output_dest, f"{base}_{suffix}.omn") + with open(filename, mode='w', encoding='utf-8') as opened_file: + opened_file.writelines(omn_text) + + return 0 + + +def main(): + parser = argparse.ArgumentParser(description='Convert a specified schema in the prerelease folder to an ontology.') + parser.add_argument('repo_path', help='The location of the hed-schemas directory') + parser.add_argument('schema_name', 
help='The name of the schema to convert("standard" for standard schema)') + parser.add_argument('schema_version', help='The version of the schema to convert') + parser.add_argument('--dest', default=os.path.join("src", "ontology"), help='The base location to save to') + + args = parser.parse_args() + + repo_path = args.repo_path + schema_name = args.schema_name + schema_version = args.schema_version + dest = args.dest + + # Trigger a local cache hit (this ensures trying to load withStandard schemas will work properly) + _ = load_schema_version("8.2.0") + + return create_ontology(repo_path, schema_name, schema_version, dest) + + +if __name__ == "__main__": + exit(main()) diff --git a/hed/scripts/script_util.py b/hed/scripts/script_util.py index 441ec736..06be5af1 100644 --- a/hed/scripts/script_util.py +++ b/hed/scripts/script_util.py @@ -159,3 +159,27 @@ def validate_all_schemas(schema_files): all_issues += single_schema_issues return all_issues + + + +def get_schema_filename(schema_name, schema_version): + schema_name = schema_name.lower() + if schema_name == "standard" or schema_name == "": + return f"HED{schema_version}" + else: + return f"HED_{schema_name}_{schema_version}" + + +def get_prerelease_path(repo_path, schema_name, schema_version): + """Returns the location of the given pre-release schema in the repo""" + schema_name = schema_name.lower() + if schema_name == "" or schema_name == "standard": + base_path = "standard_schema" + else: + base_path = os.path.join("library_schemas", schema_name) + + base_path = os.path.join(repo_path, base_path, "prerelease") + + schema_filename = get_schema_filename(schema_name, schema_version) + + return os.path.join(base_path, "hedtsv", schema_filename) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 7634e7e5..b7e341ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ run_remodel_backup = "hed.tools.remodeling.cli.run_remodel_backup:main" run_remodel_restore = 
"hed.tools.remodeling.cli.run_remodel_restore:main" hed_validate_schemas = "hed.scripts.validate_schemas:main" hed_update_schemas = "hed.scripts.convert_and_update_schema:main" +hed_create_ontology = "hed.scripts.create_ontology:main" [tool.versioneer] VCS = "git" From d5d5f2663f34c29dc3f148ce9759e09fee37e87a Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 4 Jun 2024 18:36:44 -0500 Subject: [PATCH 3/4] Add debug print --- hed/scripts/create_ontology.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hed/scripts/create_ontology.py b/hed/scripts/create_ontology.py index 0725a691..2b4aa993 100644 --- a/hed/scripts/create_ontology.py +++ b/hed/scripts/create_ontology.py @@ -8,6 +8,7 @@ def create_ontology(repo_path, schema_name, schema_version, dest): final_source = get_prerelease_path(repo_path, schema_name, schema_version) + print(f"Creating ontology from {final_source}") dataframes = load_dataframes(final_source) _, omn_dict = convert_df_to_omn(dataframes) From 193696a43789dc787d1061f74a3fab61717024b6 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 4 Jun 2024 18:53:55 -0500 Subject: [PATCH 4/4] remove debug print --- hed/scripts/create_ontology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hed/scripts/create_ontology.py b/hed/scripts/create_ontology.py index 2b4aa993..24878d33 100644 --- a/hed/scripts/create_ontology.py +++ b/hed/scripts/create_ontology.py @@ -8,7 +8,7 @@ def create_ontology(repo_path, schema_name, schema_version, dest): final_source = get_prerelease_path(repo_path, schema_name, schema_version) - print(f"Creating ontology from {final_source}") + # print(f"Creating ontology from {final_source}") dataframes = load_dataframes(final_source) _, omn_dict = convert_df_to_omn(dataframes)