diff --git a/backend/src/impl/db_utils/system_db_utils.py b/backend/src/impl/db_utils/system_db_utils.py index 56acd7c5..e5cd67ef 100644 --- a/backend/src/impl/db_utils/system_db_utils.py +++ b/backend/src/impl/db_utils/system_db_utils.py @@ -229,7 +229,7 @@ def process(): } return processor.get_overall_statistics( - metadata=processor_metadata, sys_output=system_output_data + metadata=processor_metadata, sys_output=system_output_data.samples ) try: @@ -240,10 +240,6 @@ def process(): for metric_stat in overall_statistics.metric_stats ] - # TODO(chihhao) needs proper serializiation & deserializiation in SDK - overall_statistics.sys_info.tokenizer = ( - overall_statistics.sys_info.tokenizer.json_repr() - ) # TODO avoid None as nullable seems undeclarable for array and object # in openapi.yaml if overall_statistics.sys_info.results.calibration is None: @@ -271,7 +267,7 @@ def db_operations(session: ClientSession) -> str: ) DBUtils.drop(output_collection) DBUtils.insert_many( - output_collection, list(system_output_data), False, session + output_collection, list(system_output_data.samples), False, session ) return system_db_id diff --git a/backend/src/impl/default_controllers_impl.py b/backend/src/impl/default_controllers_impl.py index 6a0f5901..34584eb7 100644 --- a/backend/src/impl/default_controllers_impl.py +++ b/backend/src/impl/default_controllers_impl.py @@ -17,7 +17,6 @@ from explainaboard.loaders.loader_registry import get_supported_file_types_for_loader from explainaboard.metric import MetricStats from explainaboard.processors.processor_registry import get_metric_list_for_processor -from explainaboard.utils import tokenizer as exb_tokenzier from explainaboard_web.impl.auth import get_user from explainaboard_web.impl.db_utils.dataset_db_utils import DatasetDBUtils from explainaboard_web.impl.db_utils.system_db_utils import SystemDBUtils @@ -272,10 +271,6 @@ def systems_analyses_post(body: SystemsAnalysesBody): ] system_output_info.metric_configs = metric_configs - cls_name = system_output_info.tokenizer.pop("cls_name") - system_output_info.tokenizer = getattr(exb_tokenzier, cls_name)( - **system_output_info.tokenizer - ) processor = get_processor(TaskType(system_output_info.task_name)) metric_stats = [MetricStats(stat) for stat in system.metric_stats] diff --git a/backend/templates/requirements.mustache b/backend/templates/requirements.mustache index ce0be5dc..ed064355 100644 --- a/backend/templates/requirements.mustache +++ b/backend/templates/requirements.mustache @@ -7,7 +7,7 @@ Flask-PyMongo swagger-ui-bundle >= 0.0.9 python-dotenv pyjwt[crypto] == 2.3.0 -explainaboard == 0.8.15 +explainaboard == 0.9.1 en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl pre-commit marisa_trie diff --git a/frontend/src/components/SystemSubmitDrawer/index.tsx b/frontend/src/components/SystemSubmitDrawer/index.tsx index 7118a5f8..ab88e68a 100644 --- a/frontend/src/components/SystemSubmitDrawer/index.tsx +++ b/frontend/src/components/SystemSubmitDrawer/index.tsx @@ -100,7 +100,8 @@ export function SystemSubmitDrawer(props: Props) { task, dataset, metric_names, - language, + source_language, + target_language, sys_out_file, custom_dataset_file, code, @@ -120,7 +121,8 @@ export function SystemSubmitDrawer(props: Props) { model_name: name, paper_info: {}, task: task, - language, + source_language, + target_language, code, is_private, }, @@ -143,7 +145,8 @@ export function SystemSubmitDrawer(props: Props) { model_name: name, paper_info: {}, task: task, - language, + source_language, + target_language, code, is_private, }, @@ -163,7 +166,8 @@ export function SystemSubmitDrawer(props: Props) { "sys_out_file", "custom_dataset_file", "metric_names", - "language", + "source_language", + "target_language", "code", ]); } catch (e) { @@ -361,14 +365,22 @@ export function SystemSubmitDrawer(props: Props) { + + @@ -385,7 +397,8 @@ interface FormData { custom_dataset_file: DataFileValue; metric_names: string[]; - language: string; + source_language: string; + target_language: string; code: string; is_private: boolean; } diff --git a/frontend/src/components/SystemsTable/SystemTableContent.tsx b/frontend/src/components/SystemsTable/SystemTableContent.tsx index 185a53bc..d031c7eb 100644 --- a/frontend/src/components/SystemsTable/SystemTableContent.tsx +++ b/frontend/src/components/SystemsTable/SystemTableContent.tsx @@ -121,18 +121,28 @@ export function SystemTableContent({ ), }, { - dataIndex: ["system_info", "language"], + dataIndex: ["system_info", "split"], width: 110, - title: "Dataset split", + title: "Dataset Split", fixed: "left", align: "center", render: (_, record) => record.system_info.dataset_split || "unspecified", }, { - dataIndex: ["system_info", "language"], + dataIndex: ["system_info", "source_language"], width: 100, - title: "Language", + title: "Input Lang", align: "center", + render: (_, record) => + record.system_info.source_language || "unspecified", + }, + { + dataIndex: ["system_info", "target_language"], + width: 100, + title: "Output Lang", + align: "center", + render: (_, record) => + record.system_info.target_language || "unspecified", }, ...metricColumns, { diff --git a/openapi/openapi.yaml b/openapi/openapi.yaml index faabc253..a9098534 100644 --- a/openapi/openapi.yaml +++ b/openapi/openapi.yaml @@ -468,7 +468,10 @@ components: dataset_split: type: string nullable: true - language: + source_language: + type: string + example: en + target_language: type: string example: en reload_stat: @@ -487,14 +490,23 @@ components: properties: name: type: string - language: + source_language: + type: string + nullable: True + target_language: type: string nullable: True cls_name: type: string additionalProperties: true required: [name, cls_name] - tokenizer: + source_tokenizer: + type: object + properties: + cls_name: + type: string + example: "SingleSpaceTokenizer" + target_tokenizer: type: object properties: cls_name: @@ -523,11 +535,13 @@ components: [ task_name, model_name, - language, + source_language, + target_language, reload_stat, is_print_case, conf_value, - tokenizer, + source_tokenizer, + target_tokenizer, features, results, metric_configs, @@ -658,7 +672,10 @@ components: items: type: string example: [Accuracy] - language: + source_language: + type: string + example: en + target_language: type: string example: en code: