From 33f72b19ec8c4b460c1685da5ecdb3c57d4c8d6f Mon Sep 17 00:00:00 2001 From: weijie <34210233+shiweijiezero@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:02:02 +0800 Subject: [PATCH] fix Bug: KeyError: 'text' Corresponding to issue #296 (#300) * fix Bug: KeyError: 'text' In file data_juicer/config/config.py, lines 418-429 did not consider the situation where arg: text_key was initialized to 'text', resulting in arg: text_key not being updated properly and always keeping the value 'text' * Fix Bug: text_key does not update correctly * Update config.py Normalize Format --- data_juicer/config/config.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/data_juicer/config/config.py b/data_juicer/config/config.py index 05fbad14c..5fbf3494f 100644 --- a/data_juicer/config/config.py +++ b/data_juicer/config/config.py @@ -268,8 +268,8 @@ def init_configs(args=None): try: cfg = parser.parse_args(args=args) - cfg = update_op_process(cfg, parser) cfg = init_setup_from_cfg(cfg) + cfg = update_op_process(cfg, parser) # copy the config file into the work directory config_backup(cfg) @@ -422,11 +422,15 @@ def init_setup_from_cfg(cfg): 'audio_key': cfg.audio_key, 'video_key': cfg.video_key, } - elif args['text_key'] is None: - args['text_key'] = text_key - args['image_key'] = cfg.image_key - args['audio_key'] = cfg.audio_key - args['video_key'] = cfg.video_key + else: + if 'text_key' not in args or args['text_key'] is None: + args['text_key'] = text_key + if 'image_key' not in args or args['image_key'] is None: + args['image_key'] = cfg.image_key + if 'audio_key' not in args or args['audio_key'] is None: + args['audio_key'] = cfg.audio_key + if 'video_key' not in args or args['video_key'] is None: + args['video_key'] = cfg.video_key op[op_name] = args return cfg