Skip to content

Commit

Permalink
fix param definition (#424)
Browse files: browse the repository at this point in the history
* fix param definition

* add param check for `seed_file`
  • Loading branch information
Cathy0908 authored Sep 11, 2024
1 parent da1acc9 commit 8ab74a3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/extract_qa_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class ExtractQAMapper(Mapper):

def __init__(self,
hf_model: str = 'alibaba-pai/pai-qwen1_5-7b-doc2qa',
trust_remote_code=False,
trust_remote_code: bool = False,
pattern: str = None,
qa_format: str = 'chatml',
enable_vllm: bool = True,
Expand Down
10 changes: 7 additions & 3 deletions data_juicer/ops/mapper/generate_instruction_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ class GenerateInstructionMapper(Mapper):
_accelerator = 'cuda'

def __init__(self,
hf_model,
seed_file,
instruct_num,
hf_model: str = 'Qwen/Qwen-7B-Chat',
seed_file: str = None,
instruct_num: int = 3,
trust_remote_code: bool = False,
similarity_threshold: float = 0.7,
prompt_template: str = None,
Expand Down Expand Up @@ -111,6 +111,10 @@ def __init__(self,
super().__init__(*args, **kwargs)
self.num_proc = 1

if not seed_file:
raise ValueError('Please provide `seed_file` parameter, a file in chatml format. '\
'Reference data: data-juicer/demos/data/demo-dataset-chatml.jsonl ')

self.instruct_num = instruct_num
self.similarity_threshold = similarity_threshold
self.similarity_type = 'rouge_l'
Expand Down

0 comments on commit 8ab74a3

Please sign in to comment.