diff --git a/uniflow/flow/config.py b/uniflow/flow/config.py index cbc62f0e..0f1a89b5 100644 --- a/uniflow/flow/config.py +++ b/uniflow/flow/config.py @@ -38,7 +38,7 @@ class ExtractConfig: class ExtractTxtConfig(ExtractConfig): """Extract Txt Config Class.""" - flow_name: str = "ExtractTxtFlow" + flow_name: str = """ExtractTxtFlow""" @dataclass @@ -52,7 +52,7 @@ class ExtractS3TxtConfig(ExtractConfig): class ExtractPDFConfig(ExtractConfig): """Nougat Config Class.""" - flow_name: str = "ExtractPDFFlow" + flow_name: str = """ExtractPDFFlow""" model_config: ModelConfig = field(default_factory=NougatModelConfig) splitter: str = PARAGRAPH_SPLITTER @@ -61,7 +61,7 @@ class ExtractPDFConfig(ExtractConfig): class ExtractImageConfig(ExtractConfig): """Extract Image Config Class""" - flow_name: str = "ExtractImageFlow" + flow_name: str = """ExtractImageFlow""" model_config: ModelConfig = field(default_factory=LayoutModelConfig()) splitter: str = PARAGRAPH_SPLITTER @@ -70,7 +70,7 @@ class ExtractImageConfig(ExtractConfig): class ExtractMarkdownConfig(ExtractConfig): """Extract Markdown Config Class.""" - flow_name: str = "ExtractMarkdownFlow" + flow_name: str = """ExtractMarkdownFlow""" splitter: str = MARKDOWN_HEADER_SPLITTER @@ -78,7 +78,7 @@ class ExtractMarkdownConfig(ExtractConfig): class ExtractIpynbConfig(ExtractConfig): """Extract ipynb Config Class.""" - flow_name: str = "ExtractIpynbFlow" + flow_name: str = """ExtractIpynbFlow""" @dataclass @@ -103,20 +103,18 @@ class TransformConfig: num_thread: int = 1 prompt_template: PromptTemplate = field( default_factory=lambda: PromptTemplate( - instruction=""" - Generate one question and its corresponding answer based on the last context in the last - example. Follow the format of the examples below to include context, question, and answer in the response. - """, + instruction="""Generate one question and its corresponding answer based on the last context in the last \ +example. Follow the format of the examples below to include context, question, and answer in the response.""", few_shot_prompt=[ Context( - context="The quick brown fox jumps over the lazy black dog.", - question="What is the color of the fox?", - answer="brown.", + context="""The quick brown fox jumps over the lazy black dog.""", + question="""What is the color of the fox?""", + answer="""brown.""", ), Context( - context="The quick brown fox jumps over the lazy black dog.", - question="What is the color of the dog?", - answer="black.", + context="""The quick brown fox jumps over the lazy black dog.""", + question="""What is the color of the dog?""", + answer="""black.""", ), ], ) @@ -127,7 +125,7 @@ class TransformConfig: class TransformOpenAIConfig(TransformConfig): """Transform OpenAI Config Class.""" - flow_name: str = "TransformOpenAIFlow" + flow_name: str = """TransformOpenAIFlow""" model_config: ModelConfig = field(default_factory=OpenAIModelConfig) @@ -135,7 +133,7 @@ class TransformOpenAIConfig(TransformConfig): class TransformHuggingFaceConfig(TransformConfig): """Transform Hugging Face Config Class.""" - flow_name: str = "TransformHuggingFaceFlow" + flow_name: str = """TransformHuggingFaceFlow""" model_config: ModelConfig = field(default_factory=HuggingfaceModelConfig) @@ -143,7 +141,7 @@ class TransformHuggingFaceConfig(TransformConfig): class TransformQAHuggingFaceConfig(TransformConfig): """Transform Hugging Face Config Class for raw response format.""" - flow_name: str = "TransformHuggingFaceFlow" + flow_name: str = """TransformHuggingFaceFlow""" model_config: ModelConfig = field( default_factory=lambda: HuggingfaceModelConfig( response_start_key="question", response_format={"type": "text"} @@ -152,20 +150,18 @@ class TransformQAHuggingFaceConfig(TransformConfig): num_thread: int = 1 prompt_template: PromptTemplate = field( default_factory=lambda: PromptTemplate( - instruction=""" - Generate one question and its corresponding answer based on the last context in the last - example. Follow the format of the examples below to include context, question, and answer in the response. - """, + instruction="""Generate one question and its corresponding answer based on the last context in the last \ +example. Follow the format of the examples below to include context, question, and answer in the response.""", few_shot_prompt=[ Context( - context="The quick brown fox jumps over the lazy black dog.", - question="What is the color of the fox?", - answer="brown.", + context="""The quick brown fox jumps over the lazy black dog.""", + question="""What is the color of the fox?""", + answer="""brown.""", ), Context( - context="The quick brown fox jumps over the lazy black dog.", - question="What is the color of the dog?", - answer="black.", + context="""The quick brown fox jumps over the lazy black dog.""", + question="""What is the color of the dog?""", + answer="""black.""", ), ], ) @@ -176,7 +172,7 @@ class TransformQAHuggingFaceConfig(TransformConfig): class TransformQAHuggingFaceJsonFormatConfig(TransformConfig): """Transform Hugging Face QA Config Class for Json response format.""" - flow_name: str = "TransformHuggingFaceFlow" + flow_name: str = """TransformHuggingFaceFlow""" # model will start generating response starting from # question, so the response start key is question. # this is very important for the model to generate valid json response. @@ -188,20 +184,18 @@ class TransformQAHuggingFaceJsonFormatConfig(TransformConfig): num_thread: int = 1 prompt_template: PromptTemplate = field( default_factory=lambda: PromptTemplate( - instruction=""" - Generate one question and its corresponding answer based on the last context in the last - example. Follow the format of the examples below to include context, question, and answer in the response. - """, + instruction="""Generate one question and its corresponding answer based on the last context in the last \ +example. Follow the format of the examples below to include context, question, and answer in the response.""", few_shot_prompt=[ Context( - context="The quick brown fox jumps over the lazy black dog.", - question="What is the color of the fox?", - answer="brown.", + context="""The quick brown fox jumps over the lazy black dog.""", + question="""What is the color of the fox?""", + answer="""brown.""", ), Context( - context="The quick brown fox jumps over the lazy black dog.", - question="What is the color of the dog?", - answer="black.", + context="""The quick brown fox jumps over the lazy black dog.""", + question="""What is the color of the dog?""", + answer="""black.""", ), ], ) @@ -212,9 +206,9 @@ class TransformQAHuggingFaceJsonFormatConfig(TransformConfig): class TransformLMQGConfig(TransformConfig): """Transform LMQG Config Class.""" - flow_name: str = "TransformLMQGFlow" + flow_name: str = """TransformLMQGFlow""" prompt_template: PromptTemplate = field( - default_factory=lambda: PromptTemplate(instruction="", few_shot_prompt=[]) + default_factory=lambda: PromptTemplate(instruction="""""", few_shot_prompt=[]) ) model_config: ModelConfig = field(default_factory=LMQGModelConfig) @@ -223,16 +217,16 @@ class TransformLMQGConfig(TransformConfig): class TransformCopyConfig(TransformConfig): """Transform Linear Config Class.""" - flow_name: str = "TransformCopyFlow" + flow_name: str = """TransformCopyFlow""" prompt_template: PromptTemplate = field( - default_factory=lambda: PromptTemplate(instruction="", few_shot_prompt=[]) + default_factory=lambda: PromptTemplate(instruction="""""", few_shot_prompt=[]) ) model_config: ModelConfig = field(default_factory=lambda: {}) @dataclass class TransformForGenerationOpenAIGPT3p5Config(TransformConfig): - flow_name: str = "TransformOpenAIFlow" + flow_name: str = """TransformOpenAIFlow""" model_config: ModelConfig = field( default_factory=lambda: OpenAIModelConfig( model_name="gpt-3.5-turbo-1106", @@ -244,16 +238,14 @@ class TransformForGenerationOpenAIGPT3p5Config(TransformConfig): ) prompt_template: PromptTemplate = field( default_factory=lambda: PromptTemplate( - instruction=""" - Your role is to explore the forefront of technological developments. Examine the text for mentions of state-of-the-art technology applications, innovative methods, or emerging areas of innovation. If present, list each technology by name in a string format. If none are mentioned, return an empty list. Ensure the response is always in a list format. - """, + instruction="""Your role is to explore the forefront of technological developments. Examine the text for mentions of state-of-the-art technology applications, innovative methods, or emerging areas of innovation. If present, list each technology by name in a string format. If none are mentioned, return an empty list. Ensure the response is always in a list format.""", few_shot_prompt=[ Context( - context="Our new business wins are supported by our product leadership strategy of bringing new product to market that provides value for our customers, such as market-leading 500 bar GDi technology, helping customers improve efficiency, reduce emissions and lower costs leveraging our GDi technology and capital to provide a value-focused solution for our off-highway diesel applications and hydrogen ICE that differentiates us from our competition. We're helping our customers move towards carbon neutral and carbon-free fuels with solutions using ethanol, biofuels and hydrogen, as it's our view that a liquefied or gaseous fuel is going to be a key element of our journey to carbon neutrality.", - answer=["500 bar GDi technology", "carbon neutral"], + context="""Our new business wins are supported by our product leadership strategy of bringing new product to market that provides value for our customers, such as market-leading 500 bar GDi technology, helping customers improve efficiency, reduce emissions and lower costs leveraging our GDi technology and capital to provide a value-focused solution for our off-highway diesel applications and hydrogen ICE that differentiates us from our competition. We're helping our customers move towards carbon neutral and carbon-free fuels with solutions using ethanol, biofuels and hydrogen, as it's our view that a liquefied or gaseous fuel is going to be a key element of our journey to carbon neutrality.""", + answer=["""500 bar GDi technology""", """carbon neutral"""], ), Context( - context="The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.", + context="""The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.""", answer=[], ), ], @@ -263,7 +255,7 @@ class TransformForGenerationOpenAIGPT3p5Config(TransformConfig): @dataclass class TransformForClusteringOpenAIGPT4Config: - flow_name: str = "TransformOpenAIFlow" + flow_name: str = """TransformOpenAIFlow""" model_config: ModelConfig = field( default_factory=lambda: OpenAIModelConfig( model_name="gpt-4-1106-preview", @@ -275,53 +267,59 @@ class TransformForClusteringOpenAIGPT4Config: ) prompt_template: PromptTemplate = field( default_factory=lambda: PromptTemplate( - instruction=""" - Your task as a technology expert is to categorize a list of tech terms. First, cluster these terms into distinct groups based on their semantic similarities, where each group encapsulates a specific technological concept. Second, within these clusters, identify and merge terms that are essentially synonymous. Your final output should be a well-structured dictionary, where each key signifies a unique category of technology, and its corresponding value is a list of technology terms. - """, + instruction="""Your task as a technology expert is to categorize a list of tech terms. First, cluster these terms into distinct groups based on their semantic similarities, where each group encapsulates a specific technological concept. Second, within these clusters, identify and merge terms that are essentially synonymous. Your final output should be a well-structured dictionary, where each key signifies a unique category of technology, and its corresponding value is a list of technology terms.""", few_shot_prompt=[ Context( context=[ - "artificial intelligence", - "AI", - "500 bar GDi technology", - "ML", - "500 bar GDi", - "machine learning", + """artificial intelligence""", + """AI""", + """500 bar GDi technology""", + """ML""", + """500 bar GDi""", + """machine learning""", ], answer={ - "500_BAR_GDI": ["500 bar GDi"], - "AIML": ["AI", "ML"], + """500_BAR_GDI""": ["""500 bar GDi"""], + """AIML""": ["""AI""", """ML"""], }, ), Context( context=[ - "cryptocurrency", - "blockchain", - "Bitcoin", - "Ethereum", - "digital currency", - "crypto mining", - "mRNA vaccine", - "gene editing", - "CRISPR", - "Ethereum platform", - "Ether", - "NFTs", - "DNA sequencing", - "bioinformatics", - "mRNA therapy", + """cryptocurrency""", + """blockchain""", + """Bitcoin""", + """Ethereum""", + """digital currency""", + """crypto mining""", + """mRNA vaccine""", + """gene editing""", + """CRISPR""", + """Ethereum platform""", + """Ether""", + """NFTs""", + """DNA sequencing""", + """bioinformatics""", + """mRNA therapy""", ], answer={ - "BIO_TECH": [ - "mRNA vaccine", - "gene editing", - "CRISPR", - "DNA sequencing", - "bioinformatics", - "mRNA therapy", + """BIO_TECH""": [ + """mRNA vaccine""", + """gene editing""", + """CRISPR""", + """DNA sequencing""", + """bioinformatics""", + """mRNA therapy""", + ], + """BLOCKCHAIN_TECH""": [ + """blockchain""", + """crypto mining""", + """NFTs""", + ], + """CRYPTOCURRENCY""": [ + """Bitcoin""", + """cryptocurrency""", + """Ethereum""", ], - "BLOCKCHAIN_TECH": ["blockchain", "crypto mining", "NFTs"], - "CRYPTOCURRENCY": ["Bitcoin", "cryptocurrency", "Ethereum"], }, ), ], @@ -336,7 +334,7 @@ class TransformForClusteringOpenAIGPT4Config: class RaterConfig: """Rater Config Class.""" - flow_name: str = "RaterFlow" + flow_name: str = """RaterFlow""" model_config: ModelConfig = field(default_factory=ModelConfig) label2score: Dict[str, float] = field(default_factory=dict) prompt_template: PromptTemplate = field(default_factory=PromptTemplate) @@ -356,11 +354,11 @@ def __post_init__(self): missing_labels = incompatible_labels["missing_labels"] if unexpected_labels: raise ValueError( - "Inconsistent labels found in prompt_template examples, " - f"example label {unexpected_labels} not in label2score has keys {list(self.label2score.keys())}", + """Inconsistent labels found in prompt_template examples, """ + f"""example label {unexpected_labels} not in label2score has keys {list(self.label2score.keys())}""", ) if missing_labels: - print(f"The label2score label {missing_labels} not in example label.") + print(f"""The label2score label {missing_labels} not in example label.""") # batch_size must be divisible by num_return_sequences for HuggingfaceModelConfig only # This might need to be extended to other model configs in the future. if isinstance(self.model_config, HuggingfaceModelConfig): @@ -369,8 +367,8 @@ def __post_init__(self): != 0 # noqa E501 ): raise ValueError( - f"batch_size {self.model_config.batch_size} must be divisible by" - f"num_return_sequences {self.model_config.num_return_sequences}" + f"""batch_size {self.model_config.batch_size} must be divisible by""" + f"""num_return_sequences {self.model_config.num_return_sequences}""" ) def check_labels(self) -> Dict[str, list]: @@ -428,26 +426,24 @@ class RaterForClassificationOpenAIGPT4Config(RaterConfig): ) prompt_template: PromptTemplate = field( default_factory=lambda: PromptTemplate( - instruction=""" - Evaluate the appropriateness of a given answer based on the question and the context. - There are few examples below, consisting of context, question, answer, explanation and label. - If answer is appropriate, you should give a label representing higher score and vise versa. Check label to score dictionary: {label2score}. - Your response should only focus on the unlabeled sample, including two fields: explanation and label (one of {label_list}). - """, + instruction="""Evaluate the appropriateness of a given answer based on the question and the context. \ +There are few examples below, consisting of context, question, answer, explanation and label. \ +If answer is appropriate, you should give a label representing higher score and vise versa. Check label to score dictionary: {label2score}. \ +Your response should only focus on the unlabeled sample, including two fields: explanation and label (one of {label_list}).""", few_shot_prompt=[ Context( - context="The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.", - question="When was the Eiffel Tower constructed?", - answer="The Eiffel Tower was constructed in 1889.", - explanation="The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.", - label="Yes", + context="""The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.""", + question="""When was the Eiffel Tower constructed?""", + answer="""The Eiffel Tower was constructed in 1889.""", + explanation="""The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.""", + label="""Yes""", ), Context( - context="Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.", - question="Where does photosynthesis primarily occur in plant cells?", - answer="Photosynthesis primarily occurs in the mitochondria of plant cells.", - explanation="The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.", - label="No", + context="""Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.""", + question="""Where does photosynthesis primarily occur in plant cells?""", + answer="""Photosynthesis primarily occurs in the mitochondria of plant cells.""", + explanation="""The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.""", + label="""No""", ), ], ) @@ -498,18 +494,18 @@ class RaterForClassificationOpenAIGPT3p5Config(RaterConfig): """, few_shot_prompt=[ Context( - context="The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.", - question="When was the Eiffel Tower constructed?", - answer="The Eiffel Tower was constructed in 1889.", - explanation="The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.", - label="Yes", + context="""The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.""", + question="""When was the Eiffel Tower constructed?""", + answer="""The Eiffel Tower was constructed in 1889.""", + explanation="""The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.""", + label="""Yes""", ), Context( - context="Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.", - question="Where does photosynthesis primarily occur in plant cells?", - answer="Photosynthesis primarily occurs in the mitochondria of plant cells.", - explanation="The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.", - label="No", + context="""Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.""", + question="""Where does photosynthesis primarily occur in plant cells?""", + answer="""Photosynthesis primarily occurs in the mitochondria of plant cells.""", + explanation="""The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.""", + label="""No""", ), ], ) @@ -547,18 +543,18 @@ class RaterForClassificationBedrockClaudeConfig(RaterConfig): """, few_shot_prompt=[ Context( - context="The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.", - question="When was the Eiffel Tower constructed?", - answer="The Eiffel Tower was constructed in 1889.", - explanation="The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.", - label="Yes", + context="""The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.""", + question="""When was the Eiffel Tower constructed?""", + answer="""The Eiffel Tower was constructed in 1889.""", + explanation="""The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.""", + label="""Yes""", ), Context( - context="Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.", - question="Where does photosynthesis primarily occur in plant cells?", - answer="Photosynthesis primarily occurs in the mitochondria of plant cells.", - explanation="The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.", - label="No", + context="""Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.""", + question="""Where does photosynthesis primarily occur in plant cells?""", + answer="""Photosynthesis primarily occurs in the mitochondria of plant cells.""", + explanation="""The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.""", + label="""No""", ), ], ) @@ -581,7 +577,7 @@ class RaterForClassificationSageMakerEndpointConfig(RaterConfig): answer, label, and explanation for each case. """ - flow_name: str = "RaterFlow" + flow_name: str = """RaterFlow""" model_config: ModelConfig = field(default_factory=SageMakerModelConfig) label2score: Dict[str, float] = field( default_factory=lambda: {"Yes": 1.0, "No": 0.0} @@ -595,20 +591,20 @@ class RaterForClassificationSageMakerEndpointConfig(RaterConfig): Context( context="""The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.""", - question="When was the Eiffel Tower constructed?", - answer="The Eiffel Tower was constructed in 1889.", + question="""When was the Eiffel Tower constructed?""", + answer="""The Eiffel Tower was constructed in 1889.""", explanation="""The context explicitly mentions that the Eiffel Tower was constructed in 1889, so the answer is correct.""", - label="Yes", + label="""Yes""", ), Context( context="""Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.""", - question="Where does photosynthesis primarily occur in plant cells?", - answer="Photosynthesis primarily occurs in the mitochondria of plant cells.", + question="""Where does photosynthesis primarily occur in plant cells?""", + answer="""Photosynthesis primarily occurs in the mitochondria of plant cells.""", explanation="""The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells, so the answer is incorrect.""", - label="No", + label="""No""", ), ], ) @@ -634,7 +630,7 @@ class RaterForClassificationHuggingfaceConfig(RaterConfig): model_config: ModelConfig = field( default_factory=HuggingfaceModelConfig( - response_start_key="explanation", + response_start_key="""explanation""", response_format={"type": "text"}, batch_size=1, ) @@ -648,18 +644,18 @@ class RaterForClassificationHuggingfaceConfig(RaterConfig): Follow the format of the examples below, consisting of context, question, answer, explanation and label (you must choose one from {label_list}).""", few_shot_prompt=[ Context( - context="The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.", - question="When was the Eiffel Tower constructed?", - answer="The Eiffel Tower was constructed in 1889.", - explanation="The answer is consistency to the fact that Eiffel Tower was constructed in 1889 mentioned in context, so the answer is correct.", - label="Yes", + context="""The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 and stands at a height of 324 meters.""", + question="""When was the Eiffel Tower constructed?""", + answer="""The Eiffel Tower was constructed in 1889.""", + explanation="""The answer is consistency to the fact that Eiffel Tower was constructed in 1889 mentioned in context, so the answer is correct.""", + label="""Yes""", ), Context( - context="Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.", - question="Where does photosynthesis primarily occur in plant cells?", - answer="Photosynthesis primarily occurs in the mitochondria of plant cells.", - explanation="The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells but not mitochondria indicated by answer, so the answer is incorrect.", - label="No", + context="""Photosynthesis is a process used by plants to convert light energy into chemical energy. This process primarily occurs in the chloroplasts of plant cells.""", + question="""Where does photosynthesis primarily occur in plant cells?""", + answer="""Photosynthesis primarily occurs in the mitochondria of plant cells.""", + explanation="""The context mentions that photosynthesis primarily occurs in the chloroplasts of plant cells but not mitochondria indicated by answer, so the answer is incorrect.""", + label="""No""", ), ], ) @@ -710,11 +706,11 @@ class RaterForGeneratedAnswerOpenAIGPT4Config(RaterConfig): """, few_shot_prompt=[ Context( - context="Early computers were built to perform a series of single tasks, like a calculator.", - question="Did early computers function like modern calculators?", - grounding_answer="No. Early computers were used primarily for complex calculating.", - generated_answer="Yes. Early computers were built to perform a series of single tasks, similar to a calculator.", - explanation="The generated answer is better because it correctly figures out early computers was used to perform single tasks akin to calculators while grounding answer not. So we accept generated answer.", + context="""Early computers were built to perform a series of single tasks, like a calculator.""", + question="""Did early computers function like modern calculators?""", + grounding_answer="""No. Early computers were used primarily for complex calculating.""", + generated_answer="""Yes. Early computers were built to perform a series of single tasks, similar to a calculator.""", + explanation="""The generated answer is better because it correctly figures out early computers was used to perform single tasks akin to calculators while grounding answer not. So we accept generated answer.""", label="accept", ), ], @@ -774,27 +770,27 @@ class RaterForGeneratedAnswerOpenAIGPT3p5Config(RaterConfig): """, few_shot_prompt=[ Context( - context="Early computers were built to perform a series of single tasks, like a calculator.", - question="Did early computers function like modern calculators?", - grounding_answer="No. Early computers were used primarily for complex calculating.", - generated_answer="Yes. Early computers were built to perform a series of single tasks, similar to a calculator.", - explanation="The generated answer is better because it correctly figures out early computers was used to perform single tasks akin to calculators.", + context="""Early computers were built to perform a series of single tasks, like a calculator.""", + question="""Did early computers function like modern calculators?""", + grounding_answer="""No. Early computers were used primarily for complex calculating.""", + generated_answer="""Yes. Early computers were built to perform a series of single tasks, similar to a calculator.""", + explanation="""The generated answer is better because it correctly figures out early computers was used to perform single tasks akin to calculators.""", label="accept", ), Context( - context="Operating systems(OS) did not exist in their modern and more complex forms until the early 1960s.", - question="When did operating systems start to resemble their modern forms?", - grounding_answer="Operating systems started to resemble their modern forms in the early 1960s.", - generated_answer="Modern and more complex forms of operating systems began to emerge in the early 1960s.", - explanation="The generated answer is as equally good as grounding answer because they both accurately pinpoint the early 1960s as the period when modern operating systems began to develop.", + context="""Operating systems(OS) did not exist in their modern and more complex forms until the early 1960s.""", + question="""When did operating systems start to resemble their modern forms?""", + grounding_answer="""Operating systems started to resemble their modern forms in the early 1960s.""", + generated_answer="""Modern and more complex forms of operating systems began to emerge in the early 1960s.""", + explanation="""The generated answer is as equally good as grounding answer because they both accurately pinpoint the early 1960s as the period when modern operating systems began to develop.""", label="equivalent", ), Context( - context="Hardware features were added, that enabled use of runtime libraries, interrupts, and parallel processing in the 1960s.", - question="What features were added to hardware in the 1960s?", - grounding_answer="Hardware in the 1960s saw the addition of features like runtime libraries and parallel processing.", - generated_answer="The 1960s saw the addition of input output control and compatible timesharing capabilities in hardware.", - explanation="The generated answer is worse because it inaccurately suggests the addition of capabilities of hardware in 1960s which is not supported by the context.", + context="""Hardware features were added, that enabled use of runtime libraries, interrupts, and parallel processing in the 1960s.""", + question="""What features were added to hardware in the 1960s?""", + grounding_answer="""Hardware in the 1960s saw the addition of features like runtime libraries and parallel processing.""", + generated_answer="""The 1960s saw the addition of input output control and compatible timesharing capabilities in hardware.""", + explanation="""The generated answer is worse because it inaccurately suggests the addition of capabilities of hardware in 1960s which is not supported by the context.""", label="reject", ), ], diff --git a/uniflow/node.py b/uniflow/node.py index d48c6d6a..9b9fab74 100644 --- a/uniflow/node.py +++ b/uniflow/node.py @@ -1,4 +1,4 @@ -""" Node module for uniflow.""" +"""Node module for uniflow.""" from typing import Any, Mapping, Optional, Sequence diff --git a/uniflow/op/extract/load/aws/s3_op.py b/uniflow/op/extract/load/aws/s3_op.py index b7147d34..ca51a6ad 100644 --- a/uniflow/op/extract/load/aws/s3_op.py +++ b/uniflow/op/extract/load/aws/s3_op.py @@ -18,7 +18,7 @@ class ExtractS3Op(Op): def __init__(self, name: str = "extract_s3_op") -> None: try: - import boto3 # pylint: disable=import-outside-toplevel + import boto3 except ImportError as e: raise ImportError("Please install boto3 to use S3Op.") from e diff --git a/uniflow/op/extract/load/ipynb_op.py b/uniflow/op/extract/load/ipynb_op.py index fd1b7113..17670800 100644 --- a/uniflow/op/extract/load/ipynb_op.py +++ b/uniflow/op/extract/load/ipynb_op.py @@ -12,10 +12,8 @@ class ExtractIpynbOp(Op): def __init__(self, name: str) -> None: try: - import nbformat # pylint: disable=import-outside-toplevel - from nbconvert import ( # pylint: disable=import-outside-toplevel - MarkdownExporter, - ) + import nbformat + from nbconvert import MarkdownExporter except ModuleNotFoundError as exc: raise ModuleNotFoundError( "Please install nbformat and nbconvert to load ipynb file. You can use `pip install nbformat nbconvert` to install them." diff --git a/uniflow/op/model/model_server.py b/uniflow/op/model/model_server.py index f58c5a83..5013b1f8 100644 --- a/uniflow/op/model/model_server.py +++ b/uniflow/op/model/model_server.py @@ -144,7 +144,7 @@ def __init__( self, prompt_template: PromptTemplate, model_config: Dict[str, Any] ) -> None: # import in class level to avoid installing openai package - from openai import OpenAI # pylint: disable=import-outside-toplevel + from openai import OpenAI super().__init__(prompt_template, model_config) self._model_config = OpenAIModelConfig(**self._model_config) @@ -218,7 +218,7 @@ def __init__( self, prompt_template: PromptTemplate, model_config: Dict[str, Any] ) -> None: # import in class level to avoid installing openai package - from openai import AzureOpenAI # pylint: disable=import-outside-toplevel + from openai import AzureOpenAI super().__init__(prompt_template, model_config) self._model_config = AzureOpenAIModelConfig(**self._model_config) @@ -292,9 +292,7 @@ def __init__( self._model_config = HuggingfaceModelConfig(**self._model_config) if self._model_config.neuron is False: try: - from transformers import ( # pylint: disable=import-outside-toplevel - pipeline, - ) + from transformers import pipeline except ModuleNotFoundError as exc: raise ModuleNotFoundError( "Please install transformers to use HuggingfaceModelServer. You can use `pip install transformers` to install it." @@ -324,9 +322,7 @@ def __init__( print( "Neuron model does not support quantized models. load_in_4bit and load_in_8bit are automatically set to False." ) - from uniflow.op.model.neuron_utils import ( # pylint: disable=import-outside-toplevel - Neuron, - ) + from uniflow.op.model.neuron_utils import Neuron model, tokenizer = Neuron.get_neuron_model( self._model_config.model_name, self._model_config.batch_size @@ -338,10 +334,7 @@ def __init__( def _get_model(self): """Get model.""" - from transformers import ( # pylint: disable=import-outside-toplevel - AutoModelForCausalLM, - AutoTokenizer, - ) + from transformers import AutoModelForCausalLM, AutoTokenizer tokenizer = AutoTokenizer.from_pretrained( self._model_config.model_name, @@ -465,7 +458,7 @@ def __init__( self, prompt_template: PromptTemplate, model_config: Dict[str, Any] ) -> None: # import in class level to avoid installing transformers package - from lmqg import TransformersQG # pylint: disable=import-outside-toplevel + from lmqg import TransformersQG super().__init__(prompt_template, model_config) self._model_config = LMQGModelConfig(**self._model_config) @@ -519,13 +512,9 @@ def __init__( ) -> None: # import in class level to avoid installing nougat package try: - from nougat import NougatModel # pylint: disable=import-outside-toplevel - from nougat.utils.checkpoint import ( # pylint: disable=import-outside-toplevel - get_checkpoint, - ) - from nougat.utils.device import ( # pylint: disable=import-outside-toplevel - move_to_device, - ) + from nougat import NougatModel + from nougat.utils.checkpoint import get_checkpoint + from nougat.utils.device import move_to_device except ModuleNotFoundError as exc: raise ModuleNotFoundError( "Please install nougat to use NougatModelServer. You can use `pip install nougat-ocr` to install it." @@ -571,16 +560,9 @@ def __call__(self, data: List[str]) -> List[str]: Returns: List[str]: Output data. """ - from nougat.postprocessing import ( # pylint: disable=import-outside-toplevel - markdown_compatible, - ) - from nougat.utils.dataset import ( # pylint: disable=import-outside-toplevel - LazyDataset, - ) - from torch.utils.data import ( # pylint: disable=import-outside-toplevel - ConcatDataset, - DataLoader, - ) + from nougat.postprocessing import markdown_compatible + from nougat.utils.dataset import LazyDataset + from torch.utils.data import ConcatDataset, DataLoader outs = [] for pdf in data: @@ -1082,16 +1064,14 @@ def __init__( super().__init__(prompt_template, model_config) self._model_config = LayoutModelConfig(**self._model_config) try: - import easyocr # pylint: disable=import-outside-toplevel + import easyocr self.reader = easyocr.Reader(self._model_config.ocr_lang) except ModuleNotFoundError as exc: raise ModuleNotFoundError( "Please install easyocr to use LayoutModelServer. You can use `pip install easyocr` to install it." ) from exc - from .layout_utils import ( # pylint: disable=import-outside-toplevel - LayoutPredictor, - ) + from .layout_utils import LayoutPredictor self.layout_predictor = LayoutPredictor( self._model_config.model_name, self._model_config.model_file @@ -1128,12 +1108,10 @@ def __call__(self, data: List[str]) -> List[str]: Returns: List[str]: Output data. """ - import cv2 # pylint: disable=import-outside-toplevel - import numpy as np # pylint: disable=import-outside-toplevel + import cv2 + import numpy as np - from uniflow.op.model.layout_utils import ( # pylint: disable=import-outside-toplevel - XYCut, - ) + from uniflow.op.model.layout_utils import XYCut outs = [] for img in data: