Skip to content

Commit

Permalink
move towards more generic
Browse files Browse the repository at this point in the history
  • Loading branch information
leej3 committed Aug 28, 2024
1 parent 041a521 commit c30ea88
Showing 1 changed file with 49 additions and 31 deletions.
80 changes: 49 additions & 31 deletions external_components/chat_gpt_4o_2024_08_06/extract_2.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
from llama_index.core import ChatPromptTemplate
from llama_index.core.llms import ChatMessage
from llama_index.core.llms import LLM, ChatMessage
from llama_index.llms.openai import OpenAI
from llama_index.program.openai import OpenAIPydanticProgram
from pydantic import BaseModel, Field

llm = OpenAI(model="gpt-4o-2024-08-06")

# response = llm.complete(
# "Generate a sales call transcript, use real names, talk about a product, discuss some action items"
# )


class GPT_4o_2024_08_06(BaseModel):
class LLMExtractor(BaseModel):
"""
Model for extracting information from scientific publications. These metrics
are a summary of the publications adherence to transparent or open
Expand Down Expand Up @@ -48,19 +44,28 @@ class GPT_4o_2024_08_06(BaseModel):
is_open_code: bool = Field(
description="Whether there is evidence that the code used for analysis in the paper has been shared online",
)
code_sharing_statement: str = Field(
code_sharing_statement: list[str] = Field(
description="The statement in the paper that indicates whether the code used for analysis has been shared online",
)
is_open_data: bool = Field(
description="Whether there is evidence that the data used for analysis in the paper has been shared online",
)
data_sharing_statement: str = Field(
data_sharing_statement: list[str] = Field(
description="The statement in the paper that indicates whether the data used for analysis has been shared online",
)
data_repository_url: str = Field(
description="The URL of the repository where the data can be found"
)
dataset_unique_identifier: list[str] = Field(
description="Any unique identifiers the dataset may have"
)
code_repository_url: str = Field(
description="The URL of the repository where the code and data can be found"
)
has_coi_statement: bool = Field(
description="Whether there is a conflict of interest statement in the paper",
)
coi_statement: str = Field(
coi_statement: list[str] = Field(
description="The conflict of interest statement in the paper"
)
funder: list[str] = Field(
Expand All @@ -69,37 +74,50 @@ class GPT_4o_2024_08_06(BaseModel):
has_funding_statement: bool = Field(
description="Whether there is a funding statement in the paper"
)
funding_statement: str = Field(description="The funding statement in the paper")
funding_statement: list[str] = Field(
description="The funding statement in the paper"
)
has_registration_statement: bool = Field(
description="Whether there is a registration statement in the paper",
)
registration_statement: str = Field(
registration_statement: list[str] = Field(
description="The registration statement in the paper"
)
reasoning_steps: list[str] = Field(
description="The reasoning steps used to extract the information from the paper",
)


prompt = ChatPromptTemplate(
message_templates=[
ChatMessage(
role="system",
content=(
"You are an expert at extracting information from scientific publications with a keen eye for details that when combined together allows you to summarize aspects of the publication"
def get_program(llm: LLM) -> OpenAIPydanticProgram:
    """Build the pydantic-extraction program for scientific publications.

    Constructs a chat prompt (system persona + user message carrying the
    publication XML) and wires it to ``OpenAIPydanticProgram`` so the LLM's
    structured output is parsed into an ``LLMExtractor`` instance.

    Parameters
    ----------
    llm : LLM
        The llama-index LLM that will execute the extraction.

    Returns
    -------
    OpenAIPydanticProgram
        A callable program; invoke with ``xml_content`` and ``llm_model``
        keyword arguments (they fill the template variables below).
    """
    prompt = ChatPromptTemplate(
        message_templates=[
            ChatMessage(
                role="system",
                content=(
                    "You are an expert at extracting information from scientific publications with a keen eye for details that when combined together allows you to summarize aspects of the publication"
                ),
            ),
            ChatMessage(
                role="user",
                content=(
                    # {llm_model} and {xml_content} are template variables
                    # substituted when the returned program is called.
                    "The llm model is {llm_model}. The publication in xml follows below:\n"
                    "------\n"
                    "{xml_content}\n"
                    "------"
                ),
            ),
        ]
    )
    # verbose=True prints the raw function-calling payloads, useful while
    # iterating on the prompt/schema.
    program = OpenAIPydanticProgram.from_defaults(
        output_cls=LLMExtractor,
        llm=llm,
        prompt=prompt,
        verbose=True,
    )
    return program


def extract_with_llm(xml_content: bytes, llm: LLM) -> LLMExtractor:
    """Extract transparency metrics from a publication's XML using *llm*.

    Builds the extraction program for the given LLM and runs it over
    *xml_content*, returning the parsed ``LLMExtractor`` result.

    NOTE(review): assumes ``llm`` exposes a ``model`` attribute (true for
    the OpenAI LLM used here) — confirm for other ``LLM`` subclasses.
    """
    extraction_program = get_program(llm=llm)
    result = extraction_program(xml_content=xml_content, llm_model=llm.model)
    return result

0 comments on commit c30ea88

Please sign in to comment.