-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #38 from TheDataGuild/feature/useful-welcome-message-2
Feature/useful welcome message 2
- Loading branch information
Showing
3 changed files
with
95 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,41 @@ | ||
from typing import List | ||
|
||
from docs import Section | ||
from llama_index.llms import ChatMessage, OpenAI | ||
|
||
|
||
def parse_abstracts(nodes) -> List[str]:
    """Return the text of every node tagged as an abstract.

    Args:
        nodes: Iterable of node objects exposing a ``metadata`` mapping and a
            ``get_text()`` method.

    Returns:
        The ``get_text()`` result of each node whose ``"section"`` metadata
        equals ``str(Section.ABSTRACT)``, in input order.
    """
    return [
        node.get_text()
        for node in nodes
        # .get() so a node without a "section" entry is skipped instead of
        # aborting the whole scan with a KeyError.
        if node.metadata.get("section") == str(Section.ABSTRACT)
    ]
|
||
|
||
def summarize_prompt(abstracts: List[str]) -> dict:
    """Build the system and user prompt texts for summarizing abstracts.

    Args:
        abstracts: Abstract texts; each one becomes a ``* ``-prefixed bullet.

    Returns:
        A dict with two string entries: ``"system"`` (persona plus response
        instructions) and ``"user"`` (the bullets, one per line, wrapped in
        triple single quotes).
    """
    bullet_points = "\n".join(f"* {text}" for text in abstracts)
    system_prompt = (
        "You are a science journalist summarizing papers for your readers.\n"
        "Instructions:\n"
        "respond with fewer than 100 words\n"
        "start your response with 'This collection of papers'"
    )
    user_prompt = f"Summarize these research papers:\n'''{bullet_points}'''"
    return {"system": system_prompt, "user": user_prompt}
|
||
|
||
def summarize(gpt_model, texts: List[str]):
    """Ask an OpenAI chat model for a short summary of the given texts.

    Args:
        gpt_model: Model name passed to ``OpenAI(model=...)``.
        texts: Texts forwarded to ``summarize_prompt`` to build the prompt.

    Returns:
        The ``message.content`` of the chat response.

    Note:
        Every call issues a chat request through ``OpenAI(...).chat``.
    """
    prompt = summarize_prompt(texts)
    messages = [
        ChatMessage(role="system", content=prompt["system"]),
        ChatMessage(role="user", content=prompt["user"]),
    ]
    resp = OpenAI(model=gpt_model).chat(messages)
    return resp.message.content
|
||
|
||
def get_welcome_message(nodes):
    """Return the greeting shown to the user.

    Args:
        nodes: Currently unused; the greeting is a fixed string.
    """
    # TODO: presumably the greeting should be derived from `nodes` (e.g. a
    # summary of their abstracts) -- confirm the intended behavior.
    return "Ask me a question about these PDFs"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ | |
import extract | ||
import index | ||
import measure | ||
import welcome |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import re | ||
|
||
import pytest | ||
|
||
from tests.context import docs, extract | ||
from tests.context import welcome as w | ||
|
||
from . import test_docs | ||
|
||
|
||
def test_parse_abstracts():
    """One ABSTRACT node and one BODY node yield a single-entry list of str."""
    nodes = [
        docs.create_text_node(
            text="this is abstract", node_id=1, section=docs.Section.ABSTRACT
        ),
        docs.create_text_node(
            text="this is not abstract", node_id=2, section=docs.Section.BODY
        ),
    ]
    abstracts = w.parse_abstracts(nodes)
    assert len(abstracts) == 1
    assert isinstance(abstracts, list)
    assert all(isinstance(abstract, str) for abstract in abstracts)
|
||
|
||
def test_summarize_prompt():
    """The prompt is a dict whose user text holds the quoted bullet list."""
    abstracts = ["this is abstract", "second abstract"]
    prompt = w.summarize_prompt(abstracts)
    assert isinstance(prompt, dict)
    assert isinstance(prompt["system"], str)
    # The expected text is a fixed literal, so a substring check is enough;
    # no regex escaping of the "*" bullets is needed.
    assert "'''* this is abstract\n* second abstract'''" in prompt["user"]
|
||
|
||
@pytest.mark.skip(reason="calls out to OpenAI API and is not free")
def test_summerize():
    """End-to-end check of summarize() against the live OpenAI API.

    Skipped by default because each run issues a paid API request.
    """
    nodes = extract.seed_nodes(test_docs.XML_PATH)
    abstracts = w.parse_abstracts(nodes)
    resp = w.summarize("gpt-3.5-turbo", abstracts)

    # An example response from GPT:
    # This collection of papers focuses on the process of sonoporation, which
    # involves using ultrasound and microbubbles to temporarily permeate the
    # plasma membrane of cells. The papers discuss the mechanistic knowledge of
    # sonoporation-induced cellular impact, the spatiotemporal characteristics
    # of sonoporation, the disruption of the actin cytoskeleton during
    # sonoporation, the role of membrane blebbing in cell recovery, the cellular
    # impact of sonoporation on living cells, the properties of daughter bubbles
    # created during inertial cavitation, and the dynamics of membrane
    # perforation and resealing during sonoporation. These findings provide
    # valuable insights into the potential applications and optimization of
    # sonoporation as a drug and gene delivery technique.

    assert resp.startswith("This collection of papers")
    # The prompt asks for fewer than 100 words; the bound here is looser
    # because the model does not follow the limit exactly.
    assert len(resp.split()) < 200
    # Case-insensitive: the model may capitalize the term at a sentence start.
    assert "sonoporation" in resp.lower()