Skip to content

Commit

Permalink
Merge pull request #38 from TheDataGuild/feature/useful-welcome-messa…
Browse files Browse the repository at this point in the history
…ge-2

Feature/useful welcome message 2
  • Loading branch information
Quantisan authored Sep 28, 2023
2 parents d2998a3 + 1cdb86d commit 9631dc5
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 0 deletions.
39 changes: 39 additions & 0 deletions mind_palace/welcome.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,41 @@
from typing import List

from docs import Section
from llama_index.llms import ChatMessage, OpenAI


def parse_abstracts(nodes) -> List[str]:
    """Return the texts of the nodes labelled as abstracts.

    Args:
        nodes: Iterable of node objects exposing a ``metadata`` mapping and
            a ``get_text()`` method.

    Returns:
        The ``get_text()`` result of every node whose ``metadata["section"]``
        equals ``str(Section.ABSTRACT)``, in input order. Nodes whose
        metadata has no ``"section"`` key are skipped.
    """
    return [
        node.get_text()
        for node in nodes
        # A node without a section label cannot be an abstract: skip it
        # instead of failing the whole call with a KeyError.
        if "section" in node.metadata
        and node.metadata["section"] == str(Section.ABSTRACT)
    ]


def summarize_prompt(abstracts: List[str]):
    """Build the prompt texts used to summarize a set of abstracts.

    Each abstract is rendered as one line prefixed with "* "; the lines are
    joined with newlines and wrapped in triple single quotes inside the user
    message.

    Returns a dict with two string entries, "system" and "user".
    """
    system_text = (
        "You are a science journalist summarizing papers for your readers.\n"
        "Instructions:\n"
        "respond with fewer than 100 words\n"
        "start your response with 'This collection of papers'"
    )
    bulleted = "\n".join(map("* {}".format, abstracts))
    user_text = f"Summarize these research papers:\n'''{bulleted}'''"
    return {"system": system_text, "user": user_text}


def summarize(gpt_model, texts: List[str]):
    """Ask an OpenAI chat model to summarize the given texts.

    The prompt comes from ``summarize_prompt(texts)``; its "system" and
    "user" entries are sent as two chat messages, in that order, to
    ``OpenAI(model=gpt_model)``.

    Returns the ``message.content`` of the chat response.
    """
    prompt_parts = summarize_prompt(texts)
    conversation = [
        ChatMessage(role=role, content=prompt_parts[role])
        for role in ("system", "user")
    ]
    llm = OpenAI(model=gpt_model)
    reply = llm.chat(conversation)
    return reply.message.content


def get_welcome_message(nodes):
    """Return the welcome text shown for a set of nodes.

    ``nodes`` is not used yet: the same fixed string is returned for any
    input.
    """
    # TODO: the message below is a fixed placeholder.
    greeting = "Ask me a question about these PDFs"
    return greeting
1 change: 1 addition & 0 deletions tests/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
import extract
import index
import measure
import welcome
55 changes: 55 additions & 0 deletions tests/unit/test_welcome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re

import pytest

from tests.context import docs, extract
from tests.context import welcome as w

from . import test_docs


def test_parse_abstracts():
    """One ABSTRACT node plus one BODY node yields a one-item list of str."""
    abstract_node = docs.create_text_node(
        text="this is abstract", node_id=1, section=docs.Section.ABSTRACT
    )
    body_node = docs.create_text_node(
        text="this is not abstract", node_id=2, section=docs.Section.BODY
    )

    result = w.parse_abstracts([abstract_node, body_node])

    assert len(result) == 1
    assert isinstance(result, list)
    for item in result:
        assert isinstance(item, str)


def test_summarize_prompt():
    """The prompt is a dict whose user text holds the bulleted abstracts."""
    result = w.summarize_prompt(["this is abstract", "second abstract"])
    assert isinstance(result, dict)
    assert isinstance(result["system"], str)

    # The abstracts must appear as "* "-prefixed lines between triple quotes.
    expected_block = r"'''\* this is abstract\n\* second abstract'''"
    assert re.search(expected_block, result["user"])


@pytest.mark.skip(reason="calls out to OpenAI API and is not free")
def test_summerize():
    """End-to-end check of summarize() on the abstracts of the sample XML."""
    seed = extract.seed_nodes(test_docs.XML_PATH)
    summary = w.summarize("gpt-3.5-turbo", w.parse_abstracts(seed))

    # An example response from GPT:
    # This collection of papers focuses on the process of sonoporation, which
    # involves using ultrasound and microbubbles to temporarily permeate the
    # plasma membrane of cells. The papers discuss the mechanistic knowledge of
    # sonoporation-induced cellular impact, the spatiotemporal characteristics
    # of sonoporation, the disruption of the actin cytoskeleton during
    # sonoporation, the role of membrane blebbing in cell recovery, the cellular
    # impact of sonoporation on living cells, the properties of daughter bubbles
    # created during inertial cavitation, and the dynamics of membrane
    # perforation and resealing during sonoporation. These findings provide
    # valuable insights into the potential applications and optimization of
    # sonoporation as a drug and gene delivery technique.

    # The response must open with the phrase the system prompt asks for,
    # stay under 200 whitespace-separated words, and mention the topic.
    assert summary.startswith("This collection of papers")
    assert len(summary.split()) < 200
    assert "sonoporation" in summary

0 comments on commit 9631dc5

Please sign in to comment.