Skip to content

Commit

Permalink
lab9
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Mar 18, 2024
1 parent c517d4e commit 2333139
Showing 1 changed file with 82 additions and 9 deletions.
91 changes: 82 additions & 9 deletions 2024/lab9/Lab9 - RAG over KG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,22 @@
"\n",
"SELECT ?uri ?predicate ?label ?type\n",
"WHERE {\n",
" TODO\n",
" ?uri a ?type ;\n",
" ?predicate ?label .\n",
" FILTER (\n",
" ?type = owl:Class ||\n",
" ?type = owl:DatatypeProperty ||\n",
" ?type = owl:ObjectProperty\n",
" )\n",
" FILTER (\n",
" ?predicate = rdfs:label ||\n",
" ?predicate = skos:prefLabel ||\n",
" ?predicate = skos:altLabel ||\n",
" ?predicate = skos:definition ||\n",
" ?predicate = rdfs:comment ||\n",
" ?predicate = dcterms:description ||\n",
" ?predicate = dc:title\n",
" )\n",
"}\"\"\"\n",
"\n",
"class OntologyLoader(BaseLoader):\n",
Expand Down Expand Up @@ -200,9 +215,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-03-17 11:02:48.072\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mfastembed.embedding\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[33m\u001b[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated. Use TextEmbedding instead.\u001b[0m\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4e0eea67b54c477ea3fd7e5ecd8e7e45",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"flag_embeddings = FastEmbedEmbeddings(model_name=\"BAAI/bge-small-en-v1.5\", max_length=512)\n",
"loader = OntologyLoader(\"https://semanticscience.org/ontology/sio.owl\", format=\"xml\")\n",
Expand Down Expand Up @@ -251,7 +288,10 @@
")\n",
"\n",
"# Prompt to reformulate the question using the chat history\n",
"reform_template = \"\"\"TODO\n",
"reform_template = \"\"\"Given the following chat history and a follow up question,\n",
"rephrase the follow up question to be a standalone straightforward question, in its original language.\n",
"Do not answer the question! Just rephrase reusing information from the chat history.\n",
"Make it short and straight to the point.\n",
"\n",
"Chat History:\n",
"{chat_history}\n",
Expand All @@ -260,7 +300,8 @@
"REFORM_QUESTION_PROMPT = PromptTemplate.from_template(reform_template)\n",
"\n",
"# Prompt to ask to answer the reformulated question\n",
"answer_template = \"\"\"TODO\n",
"answer_template = \"\"\"Briefly answer the question based only on the following context,\n",
"do not use any information outside this context:\n",
"{context}\n",
"\n",
"Question: {question}\n",
Expand Down Expand Up @@ -351,9 +392,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💭 Reformulated question: Can you give me the definition of a protein?\n",
"📚 Documents retrieved:\n",
"· protein (http://semanticscience.org/resource/SIO_010043)\n",
"· A protein is an organic polymer that is composed of one or more linear polymers of amino acids. (http://semanticscience.org/resource/SIO_010043)\n",
"· A protein complex is a molecular complex composed of at least two polypeptide chains. (http://semanticscience.org/resource/SIO_010497)\n",
"· A polypeptide is an organic polymer composed of amino acid residues, typically of less than 50 amino acids in length. (http://semanticscience.org/resource/SIO_010007)\n",
"· amino acid (http://semanticscience.org/resource/SIO_001224)\n",
"\n",
"Answer: A protein is an organic polymer composed of one or more linear polymers of amino acids."
]
}
],
"source": [
"# set_debug(True) # Uncomment to enable detailed LangChain debugging\n",
"output = stream_chain(final_chain, memory, {\n",
Expand All @@ -363,9 +420,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💭 Reformulated question: What is the URI for the concept of a protein?\n",
"📚 Documents retrieved:\n",
"· protein (http://semanticscience.org/resource/SIO_010043)\n",
"· A protein complex is a molecular complex composed of at least two polypeptide chains. (http://semanticscience.org/resource/SIO_010497)\n",
"· protein complex (http://semanticscience.org/resource/SIO_010497)\n",
"· A protein-protein association is an association between two proteins. (http://semanticscience.org/resource/SIO_001438)\n",
"· protein-protein association (http://semanticscience.org/resource/SIO_001438)\n",
"\n",
"http://semanticscience.org/resource/SIO_010043"
]
}
],
"source": [
"output = stream_chain(final_chain, memory, {\n",
" \"question\": \"What is the URI for this concept?\"\n",
Expand Down

0 comments on commit 2333139

Please sign in to comment.