Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add kg block #644

Merged
merged 5 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/ui/public/components/workflows_writeraddtokg.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions src/writer/blocks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from writer.blocks.runworkflow import RunWorkflow
from writer.blocks.setstate import SetState
from writer.blocks.writeraddchatmessage import WriterAddChatMessage
from writer.blocks.writeraddtokg import WriterAddToKG
from writer.blocks.writerchat import WriterChat
from writer.blocks.writerclassification import WriterClassification
from writer.blocks.writercompletion import WriterCompletion
Expand All @@ -29,3 +30,4 @@
AddToStateList.register("workflows_addtostatelist")
ReturnValue.register("workflows_returnvalue")
WriterInitChat.register("workflows_writerinitchat")
WriterAddToKG.register("workflows_writeraddtokg")
81 changes: 81 additions & 0 deletions src/writer/blocks/writeraddtokg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from typing import Any

from writer.abstract import register_abstract_template
from writer.blocks.base_block import WorkflowBlock
from writer.ss_types import AbstractTemplate, WriterConfigurationError


class WriterAddToKG(WorkflowBlock):

@classmethod
def register(cls, type: str):
super(WriterAddToKG, cls).register(type)
register_abstract_template(type, AbstractTemplate(
baseType="workflows_node",
writer={
"name": "Add to Knowledge Graph",
"description": "Adds files to an existing knowledge graph.",
"category": "Writer",
"fields": {
"graphId": {
"name": "Graph id",
"type": "Text",
"desc": "The id for an existing knowledge graph. It has a UUID format."
},
"files": {
"name": "Files",
"type": "Object",
"default": "[]",
"desc": "A list of files to be uploaded and added to the knowledge graph. You can use files uploaded via the File Input component or specify dictionaries with data, type and name."
},
},
"outs": {
"success": {
"name": "Success",
"description": "If the execution was successful.",
"style": "success",
},
"error": {
"name": "Error",
"description": "If the function raises an Exception.",
"style": "error",
},
},
}
))

def _get_prepared_file(self, raw_file: Any):
if not isinstance(raw_file, dict):
raise WriterConfigurationError("Files must be dictionaries and contain `data`, `type` and `name` attributes.")

if "data" not in raw_file or "type" not in raw_file or "name" not in raw_file:
raise WriterConfigurationError("A file specified as a dictionary must contain `data`, `type` and `name` attributes.")

return raw_file

def run(self):
try:
import writer.ai

graph_id = self._get_field("graphId", required=True)
raw_files = self._get_field("files", as_json=True)
prepared_files = []

if not isinstance(raw_files, list):
raise WriterConfigurationError("Files must be a list.")

for raw_file in raw_files:
prepared_files.append(self._get_prepared_file(raw_file))

graph = writer.ai.retrieve_graph(graph_id)

for prepared_file in prepared_files:
file = writer.ai.upload_file(prepared_file.get("data"),
prepared_file.get("type"),
prepared_file.get("name"))
graph.add_file(file)

self.outcome = "success"
except BaseException as e:
self.outcome = "error"
raise e
52 changes: 23 additions & 29 deletions src/writer/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import writer.core_ui
from writer.ss_types import (
InstancePath,
WriterConfigurationError,
)

if TYPE_CHECKING:
Expand All @@ -29,46 +30,39 @@ def __init__(self, state: "WriterState", component_tree: "ComponentTree"):
self.serializer = writer.core.StateSerialiser()

def evaluate_field(self, instance_path: InstancePath, field_key: str, as_json=False, default_field_value="", base_context={}) -> Any:
def decode_json(text):
try:
return json.loads(text)
except json.JSONDecodeError as exception:
raise WriterConfigurationError("Error decoding JSON. " + str(exception)) from exception

def replacer(matched):
if matched.string[0] == "\\": # Escaped @, don't evaluate
return matched.string
expr = matched.group(1).strip()
expr_value = self.evaluate_expression(expr, instance_path, base_context)

try:
if as_json:
serialized_value = self.serializer.serialise(expr_value)
if not isinstance(serialized_value, str):
serialized_value = json.dumps(serialized_value)
return serialized_value
return expr_value
except BaseException:
raise ValueError(
f"""Couldn't serialize value of type "{ type(expr_value) }" when evaluating field "{ field_key }".""")
return expr_value

component_id = instance_path[-1]["componentId"]
component = self.component_tree.get_component(component_id)
if component:
field_value = component.content.get(field_key) or default_field_value
replaced = None
full_match = self.TEMPLATE_REGEX.fullmatch(field_value)
if not component:
raise ValueError(f'Component with id "{component_id}" not found.')

if full_match is None:
replaced = self.TEMPLATE_REGEX.sub(lambda m: str(replacer(m)), field_value)
else:
replaced = replacer(full_match)

if (replaced is not None) and as_json:
replaced_as_json = None
try:
replaced_as_json = json.loads(replaced)
except json.JSONDecodeError:
replaced_as_json = json.loads(default_field_value)
return replaced_as_json
else:
return replaced
field_value = component.content.get(field_key) or default_field_value
replaced = None
full_match = self.TEMPLATE_REGEX.fullmatch(field_value)

if full_match is None:
replaced = self.TEMPLATE_REGEX.sub(lambda m: str(replacer(m)), field_value)
if as_json:
replaced = decode_json(replaced)
else:
raise ValueError(f"Couldn't acquire a component by ID '{component_id}'")
replaced = replacer(full_match)
if as_json and isinstance(replaced, str):
replaced = decode_json(replaced)

return replaced


def get_context_data(self, instance_path: InstancePath, base_context={}) -> Dict[str, Any]:
Expand Down
20 changes: 17 additions & 3 deletions src/writer/workflows.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import time
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Type

Expand Down Expand Up @@ -70,6 +71,19 @@ def run_nodes(self, nodes: List[writer.core_ui.Component], execution_environment
self._generate_run_log(execution, title, "info", return_value)
return return_value

def _summarize_data_for_log(self, data):
if isinstance(data, list):
return [self._summarize_data_for_log(item) for item in data]
if isinstance(data, dict):
return {k : self._summarize_data_for_log(v) for k, v in data.items()}
if isinstance(data, (str, int, float, bool, type(None))):
return data
try:
return json.loads(json.dumps(data))
except (TypeError, OverflowError):
return f"Can't be displayed in the log. Value of type: {str(type(data))}."


def _generate_run_log(self,
execution: Dict[str, writer.blocks.base_block.WorkflowBlock],
title: str,
Expand All @@ -83,9 +97,9 @@ def _generate_run_log(self,
"componentId": component_id,
"outcome": tool.outcome,
"message": tool.message,
"result": tool.result,
"returnValue": tool.return_value,
"executionEnvironment": tool.execution_environment,
"result": self._summarize_data_for_log(tool.result),
"returnValue": self._summarize_data_for_log(tool.return_value),
"executionEnvironment": self._summarize_data_for_log(tool.execution_environment),
"executionTimeInSeconds": tool.execution_time_in_seconds
})
msg = "Execution finished."
Expand Down
79 changes: 79 additions & 0 deletions tests/backend/blocks/test_writeraddtokg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import pytest
import writer.ai
from writer.blocks.writeraddtokg import WriterAddToKG
from writer.ss_types import WriterConfigurationError


class MockFile():
pass

class MockGraph():

def add_file(self, file):
assert isinstance(file, MockFile)

def mock_retrieve_graph(graph_id):
assert graph_id == "abc123"
return MockGraph()

def mock_upload_file(data, type, name):
assert data == b"123"
assert type == "application/pdf"
assert name == "interesting.pdf"
return MockFile()


def test_add_to_kg(session, runner):
writer.ai.retrieve_graph = mock_retrieve_graph
writer.ai.upload_file = mock_upload_file
session.session_state["my_files"] = [
{
"data": b"123",
"type": "application/pdf",
"name": "interesting.pdf"
},
{
"data": b"123",
"type": "application/pdf",
"name": "interesting.pdf"
}
]
session.add_fake_component({
"graphId": "abc123",
"files": "@{my_files}"
})
block = WriterAddToKG("fake_id", runner, {})
block.run()
assert block.outcome == "success"


def test_add_to_kg_missing_type(session, runner):
writer.ai.retrieve_graph = mock_retrieve_graph
writer.ai.upload_file = mock_upload_file
session.session_state["my_files"] = [
{
"data": b"123",
"name": "interesting.pdf"
}
]
session.add_fake_component({
"graphId": "abc123",
"files": "@{my_files}"
})
block = WriterAddToKG("fake_id", runner, {})

with pytest.raises(WriterConfigurationError):
block.run()

def test_add_to_kg_wrong_type(session, runner):
writer.ai.retrieve_graph = mock_retrieve_graph
writer.ai.upload_file = mock_upload_file
session.session_state["my_files"] = "should be list"
session.add_fake_component({
"graphId": "abc123",
"files": "@{my_files}"
})
block = WriterAddToKG("fake_id", runner, {})

with pytest.raises(WriterConfigurationError):
block.run()