
Commit

add streamlit app for better visualization
dahaipeng committed Aug 12, 2024
1 parent 62162b1 commit 97c5d9f
Showing 7 changed files with 112 additions and 58 deletions.
26 changes: 26 additions & 0 deletions apps/datascience_assistant/README.md
@@ -0,0 +1,26 @@
# Data Science Assistant with Streamlit ⭐
Data Science Assistant (hereinafter referred to as DS Assistant) is an assistant built on the modelscope-agent framework that, according to user needs, fully automates the steps of a data science task: exploratory data analysis (EDA), data preprocessing, feature engineering, model training, and model evaluation.

Detailed information can be found in the [documentation](../../docs/source/agents/data_science_assistant.md).

## Quick Start
Streamlit is a Python library that makes it easy to create and share beautiful, custom web apps for machine learning and data science.

To run the DS Assistant in Streamlit, you need to install the Streamlit library and the streamlit-jupyter helper. You can install them using pip:
```bash
pip install streamlit streamlit-jupyter
```
Then, you need to set your DashScope API key: when the app starts, it will prompt for the key in the terminal and export it as `DASHSCOPE_API_KEY`.
Finally, you can run the DS Assistant using the following command:
```bash
streamlit run app.py
```

After running the command, a new tab will open in your default web browser with the DS Assistant running.
The following are screenshots of the DS Assistant running in the browser:

![img_2.png](../../resources/data_science_assistant_streamlit_1.png)
You can view all of the generated code and its execution results in Streamlit:
![img_3.png](../../resources/data_science_assistant_streamlit_2.png)
After you have finished using the DS Assistant, you can directly convert the whole run to a PDF:
![img_5.png](../../resources/data_science_assistant_streamlit_3.png)
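
Alternatively, if you prefer to drive the DS Assistant from plain Python without the web UI, a minimal sketch (mirroring `app.py`; the API key and request string below are placeholders) looks like this:
```python
import os

from modelscope_agent.agents.data_science_assistant import DataScienceAssistant
from modelscope_agent.tools.metagpt_tools.tool_recommend import TypeMatchToolRecommender

os.environ['DASHSCOPE_API_KEY'] = 'your-api-key'  # placeholder: use your real DashScope key

# Same model configuration as app.py; the agent talks to Qwen2 via DashScope.
llm_config = {'model': 'qwen2-72b-instruct', 'model_server': 'dashscope'}
assistant = DataScienceAssistant(
    llm=llm_config,
    tool_recommender=TypeMatchToolRecommender(tools=['<all>']))

assistant.run(user_request='Train a model on ./train.csv and report accuracy.')
```
With `streamlit=True` omitted, progress is logged to the console and the successful code is saved to a Jupyter notebook.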
23 changes: 23 additions & 0 deletions apps/datascience_assistant/app.py
@@ -0,0 +1,23 @@
import os

import streamlit as st
from modelscope_agent.agents.data_science_assistant import DataScienceAssistant
from modelscope_agent.tools.metagpt_tools.tool_recommend import \
TypeMatchToolRecommender

llm_config = {
    'model': 'qwen2-72b-instruct',
    'model_server': 'dashscope',
}
# The assistant calls its LLM through DashScope, so an API key is required.
os.environ['DASHSCOPE_API_KEY'] = input(
    'Please input your dashscope api key: ')
# Recommend tools by matching the task type; '<all>' makes every registered tool available.
data_science_assistant = DataScienceAssistant(
    llm=llm_config, tool_recommender=TypeMatchToolRecommender(tools=['<all>']))
st.title('Data Science Assistant')
st.write(
    'This is a data science assistant that can help you with your data science tasks.'
)
st.write('Please input your request below and click the submit button.')
user_request = st.text_input('User Request')
if st.button('submit'):
    # streamlit=True makes the agent render its plan, code, and results in the page.
    data_science_assistant.run(user_request=user_request, streamlit=True)
111 changes: 54 additions & 57 deletions modelscope_agent/agents/data_science_assistant.py
@@ -1,5 +1,6 @@
# Implementation inspired by the paper "DATA INTERPRETER: AN LLM AGENT FOR DATA SCIENCE"
import asyncio
import copy
import os
import time
from datetime import datetime
@@ -8,7 +9,6 @@
import json
import json5
import nbformat
import streamlit as st
from modelscope_agent.agents.role_play import RolePlay
from modelscope_agent.llm.base import BaseChatModel
from modelscope_agent.schemas import CodeCell, Plan, Task
@@ -18,14 +18,13 @@
from modelscope_agent.tools.metagpt_tools.tool_recommend import ToolRecommender
from modelscope_agent.utils.logger import agent_logger as logger
from modelscope_agent.utils.utils import parse_code
from streamlit_agraph import Config, Edge, Node, agraph

try:
import streamlit as st # noqa
from nbconvert import HTMLExporter
from traitlets.config import Config
except Exception as e:
print(
f'import streamlit error: {str(e)}, please install streamlit first by running: pip install streamlit '
)
print(f'import error: {str(e)}, please install streamlit and nbconvert')
PLAN_TEMPLATE = """
# Context:
{context}
@@ -235,14 +234,12 @@
These are the previous code blocks, which have been executed successfully in the previous Jupyter notebook cells: \
{previous_code_blocks}
Attention: your response should be one of the following:
- [your step-by-step thought], correct
- [your step-by-step thought], incorrect
At the end of your thought, you need to give the final judgement on a new line (correct or incorrect).
Don't generate code; just give the reason why the code is correct or incorrect.
## Attention
Don't use the word 'incorrect' in your step-by-step thought.
Your answer should be short and clear; it does not need to be long.
"""

CHECK_DATA_PROMPT = """
@@ -322,33 +319,6 @@ def __init__(self,
self.total_token = 0
self.streamlit = False

def create_agraph_from_json(tasks):
# Parse the JSON string
# Initialize the lists of nodes and edges
nodes = []
edges = []

# Create a node for each task
for task in tasks:
task_id = task['task_id']
nodes.append(
Node(
id=task_id, label='TASK ' + task_id, size=50, shape='box'))

# Create an edge for each dependent task
for dependent_task_id in task['dependent_task_ids']:
edges.append(Edge(
source=dependent_task_id,
target=task_id,
))

# Configure the graph
config = Config(
width=750, height=950, directed=True, hierarchical=False)

# Return the agraph object
return agraph(nodes=nodes, edges=edges, config=config)

def _update_plan(self, user_request: str, curr_plan: Plan = None) -> Plan:
call_llm_success = False
call_llm_count = 0
@@ -599,8 +569,6 @@ def _check_data(self):

def _judge_code(self, task, previous_code_blocks, code,
code_interpreter_resp):
success = True
failed_reason = ''
judge_prompt = JUDGE_TEMPLATE.format(
instruction=task.instruction,
previous_code_blocks=previous_code_blocks,
@@ -621,13 +589,12 @@ def _judge_code(self, task, previous_code_blocks, code,
self._get_total_tokens()
if 'Error code' in judge_result:
call_llm_count += 1
time.sleep(10)
time.sleep(5)
else:
call_llm_success = True
if not call_llm_success:
raise Exception('call llm failed')
logger.info(f'judge result for task{task.task_id}: \n {judge_result}')
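# The judge template puts the final verdict ('correct' or 'incorrect') on the
# last line of the response, so only that line is checked below.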

if 'incorrect' in judge_result.split('\n')[-1]:
success = False
failed_reason = (
@@ -636,7 +603,7 @@ def _judge_code(self, task, previous_code_blocks, code,
return success, failed_reason

else:
return True, 'The code logic is correct'
return True, judge_result

def _run(self, user_request, save: bool = True, **kwargs):
before_time = time.time()
@@ -645,7 +612,7 @@ def _run(self, user_request, save: bool = True, **kwargs):
if self.streamlit:
st.write("""# DataScience Assistant """)
st.write("""### The user request is: \n""")
st.write("""{user_request}""")
st.write(user_request)
print('streamlit: ', self.streamlit)
self.plan = self._update_plan(user_request=user_request)
jupyter_file_path = ''
@@ -659,7 +626,9 @@ def _run(self, user_request, save: bool = True, **kwargs):

while self.plan.current_task_id:
task = self.plan.task_map.get(self.plan.current_task_id)
# write_and_execute_code(self)
if self.streamlit:
st.write(
f"""### Task {task.task_id}: {task.instruction}\n""")
logger.info(
f'new task starts: task_{task.task_id} , instruction: {task.instruction}'
)
@@ -671,7 +640,6 @@ def _run(self, user_request, save: bool = True, **kwargs):
code_execute_success = False
code_logic_success = False
temp_code_interpreter = CodeInterpreter()

temp_code_interpreter.call(
params=json.dumps({
'code':
@@ -682,26 +650,60 @@ def _run(self, user_request, save: bool = True, **kwargs):
# generate code
code = self._generate_code(code_counter, task,
user_request)
code = '%matplotlib inline \n' + code
# if self.streamlit:
# st.divider()
# st.write("We have generated the code for the current task")
# st.code(code, language='python')
code_execute_success, code_interpreter_resp = temp_code_interpreter.call(
params=json.dumps({'code': code}),
nb_mode=True,
silent_mode=True)
# Tear down the temporary Jupyter environment
temp_code_interpreter.terminate()
if self.streamlit:
st.divider()
st_notebook = nbformat.v4.new_notebook()
st_notebook.cells = [
temp_code_interpreter.nb.cells[-1]
]
c = Config()
c.HTMLExporter.preprocessors = [
'nbconvert.preprocessors.ConvertFiguresPreprocessor'
]
# create the new exporter using the custom config
html_exporter_with_figs = HTMLExporter(config=c)
(html, resources_with_fig
) = html_exporter_with_figs.from_notebook_node(
st_notebook)
st.write(
'We have generated the code for the current task')
st.html(html)
judge_resp = ''
if not code_execute_success:
logger.error(
f'code execution failed, task{task.task_id} code_counter{code_counter}:\n '
f'{code_interpreter_resp}')
if self.streamlit:
st.write(
'The code execution failed. Now we will take a reflection and regenerate the code.'
)
else:
logger.info(
f'code execution success, task{task.task_id} code_counter{code_counter}:\n '
f'{code_interpreter_resp}')
if self.streamlit:
st.write(
'The code execution is successful. Now we will ask the judge to check the code.'
)
code_logic_success, judge_resp = self._judge_code(
task=task,
previous_code_blocks=previous_code_blocks,
code=code,
code_interpreter_resp=code_interpreter_resp)
if self.streamlit:
st.write(
'The judge has checked the code, here is the result.'
)
st.write(judge_resp)
success = code_execute_success and code_logic_success
task.code_cells.append(
CodeCell(
@@ -713,16 +715,13 @@ def _run(self, user_request, save: bool = True, **kwargs):
self.code_interpreter.call(
params=json.dumps({'code': code}), nb_mode=True)
if self.streamlit:
st.divider()
st.write(
f"""### Task {task.task_id}: {task.instruction}\n"""
'The code is correct, we will move to the next task.'
)
st.write(
'Now we generate the code for the current task'
)
st.code(f"""{code}""", language='python')
task.code = code
task.result = code_interpreter_resp
else:
self.code_interpreter.nb.cells.pop()
code_counter += 1

# save the successful code in jupyter notebook
@@ -759,12 +758,10 @@ def _run(self, user_request, save: bool = True, **kwargs):
print(f'json write error: {str(e)}')
if self.streamlit:
st.divider()
st.write('#### We have finished all the tasks! ')
st.write(
f'you can check the details in the jupyter notebook \"{jupyter_file_path}\"'
)
st.write('### We have finished all the tasks! ')
st.balloons()
st.write(
f"you can check the plan in the json file \"{dir_name + 'plan.json'}\""
f"""#### The total time cost is: {time_cost}\n #### The total token cost is: {total_token}"""
)

except Exception as e:
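A minimal standalone sketch of the notebook-to-HTML rendering used in the Streamlit branch above (assuming `streamlit`, `nbformat`, and `nbconvert` are installed, and `nb` is a live nbformat notebook object):
```python
import nbformat
import streamlit as st
from nbconvert import HTMLExporter
from traitlets.config import Config


def render_last_cell(nb) -> None:
    # Wrap only the most recent cell in a fresh notebook so Streamlit
    # shows just the current task's code and output.
    single = nbformat.v4.new_notebook()
    single.cells = [nb.cells[-1]]
    c = Config()
    c.HTMLExporter.preprocessors = [
        'nbconvert.preprocessors.ConvertFiguresPreprocessor'
    ]
    html, _ = HTMLExporter(config=c).from_notebook_node(single)
    st.html(html)
```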
10 changes: 9 additions & 1 deletion modelscope_agent/tools/metagpt_tools/task_type.py
@@ -54,6 +54,13 @@
- Ensure that the evaluated data is same processed as the training data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""
OCR_PROMPT = """
The current task is about OCR, please note the following:
- You can follow the code below to get the OCR result:
from paddleocr import PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')
result = ocr.ocr('/path/to/the/pic', cls=True) # please replace the path with the real path
"""


class TaskTypeDef(BaseModel):
@@ -92,7 +99,8 @@ class TaskType(Enum):
desc='Only for evaluating model.',
guidance=MODEL_EVALUATE_PROMPT,
)

OCR = TaskTypeDef(
name='ocr', desc='For performing OCR tasks', guidance=OCR_PROMPT)
OTHER = TaskTypeDef(
name='other', desc='Any tasks not in the defined categories')

Binary file added resources/data_science_assistant_streamlit_1.png
Binary file added resources/data_science_assistant_streamlit_2.png
Binary file added resources/data_science_assistant_streamlit_3.png
