Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ds assistant #583

Merged
merged 33 commits into from
Sep 13, 2024
Merged
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
96e6a31
add plan update logic
dahaipeng Jul 4, 2024
0373d07
adding update task logic
dahaipeng Jul 5, 2024
6c418a3
update datascience assistant logic to achieve better results
dahaipeng Jul 12, 2024
61d6953
Merge branch 'master' into feature/datascience_assistant
dahaipeng Jul 12, 2024
7c1c20b
Merge branch 'refs/heads/master' into feature/datascience_assistant
dahaipeng Jul 15, 2024
70d844c
add ds tools
dahaipeng Jul 18, 2024
2e54983
add ds tools
dahaipeng Jul 19, 2024
0f42462
update prompt
dahaipeng Jul 22, 2024
dd7d05d
update utils
dahaipeng Jul 22, 2024
8b752a5
update init
dahaipeng Jul 22, 2024
800fc06
Merge branch 'refs/heads/master' into feature/datascience_assistant
dahaipeng Jul 22, 2024
4f7bd6a
update log
dahaipeng Jul 23, 2024
62c2bed
Merge branch 'refs/heads/master' into feature/datascience_assistant
dahaipeng Jul 23, 2024
cb05393
delete yml
dahaipeng Jul 23, 2024
5b70701
update ds_assistant
dahaipeng Jul 24, 2024
49c50d2
Merge branch 'refs/heads/master' into feature/datascience_assistant
dahaipeng Jul 25, 2024
428401a
update ds_assistant
dahaipeng Jul 25, 2024
f6bd4e2
update ds_assistant
dahaipeng Jul 25, 2024
ee8857f
Merge branch 'refs/heads/master' into feature/datascience_assistant
dahaipeng Jul 26, 2024
1f1be09
update ds_assistant
dahaipeng Jul 26, 2024
e2310b5
fix openapi tool
dahaipeng Jul 26, 2024
458d4d5
Merge branch 'refs/heads/feature/datascience_assistant'
dahaipeng Jul 30, 2024
c536e6d
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 1, 2024
292c5b9
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 1, 2024
67fa1ba
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 5, 2024
84bff9a
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 6, 2024
2b0cb29
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 12, 2024
124a4ea
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 13, 2024
120aa9c
Merge remote-tracking branch 'origin/master'
dahaipeng Aug 26, 2024
bd7d5d7
add task decompose function
dahaipeng Aug 30, 2024
fff71ff
add task decompose function
dahaipeng Aug 30, 2024
021d2e8
add task decompose function
dahaipeng Aug 30, 2024
343bf70
add task decompose function
dahaipeng Sep 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 82 additions & 10 deletions modelscope_agent/agents/data_science_assistant.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Implementation inspired by the paper "DATA INTERPRETER: AN LLM AGENT FOR DATA SCIENCE"
import asyncio
import copy
import os
import time
from datetime import datetime
Expand Down Expand Up @@ -39,8 +37,7 @@
- **other**: Any tasks not in the defined categories

# Task:
Based on the context, write a simple plan or modify an existing plan of what you should do to achieve the goal. A plan \
consists of one to four tasks.
Based on the context, write a simple plan or modify an existing plan of what you should do to achieve the goal.

Output a list of jsons following the format:
```json
Expand All @@ -55,6 +52,44 @@
]
```
"""

# Prompt template asking the LLM to split a too-complex task into sub-tasks.
# Placeholders filled by str.format in _decompose_task:
#   {context}        - the originating user request / task context
#   {previous_tasks} - JSON lines describing tasks already in the plan
#   {current_task}   - JSON dump of the task being decomposed
# Literal braces in the JSON example are escaped as {{ }} so .format leaves
# them intact.
DECOMPOSE_TASK_TEMPLATE = """
# Context:
{context}
# Available Task Types:
- **eda**: For performing exploratory data analysis
- **data preprocessing**: For preprocessing dataset in a data analysis or machine learning task ONLY,\
general data operation doesn't fall into this type
- **feature engineering**: Only for creating new columns of input data.
- **model train**: Only for training model.
- **model evaluate**: Only for evaluating model.
- **ocr**: Only for OCR tasks.
- **other**: Any tasks not in the defined categories

# Previous Tasks
We have already generated the following tasks:
{previous_tasks}
# Task:
The current task is:
{current_task}
Currently, the current task is too complex to be executed in one step. Please decompose the task into smaller tasks, \
and output a list of jsons following the format:

```json
[
    {{
        "task_id": str = "unique identifier for a task in plan, can be an ordinal, \
should be unique and not conflict with previous task ids",
        "dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
        "instruction": "what you should do in this task, one short phrase or sentence",
        "task_type": "type of this task, should be one of Available Task Types",
    }},
    ...
]
```
"""

CODE_TEMPLATE = """
# Task
you are a code generator, you need to generate a code python block in jupyter notebook to achieve the \
Expand Down Expand Up @@ -597,8 +632,8 @@ def _judge_code(self, task, previous_code_blocks, code,
if 'incorrect' in judge_result.split('\n')[-1]:
success = False
failed_reason = (
'Though the code executes successfully, The code logic is incorrect, here is the reason: '
+ judge_result)
'Though the code executes successfully, The code logic is \
incorrect, here is the reason: ' + judge_result)
return success, failed_reason

else:
Expand Down Expand Up @@ -634,7 +669,7 @@ def _run(self, user_request, save: bool = True, **kwargs):
previous_code_blocks = self._get_previous_code_blocks()
success = False
code_counter = 0
max_try = kwargs.get('max_try', 10)
max_try = kwargs.get('max_try', 1)
while not success and code_counter < max_try:
code_execute_success = False
code_logic_success = False
Expand Down Expand Up @@ -726,9 +761,13 @@ def _run(self, user_request, save: bool = True, **kwargs):
encoding='utf-8') as file:
nbformat.write(self.code_interpreter.nb, file)
else:
self.plan = self._update_plan(
user_request=user_request, curr_plan=self.plan)
self.code_interpreter.reset()
decomposed_tasks = self._decompose_task(task)
if decomposed_tasks:
self.plan.replace_task(task, decomposed_tasks)
else:
self.plan = self._update_plan(
user_request=user_request, curr_plan=self.plan)
self.code_interpreter.reset()
# save the plan into json file
if save:
after_time = time.time()
Expand Down Expand Up @@ -769,3 +808,36 @@ def _get_total_tokens(self):
except Exception as e:
logger.error(f'get total token error: {e}')
pass

def _decompose_task(self, task):
    """Ask the LLM to split an overly complex task into smaller sub-tasks.

    Args:
        task: the plan Task that could not be completed in one step; its
            instruction and metadata are embedded into the decompose prompt.

    Returns:
        A list of new ``Task`` objects parsed from the LLM's JSON answer,
        or ``None`` when the call or parsing fails (the caller treats
        ``None`` as "decomposition failed" and falls back to re-planning).
    """
    try:
        # Use the module logger (consistent with the rest of this method)
        # instead of print, so the message respects configured handlers.
        logger.info(f'decompose task {task.task_id}')
        previous_tasks = '\n'.join(
            json.dumps({
                'task_id': t.task_id,
                'dependent_task_ids': t.dependent_task_ids,
                'instruction': t.instruction,
                'task_type': t.task_type
            }) for t in self.plan.tasks)
        messages = [{
            'role':
            'user',
            'content':
            DECOMPOSE_TASK_TEMPLATE.format(
                context='User Request: ' + task.instruction + '\n',
                previous_tasks=previous_tasks,
                current_task=json.dumps(task.__dict__))
        }]
        resp = self._call_llm(prompt=None, messages=messages, stop=None)
        # The LLM response is streamed; accumulate all chunks first.
        tasks_text = ''.join(resp)
        tasks_text = parse_code(text=tasks_text, lang='json')
        logger.info(f'decomposed tasks: {tasks_text}')

        # json5 tolerates trailing commas, which the prompt's example
        # format encourages the model to emit.
        tasks = json5.loads(tasks_text)
        return [Task(**t) for t in tasks]
    except Exception as e:
        # Best-effort: swallow and report, letting the caller fall back
        # to updating the whole plan instead of crashing the run.
        logger.error(f'decompose task error: {e}')
        return None
Loading