diff --git a/docs/browser.html b/docs/browser.html index 906d8da..c35172d 100644 --- a/docs/browser.html +++ b/docs/browser.html @@ -36,6 +36,22 @@ {% endraw %} +
Logging
+ +Using OpenAI Backend API
+ +get_conversations
-get_conversation
[source]+
get_conversation
(conversation_id
)
get_conversation
[source]
get_conversation
(conversation_id
)
get_conversation
-handle_conversation_detail
[source]+
handle_conversation_detail
(current_node
,mapping
)
handle_conversation_detail
[source]@@ -123,7 +139,7 @@
handle_conversation_detail
(current_node
,mapping
)
handle_conversation
@@ -148,7 +164,7 @@
start_conversation
-
generate_title
[source]+
generate_title
(conversation_id
)
generate_title
[source]@@ -173,7 +189,7 @@
generate_title
(conversation_id
)
generate_title
-
rename_title
[source]+
rename_title
(conversation_id
,title
)
rename_title
[source]@@ -198,7 +214,7 @@
rename_title
(conversation_id
,title
)
rename_title
-
delete_conversation
[source]+
delete_conversation
(conversation_id
)
delete_conversation
[source]@@ -223,7 +239,7 @@
delete_conversation
(conversation_id
)
delete_conversation
@@ -248,7 +264,7 @@
recover_conversation -
clear_conversations
[source]+
clear_conversations
()
clear_conversations
[source]@@ -302,21 +318,6 @@
clear_conversations
()
clear_conversations
--- {% endraw %} @@ -468,7 +469,7 @@- --- -- - -
clear_conversations
@@ -500,7 +501,7 @@
init
-
login
[source]+
login
()
login
[source]@@ -552,7 +553,7 @@
login
()
login
-
open_chat
[source]+
open_chat
(conversation_id
=''
)
open_chat
[source]@@ -577,7 +578,7 @@
open_chat
(conversation_id
=''
)
open_chat
-
remove_portal
[source]+
remove_portal
()
remove_portal
[source]@@ -680,7 +681,32 @@
remove_portal
()
remove_portal
-
request
[source]+
request
(prompt
:str
)
input_prompt
[source]+ + + + + + + + + + {% endraw %} + + {% raw %} + ++
input_prompt
(prompt
)+ +@@ -921,7 +947,7 @@+@@ -834,7 +860,7 @@+ +@@ -755,7 +781,7 @@+ + + +@@ -730,7 +756,7 @@ @@ -705,7 +731,7 @@
request
-
get_last_response
[source]+
get_last_response
()
get_last_response
[source]
get_last_response
()
get_last_response
-
get_response
[source]+
get_response
()
get_response
[source]
get_response
()
get_response
-
ask
[source]+
ask
(prompt
:str
)
ask
[source]
ask
(prompt
:str
)
ask
-
get_screenshot
[source]+
get_screenshot
()
get_screenshot
[source]
get_screenshot
()
get_screenshot
-
class
attrdict
[source]+
attrdict
() ::dict
class
attrdict
[source]
attrdict
() ::dict
dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's @@ -955,7 +981,7 @@
class
attrdict
-
attributize
[source]+
attributize
(obj
)
attributize
[source]
attributize
(obj
)Add attributes to a dictionary and its sub-dictionaries.
@@ -981,7 +1007,7 @@
attributize
-
retry_on_status_code
[source]+
retry_on_status_code
(func
)
retry_on_status_code
[source]
retry_on_status_code
(func
)Retry decorator that retries a function on specific status codes.
@@ -1007,7 +1033,7 @@
retry_on_status_code -
retry_on_status_code.
[source].wrapper +
retry_on_status_code.
(*.wrapper args
, **kwargs
)
retry_on_status_code.
[source].wrapper @@ -1032,7 +1058,7 @@
retry_on_status_code.
(*.wrapper args
, **kwargs
)
retry_on
@@ -1057,7 +1083,7 @@
new_id
-
delta
[source]+
delta
(prompt
)
delta
[source]@@ -1082,7 +1108,7 @@
delta
(prompt
)
delta
-
chat_delta
[source]+
chat_delta
(prompt
)
chat_delta
[source]@@ -1107,7 +1133,7 @@
chat_delta
(prompt
)
chat_delta
-
mock_create
[source]+
mock_create
(*args
, **kwargs
)
mock_create
[source]@@ -1132,7 +1158,7 @@
mock_create
(*args
, **kwargs
)
mock_create
-
mock_chat_create
[source]+
mock_chat_create
(*args
, **kwargs
)
mock_chat_create
[source]@@ -1287,7 +1313,7 @@
mock_chat_create
(*args
, **kwargs
)
mock_chat_create
-
mock_openai
[source]+
mock_openai
(monkeypatch
)
mock_openai
[source]diff --git a/docs/tools_duckduckgo.html b/docs/tools_duckduckgo.html index 06981f4..58e520f 100644 --- a/docs/tools_duckduckgo.html +++ b/docs/tools_duckduckgo.html @@ -110,12 +110,12 @@
mock_openai
(monkeypatch
)
class
max_results=self.max_results, ) if results is None or len(results) == 0: - return f'搜索「{query}」没有发现好的{DuckDuckGo 搜索结果:{results}}' + return f'No suitable DuckDuckGo search results found for "{query}": {{{results}}}' snippets = '\n'.join([result['body'] for result in results]) return ( - f'用 DuckDuckGo 搜索「{query}」的结果:「\n' + f'Results for the DuckDuckGo search "{query}": {{\n' f'{snippets}\n' - f'」' + f'}}' ) def results(self, query: str, num_results: int) -> List[Dict]: diff --git a/docs/tools_pyknp.html b/docs/tools_pyknp.html index 79d3ca5..b972ac8 100644 --- a/docs/tools_pyknp.html +++ b/docs/tools_pyknp.html @@ -50,7 +50,7 @@ @@ -103,7 +103,48 @@
halfwidth_to_fullwidth<
+ + + + + + + + {% endraw %} + + {% raw %} + ++ ++ {% endraw %} + ++ {% raw %} + ++++++ +pip install beautifulsoup4 +pip install lxml ++ +++ ++ + + +diff --git a/ipymock/_nbdev.py b/ipymock/_nbdev.py index 8a339f7..2755ab1 100644 --- a/ipymock/_nbdev.py +++ b/ipymock/_nbdev.py @@ -9,8 +9,8 @@ "device_pixel_ratio": "2_automation.ipynb", "init": "2_browser.ipynb", "quit": "2_automation.ipynb", - "logger": "2_automation.ipynb", - "handler": "2_automation.ipynb", + "logger": "2_browser.ipynb", + "handler": "2_browser.ipynb", "ok": "2_automation.ipynb", "last": "2_automation.ipynb", "new": "2_automation.ipynb", @@ -88,6 +88,7 @@ "chatgpt_red_500": "2_browser.ipynb", "chatgpt_big_response": "2_browser.ipynb", "chatgpt_small_response": "2_browser.ipynb", + "input_prompt": "2_browser.ipynb", "request": "2_browser.ipynb", "get_last_response": "2_browser.ipynb", "get_response": "2_browser.ipynb", @@ -132,7 +133,8 @@ "DuckDuckGoSearchAPIWrapper": "4_tools_duckduckgo.ipynb", "is_halfwidth": "4_tools_pyknp.ipynb", "halfwidth_to_fullwidth": "4_tools_pyknp.ipynb", - "annotate": "4_tools_pyknp.ipynb"} + "annotate": "4_tools_pyknp.ipynb", + "annotate_html": "4_tools_pyknp.ipynb"} modules = ["__init__.py", "automation.py", diff --git a/ipymock/browser.py b/ipymock/browser.py index e4c9db7..88ba9ff 100644 --- a/ipymock/browser.py +++ b/ipymock/browser.py @@ -2,9 +2,9 @@ __all__ = ['common', 'get_conversations', 'get_conversation', 'handle_conversation_detail', 'start_conversation', 'generate_title', 'rename_title', 'delete_conversation', 'recover_conversation', 'clear_conversations', - 'init', 'login', 'open_chat', 'remove_portal', 'request', 'get_last_response', 'get_response', 'ask', - 'get_screenshot', 'attrdict', 'attributize', 'retry_on_status_code', 'content', 'new_id', 'delta', - 'chat_delta', 'mock_create', 'mock_chat_create', 'mock_openai'] + 'init', 'login', 'open_chat', 'remove_portal', 'input_prompt', 'request', 'get_last_response', + 'get_response', 'ask', 'get_screenshot', 'attrdict', 'attributize', 'retry_on_status_code', 'content', + 'new_id', 'delta', 'chat_delta', 'mock_create', 'mock_chat_create', 'mock_openai'] # Internal Cell from queue import Queue @@ -43,6 +43,19 @@ class Common: common.access_token = common.config.get('access_token', common.access_token) common.conversation_id = common.config.get('conversation_id', common.conversation_id) +# Internal Cell +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +handler = logging.StreamHandler() +handler.setFormatter(logging.Formatter( + fmt = '[%(asctime)s][%(levelname)s]<%(name)s> %(message)s', + datefmt = '%H:%M:%S' +)) +logger.addHandler(handler) + # Cell def get_conversations(): response = requests.get(f'{common.chat_gpt_base_url}/conversations?offset=0&limit=100', headers = {'Authorization': f'Bearer {common.access_token}'}) @@ -289,7 +302,22 @@ def init(chrome_args = set()): # Cell def login(): - new('https://chatgpt.com/auth/login') + if 'github' in common.chat_gpt_base_url: + new('https://github.com/login') + wait(5.0) + input(common.config['email'], 'Username or email address', yoffset = 35) + wait(5.0) + input(common.config['password'], 'Password', yoffset = 35) + wait(5.0) + click('Sign in') + wait(5.0) + click('Use passkey') + wait(stability_duration = 5.0) + common.driver.maximize_window() + wait(1.0) + return + + new(f'{common.chat_gpt_base_url}/auth/login') # WebDriverWait(common.driver, 5).until( # expected_conditions.presence_of_element_located((By.XPATH, '//*[text()="Log in"]')) @@ -390,9 +418,9 @@ def open_chat(conversation_id = ''): from .automation import driver common.driver = driver if conversation_id == '': - common.driver.get('https://chatgpt.com/') + common.driver.get(f'{common.chat_gpt_base_url}/') else: - common.driver.get(f'https://chatgpt.com/c/{conversation_id}') + common.driver.get(f'{common.chat_gpt_base_url}/c/{conversation_id}') if common.conversation_id != conversation_id: common.conversation_id = conversation_id common.parent_message_id = '' @@ -402,6 +430,8 @@ def open_chat(conversation_id = ''): # ) wait(5.0) + if 'copilot' in common.chat_gpt_base_url: + return remove_portal() def remove_portal(): @@ -446,7 +476,36 @@ def remove_portal(): # from ipymock.automation import exists, touch # Cell +def input_prompt(prompt): + # textbox.send_keys(prompt.strip()) + # common.driver.execute_script(''' + # var element = arguments[0], txt = arguments[1]; + # element.value += txt; + # element.dispatchEvent(new Event("change")); + # ''', + # textbox, + # prompt.strip(), + # ) + for line in prompt.strip().split('\n'): + fill(line) + ActionChains(common.driver).key_down(Keys.SHIFT).send_keys(Keys.ENTER).key_up(Keys.SHIFT).perform() + + # WebDriverWait(common.driver, 3).until_not( + # expected_conditions.presence_of_element_located(chatgpt_disabled_button) + # ) + wait(stability_duration = 3.0) + + # textbox.send_keys('\n') + # textbox.send_keys(Keys.ENTER) + def request(prompt: str) -> None: + if 'copilot' in common.chat_gpt_base_url: + click('Ask Copilot') + input_prompt(prompt) + fill(Keys.ENTER) + wait(1.0) + return + # try: # textbox = WebDriverWait(common.driver, 5).until( # expected_conditions.element_to_be_clickable(chatgpt_textbox) @@ -472,26 +531,7 @@ def request(prompt: str) -> None: # touch(textbox) click('Message ChatGPT') - # textbox.send_keys(prompt.strip()) - # common.driver.execute_script(''' - # var element = arguments[0], txt = arguments[1]; - # element.value += txt; - # element.dispatchEvent(new Event("change")); - # ''', - # textbox, - # prompt.strip(), - # ) - for line in prompt.strip().split('\n'): - fill(line) - ActionChains(common.driver).key_down(Keys.SHIFT).send_keys(Keys.ENTER).key_up(Keys.SHIFT).perform() - - # WebDriverWait(common.driver, 3).until_not( - # expected_conditions.presence_of_element_located(chatgpt_disabled_button) - # ) - wait(stability_duration = 3.0) - - # textbox.send_keys('\n') - # textbox.send_keys(Keys.ENTER) + input_prompt(prompt) # click('ChatGPT can make mistakes. Check important info.') # send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png')) @@ -509,15 +549,59 @@ def request(prompt: str) -> None: # pass def get_last_response(): + if 'copilot' in common.chat_gpt_base_url: + chatgpt_response = (By.XPATH, '//div[contains(@class, "markdown-body")]') + chatgpt_big_response = (By.XPATH, '//div[starts-with(@class, "js-snippet-clipboard-copy-unpositioned")]//div[p or pre]') + chatgpt_small_response = (By.XPATH, './/code[div]') for xpath in chatgpt_response, chatgpt_big_response: - responses = common.driver.find_elements(*xpath) + while True: + responses = common.driver.find_elements(*xpath) + elements = [] + if responses != []: + try: + elements = responses[-1].find_elements(*chatgpt_small_response) + except StaleElementReferenceException: + continue + break + if len(elements) == 1: + return elements[0] if responses != []: - elements = responses[-1].find_elements(*chatgpt_small_response) - if len(elements) == 1: - return elements[0] return responses[-1] def get_response() -> Generator[str, None, None]: + if 'copilot' in common.chat_gpt_base_url: + from .automation import get_html_hash + # Get the initial hash value + previous_hash, previous_time = get_html_hash() + response = get_last_response() + + # Wait until the HTML does not change + start_time = time.time() + while True: + time.sleep(0.1) + + # Get the current hash value + current_hash, current_time = get_html_hash() + response = get_last_response() + + # Check if the hash value has stabilized + if current_hash == previous_hash: + if current_time - previous_time >= 5.0: + logger.info('HTML content has stabilized.') + break + else: + # Update hash and time if the content changes + previous_hash, previous_time = current_hash, current_time + yield markdownize(response.get_attribute('innerHTML')) + + # Check for timeout + if current_time - start_time >= float('inf'): + logger.info('Wait for HTML stabilization timed out.') + break + response = get_last_response() + yield markdownize(response.get_attribute('innerHTML')) + return + try: result_streaming = WebDriverWait(common.driver, 5).until( expected_conditions.presence_of_element_located(chatgpt_streaming) diff --git a/ipymock/nlp.py b/ipymock/nlp.py index ac9aa19..29a9b0d 100644 --- a/ipymock/nlp.py +++ b/ipymock/nlp.py @@ -1,6 +1,6 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/4_tools_pyknp.ipynb (unless otherwise specified). -__all__ = ['is_halfwidth', 'halfwidth_to_fullwidth', 'annotate'] +__all__ = ['is_halfwidth', 'halfwidth_to_fullwidth', 'annotate', 'annotate_html'] # Internal Cell from pyknp import Juman @@ -45,4 +45,40 @@ def annotate(text): yield mrph.midasi continue yield f'{mrph.midasi}' - yield '\n' \ No newline at end of file + yield '\n' + +# Internal Cell +import bs4 +from .nlp import annotate +from IPython.display import display, HTML + +# Cell +def annotate_html(content, interactive = False): + # Parse the HTML content with BeautifulSoup + soup = bs4.BeautifulSoup(content, 'lxml-xml') + + # Iterate through the div elements and process only the leaf nodes + for div in soup.find_all('div'): # Find all 'div' elements in the soup + # Check if the div is a leaf node (contains only text) + if div.find_all(True): # Has child tags, so it's a leaf node + continue + try: + answser = '' + line = '' + for word in annotate(div.get_text()): + answser += word + if word == '\n': + if interactive: + display(HTML(line)) + line = '' + else: + line += word + # Replace the content of the div with parsed HTML + new_content = bs4.BeautifulSoup(answser, 'html.parser') + div.clear() # Clear the original content + div.append(new_content) # Append the new parsed content + except Exception as e: + print(f'Error processing content: {e}') + continue + + return str(soup) # Convert the soup back to string \ No newline at end of file diff --git a/ipymock/reader.py b/ipymock/reader.py index 919118c..5a55df1 100644 --- a/ipymock/reader.py +++ b/ipymock/reader.py @@ -50,12 +50,12 @@ def run(self, query: str) -> str: max_results=self.max_results, ) if results is None or len(results) == 0: - return f'搜索「{query}」没有发现好的{DuckDuckGo 搜索结果:{results}}' + return f'No suitable DuckDuckGo search results found for "{query}": {{{results}}}' snippets = '\n'.join([result['body'] for result in results]) return ( - f'用 DuckDuckGo 搜索「{query}」的结果:「\n' + f'Results for the DuckDuckGo search "{query}": {{\n' f'{snippets}\n' - f'」' + f'}}' ) def results(self, query: str, num_results: int) -> List[Dict]: diff --git a/nbs/2_browser.ipynb b/nbs/2_browser.ipynb index 6f14504..c16dedd 100644 --- a/nbs/2_browser.ipynb +++ b/nbs/2_browser.ipynb @@ -81,6 +81,45 @@ " common.conversation_id = common.config.get('conversation_id', common.conversation_id)" ] }, + { + "cell_type": "markdown", + "id": "5ee9010c", + "metadata": {}, + "source": [ + "---\n", + "Logging" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0e5bffd", + "metadata": {}, + "outputs": [], + "source": [ + "# exporti\n", + "import logging\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "logger.setLevel(logging.DEBUG)\n", + "\n", + "handler = logging.StreamHandler()\n", + "handler.setFormatter(logging.Formatter(\n", + " fmt = '[%(asctime)s][%(levelname)s]<%(name)s> %(message)s',\n", + " datefmt = '%H:%M:%S'\n", + "))\n", + "logger.addHandler(handler)" + ] + }, + { + "cell_type": "markdown", + "id": "e1622909", + "metadata": {}, + "source": [ + "---\n", + "Using OpenAI Backend API" + ] + }, { "cell_type": "code", "execution_count": null, @@ -533,7 +572,22 @@ "source": [ "# export\n", "def login():\n", - " new('https://chatgpt.com/auth/login')\n", + " if 'github' in common.chat_gpt_base_url:\n", + " new('https://github.com/login')\n", + " wait(5.0)\n", + " input(common.config['email'], 'Username or email address', yoffset = 35)\n", + " wait(5.0)\n", + " input(common.config['password'], 'Password', yoffset = 35)\n", + " wait(5.0)\n", + " click('Sign in')\n", + " wait(5.0)\n", + " click('Use passkey')\n", + " wait(stability_duration = 5.0)\n", + " common.driver.maximize_window()\n", + " wait(1.0)\n", + " return\n", + "\n", + " new(f'{common.chat_gpt_base_url}/auth/login')\n", "\n", " # WebDriverWait(common.driver, 5).until(\n", " # expected_conditions.presence_of_element_located((By.XPATH, '//*[text()=\"Log in\"]'))\n", @@ -656,9 +710,9 @@ " from ipymock.automation import driver\n", " common.driver = driver\n", " if conversation_id == '':\n", - " common.driver.get('https://chatgpt.com/')\n", + " common.driver.get(f'{common.chat_gpt_base_url}/')\n", " else:\n", - " common.driver.get(f'https://chatgpt.com/c/{conversation_id}')\n", + " common.driver.get(f'{common.chat_gpt_base_url}/c/{conversation_id}')\n", " if common.conversation_id != conversation_id:\n", " common.conversation_id = conversation_id\n", " common.parent_message_id = ''\n", @@ -668,6 +722,8 @@ " # )\n", " wait(5.0)\n", "\n", + " if 'copilot' in common.chat_gpt_base_url:\n", + " return\n", " remove_portal()\n", "\n", "def remove_portal():\n", @@ -791,7 +847,36 @@ "outputs": [], "source": [ "# export\n", + "def input_prompt(prompt):\n", + " # textbox.send_keys(prompt.strip())\n", + " # common.driver.execute_script('''\n", + " # var element = arguments[0], txt = arguments[1];\n", + " # element.value += txt;\n", + " # element.dispatchEvent(new Event(\"change\"));\n", + " # ''',\n", + " # textbox,\n", + " # prompt.strip(),\n", + " # )\n", + " for line in prompt.strip().split('\\n'):\n", + " fill(line)\n", + " ActionChains(common.driver).key_down(Keys.SHIFT).send_keys(Keys.ENTER).key_up(Keys.SHIFT).perform()\n", + "\n", + " # WebDriverWait(common.driver, 3).until_not(\n", + " # expected_conditions.presence_of_element_located(chatgpt_disabled_button)\n", + " # )\n", + " wait(stability_duration = 3.0)\n", + "\n", + " # textbox.send_keys('\\n')\n", + " # textbox.send_keys(Keys.ENTER)\n", + "\n", "def request(prompt: str) -> None:\n", + " if 'copilot' in common.chat_gpt_base_url:\n", + " click('Ask Copilot')\n", + " input_prompt(prompt)\n", + " fill(Keys.ENTER)\n", + " wait(1.0)\n", + " return\n", + "\n", " # try:\n", " # textbox = WebDriverWait(common.driver, 5).until(\n", " # expected_conditions.element_to_be_clickable(chatgpt_textbox)\n", @@ -817,26 +902,7 @@ " # touch(textbox)\n", " click('Message ChatGPT')\n", "\n", - " # textbox.send_keys(prompt.strip())\n", - " # common.driver.execute_script('''\n", - " # var element = arguments[0], txt = arguments[1];\n", - " # element.value += txt;\n", - " # element.dispatchEvent(new Event(\"change\"));\n", - " # ''',\n", - " # textbox,\n", - " # prompt.strip(),\n", - " # )\n", - " for line in prompt.strip().split('\\n'):\n", - " fill(line)\n", - " ActionChains(common.driver).key_down(Keys.SHIFT).send_keys(Keys.ENTER).key_up(Keys.SHIFT).perform()\n", - "\n", - " # WebDriverWait(common.driver, 3).until_not(\n", - " # expected_conditions.presence_of_element_located(chatgpt_disabled_button)\n", - " # )\n", - " wait(stability_duration = 3.0)\n", - "\n", - " # textbox.send_keys('\\n')\n", - " # textbox.send_keys(Keys.ENTER)\n", + " input_prompt(prompt)\n", "\n", " # click('ChatGPT can make mistakes. Check important info.')\n", " # send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png'))\n", @@ -854,15 +920,59 @@ " # pass\n", "\n", "def get_last_response():\n", + " if 'copilot' in common.chat_gpt_base_url:\n", + " chatgpt_response = (By.XPATH, '//div[contains(@class, \"markdown-body\")]')\n", + " chatgpt_big_response = (By.XPATH, '//div[starts-with(@class, \"js-snippet-clipboard-copy-unpositioned\")]//div[p or pre]')\n", + " chatgpt_small_response = (By.XPATH, './/code[div]')\n", " for xpath in chatgpt_response, chatgpt_big_response:\n", - " responses = common.driver.find_elements(*xpath)\n", + " while True:\n", + " responses = common.driver.find_elements(*xpath)\n", + " elements = []\n", + " if responses != []:\n", + " try:\n", + " elements = responses[-1].find_elements(*chatgpt_small_response)\n", + " except StaleElementReferenceException:\n", + " continue\n", + " break\n", + " if len(elements) == 1:\n", + " return elements[0]\n", " if responses != []:\n", - " elements = responses[-1].find_elements(*chatgpt_small_response)\n", - " if len(elements) == 1:\n", - " return elements[0]\n", " return responses[-1]\n", "\n", "def get_response() -> Generator[str, None, None]:\n", + " if 'copilot' in common.chat_gpt_base_url:\n", + " from ipymock.automation import get_html_hash\n", + " # Get the initial hash value\n", + " previous_hash, previous_time = get_html_hash()\n", + " response = get_last_response()\n", + "\n", + " # Wait until the HTML does not change\n", + " start_time = time.time()\n", + " while True:\n", + " time.sleep(0.1)\n", + "\n", + " # Get the current hash value\n", + " current_hash, current_time = get_html_hash()\n", + " response = get_last_response()\n", + "\n", + " # Check if the hash value has stabilized\n", + " if current_hash == previous_hash:\n", + " if current_time - previous_time >= 5.0:\n", + " logger.info('HTML content has stabilized.')\n", + " break\n", + " else:\n", + " # Update hash and time if the content changes\n", + " previous_hash, previous_time = current_hash, current_time\n", + " yield markdownize(response.get_attribute('innerHTML'))\n", + "\n", + " # Check for timeout\n", + " if current_time - start_time >= float('inf'):\n", + " logger.info('Wait for HTML stabilization timed out.')\n", + " break\n", + " response = get_last_response()\n", + " yield markdownize(response.get_attribute('innerHTML'))\n", + " return\n", + "\n", " try:\n", " result_streaming = WebDriverWait(common.driver, 5).until(\n", " expected_conditions.presence_of_element_located(chatgpt_streaming)\n", @@ -1278,7 +1388,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" } diff --git a/nbs/4_tools_duckduckgo.ipynb b/nbs/4_tools_duckduckgo.ipynb index 6345e28..0d57bde 100644 --- a/nbs/4_tools_duckduckgo.ipynb +++ b/nbs/4_tools_duckduckgo.ipynb @@ -75,12 +75,12 @@ " max_results=self.max_results,\n", " )\n", " if results is None or len(results) == 0:\n", - " return f'搜索「{query}」没有发现好的{DuckDuckGo 搜索结果:{results}}'\n", + " return f'No suitable DuckDuckGo search results found for \"{query}\": {{{results}}}'\n", " snippets = '\\n'.join([result['body'] for result in results])\n", " return (\n", - " f'用 DuckDuckGo 搜索「{query}」的结果:「\\n'\n", + " f'Results for the DuckDuckGo search \"{query}\": {{\\n'\n", " f'{snippets}\\n'\n", - " f'」'\n", + " f'}}'\n", " )\n", "\n", " def results(self, query: str, num_results: int) -> List[Dict]:\n", diff --git a/nbs/4_tools_pyknp.ipynb b/nbs/4_tools_pyknp.ipynb index fe9e7f8..ff19897 100644 --- a/nbs/4_tools_pyknp.ipynb +++ b/nbs/4_tools_pyknp.ipynb @@ -87,6 +87,66 @@ " yield f'{mrph.midasi}'\n", " yield '\\n'" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```bash\n", + "pip install beautifulsoup4\n", + "pip install lxml\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# exporti\n", + "import bs4\n", + "from ipymock.nlp import annotate\n", + "from IPython.display import display, HTML" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "def annotate_html(content, interactive = False):\n", + " # Parse the HTML content with BeautifulSoup\n", + " soup = bs4.BeautifulSoup(content, 'lxml-xml')\n", + "\n", + " # Iterate through the div elements and process only the leaf nodes\n", + " for div in soup.find_all('div'): # Find all 'div' elements in the soup\n", + " # Check if the div is a leaf node (contains only text)\n", + " if div.find_all(True): # Has child tags, so it's a leaf node\n", + " continue\n", + " try:\n", + " answser = ''\n", + " line = ''\n", + " for word in annotate(div.get_text()):\n", + " answser += word\n", + " if word == '\\n':\n", + " if interactive:\n", + " display(HTML(line))\n", + " line = ''\n", + " else:\n", + " line += word\n", + " # Replace the content of the div with parsed HTML\n", + " new_content = bs4.BeautifulSoup(answser, 'html.parser')\n", + " div.clear() # Clear the original content\n", + " div.append(new_content) # Append the new parsed content\n", + " except Exception as e:\n", + " print(f'Error processing content: {e}')\n", + " continue\n", + "\n", + " return str(soup) # Convert the soup back to string" + ] } ], "metadata": { diff --git a/settings.ini b/settings.ini index 8b58a24..251cafe 100644 --- a/settings.ini +++ b/settings.ini @@ -13,7 +13,7 @@ author = andrew author_email = andrew.saintway@gmail.com copyright = 2023 onwards, Neuro Spirit, DAO. branch = main -version = 1.1.2 +version = 1.2.0 min_python = 3.6 audience = Developers language = English