diff --git a/examples/usage_example.ipynb b/examples/usage_example.ipynb new file mode 100644 index 0000000..3c098cd --- /dev/null +++ b/examples/usage_example.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Call through Python SDK" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "sys.path.append(\".\")\n", + "sys.path.append(\"..\")\n", + "sys.path.append(\"../..\")\n", + "load_dotenv()\n", + "\n", + "from any_parser import AnyParser \n", + "\n", + "example_apikey = os.getenv(\"CAMBIO_API_KEY\")\n", + "\n", + "example_local_file = \"./sample_data/test2.pdf\"\n", + "\n", + "op = AnyParser(example_apikey)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "file/document extraction test:\n", + "\n", + "['# Productivity and Business Processes\\n\\n## Overview\\n\\n| Investor Metrics | FY23 Q1 | FY23 Q2 | FY23 Q3 | FY23 Q4 | FY24 Q1 |\\n|:-------------------------------------------------------------------|:----------|:----------|:----------|:----------|:----------|\\n| Office Commercial products and cloud services revenue growth (y/y) | 7% / 13% | 7% 14% | 13% / 17% | 12% / 14% | 15% / 14% |\\n| Office Consumer products and cloud services revenue growth (y/y) | 7% 11% | (2)% 3% | 1% 4% | 3% 6% | 3% 4% |\\n| Office 365 Commercial seat growth (y/y) | 14% | 12% | 11% | 11% | 10% |\\n| Microsoft 365 Consumer subscribers (in millions) | 65.1 | 67.7 | 70.8 | 74.9 | 76.7 |\\n| Dynamics products and cloud services revenue growth (y/y) | 15% / 22% | 13% 20% | 17% / 21% | 19% / 21% | 22% / 21% |\\n| LinkedIn revenue growth (y/y) | 17% / 21% | 10% / 14% | 8% 11% | 6% 8% | 8% |\\n\\nGrowth rates include non-GAAP CC growth (GAAP %/CC%)\\n\\n## Press 
release\\n\\n## Business Highlights\\n\\nRevenue in Productivity and Business Processes was $17.0 billion and increased 7% (up 13% in constant currency), with the following business highlights:\\n\\nOffice Commercial products and cloud services revenue increased 7% (up 14% in constant currency) driven by Office 365 Commercial revenue growth of 11% (up 18% in constant currency)\\nOffice Consumer products and cloud services revenue decreased 2% (up 3% in constant currency) and Microsoft 365 Consumer subscribers grew to 63.2 million\\nLinkedIn revenue increased 10% (up 14% in constant currency)\\nDynamics products and cloud services revenue increased 13% (up 20% in constant currency) driven by Dynamics 365 revenue growth of 21% (up 29% in constant currency)\\n\\nServer products and cloud services revenue increased 20% (up 26% in constant currency) driven by Azure and other cloud services revenue growth of 31% (up 38% in constant currency)\\n\\nRevenue in More Personal Computing was $14.2 billion and decreased 19% (down 16% in constant currency), with the following business highlights:\\n\\nWindows OEM revenue decreased 39%\\nWindows Commercial products and cloud services revenue decreased 3% (up 3% in constant currency)\\nXbox content and services revenue decreased 12% (down 8% in constant currency)\\nSearch and news advertising revenue excluding traffic acquisition costs increased 10% (up 15% in constant currency)\\nDevices revenue decreased 39% (down 34% in constant currency)\\n\\n## Financial statement-MD&A\\n\\nHighlights from the second quarter of fiscal year 2024 compared with the second quarter of fiscal year 2023 included:\\n\\nMicrosoft Cloud revenue increased 24% to $33.7 billion\\nOffice Commercial products and cloud services revenue increased 15% driven by Office 365 Commercial growth of 17%\\nOffice Consumer products and cloud services revenue increased 5% and Microsoft 365 Consumer subscribers grew to 78.4 million\\nLinkedIn revenue increased 
9%\\nDynamics products and cloud services revenue increased 21% driven by Dynamics 365 growth of 27%\\nServer products and cloud services revenue increased 22% driven by Azure and other cloud services growth of 30%\\nWindows revenue increased 9% with Windows original equipment manufacturer licensing (\"Windows OEM\") revenue growth of 11% and Windows Commercial products and cloud services revenue growth of 9%\\nDevices revenue decreased 9%']\n" + ] + } + ], + "source": [ + "print(\"file/document extraction test:\")\n", + "content_result = op.extract(example_local_file)\n", + "print(type(content_result))\n", + "print(content_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "information extraction test:\n", + "\n", + "['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
Investor Metrics FY23 Q1 FY23 Q2 FY23 Q3 FY23 Q4 FY24 Q1
Office Commercial products and cloud services revenue growth (y/y)7% / 13% 7% 14% 13% / 17%12% / 14%15% / 14%
Office Consumer products and cloud services revenue growth (y/y) 7% 11% (2)% 3% 1% 4% 3% 6% 3% 4%
Office 365 Commercial seat growth (y/y) 14% 12% 11% 11% 10%
Microsoft 365 Consumer subscribers (in millions) 65.1 67.7 70.8 74.9 76.7
Dynamics products and cloud services revenue growth (y/y) 15% / 22%13% 20% 17% / 21%19% / 21%22% / 21%
LinkedIn revenue growth (y/y) 17% / 21%10% / 14%8% 11% 6% 8% 8%
']\n" + ] + } + ], + "source": [ + "print(\"information extraction test:\")\n", + "example_prompt = \"Return table under Investor Metrics in JSON format with year as the key and the column as subkeys.\"\n", + "qa_result = op.parse(example_local_file, example_prompt, mode=\"basic\")\n", + "print(type(qa_result))\n", + "print(qa_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "instruction extraction test:\n", + "\n", + "[[{'Office Commercial products and cloud services revenue growth (y/y)': '7% / 13%, 7% 14%, 13% / 17%, 12% / 14%, 15% / 14%', 'Office Consumer products and cloud services revenue growth (y/y)': '7% 11%, (2)% 3%, 1% 4%, 3% 6%, 3% 4%', 'Office 365 Commercial seat growth (y/y)': '14%, 12%, 11%, 11%, 10%', 'Microsoft 365 Consumer subscribers (in millions)': '65.1, 67.7, 70.8, 74.9, 76.7', 'Dynamics products and cloud services revenue growth (y/y)': '15% / 22%, 13% 20%, 17% / 21%, 19% / 21%, 22% / 21%', 'LinkedIn revenue growth (y/y)': '17% / 21%, 10% / 14%, 8% 11%, 6% 8%, 8%', 'Microsoft Cloud revenue increased': '24% to $33.7 billion', 'Office Commercial products and cloud services revenue increased': '15% driven by Office 365 Commercial growth of 17%', 'Office Consumer products and cloud services revenue increased': '5% and Microsoft 365 Consumer subscribers grew to 78.4 million', 'LinkedIn revenue increased': '9%', 'Dynamics products and cloud services revenue increased': '21% driven by Dynamics 365 growth of 27%', 'Server products and cloud services revenue increased': '22% driven by Azure and other cloud services growth of 30%', 'Windows revenue increased': '9% with Windows original equipment manufacturer licensing (\"Windows OEM\") revenue growth of 11% and Windows Commercial products and cloud services revenue growth of 9%', 'Devices revenue decreased': '9%'}]]\n" + ] + } + ], + "source": [ + "print(\"instruction extraction 
test:\")\n", + "example_instruction = \"Return the table under Investor Metrics in JSON format with year as the key and the column as subkeys.\"\n", + "instruction_result = op.instruct(example_local_file, example_instruction, mode=\"advanced\")\n", + "print(type(instruction_result))\n", + "print(instruction_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Call through Bash Script" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "file/document extraction test:\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " \"# Productivity and Business Processes\\n\\n## Overview\\n\\n| Investor Metrics | FY23 Q1 | FY23 Q2 | FY23 Q3 | FY23 Q4 | FY24 Q1 |\\n|:--- |:--- |:--- |:--- |:--- |:--- |\\n| Office Commercial products and cloud services revenue growth (y/y) | 7% / 13% | 7% 14% | 13% / 17% | 12% / 14% | 15% / 14% |\\n| Office Consumer products and cloud services revenue growth (y/y) | 7% 11% | (2)% 3% | 1% 4% | 3% 6% | 3% 4% |\\n| Office 365 Commercial seat growth (y/y) | 14% | 12% | 11% | 11% | 10% |\\n| Microsoft 365 Consumer subscribers (in millions) | 65.1 | 67.7 | 70.8 | 74.9 | 76.7 |\\n| Dynamics products and cloud services revenue growth (y/y) | 15% / 22% | 13% 20% | 17% / 21% | 19% / 21% | 22% / 21% |\\n| LinkedIn revenue growth (y/y) | 17% / 21% | 10% / 14% | 8% 11% | 6% 8% | 8% |\\n\\nGrowth rates include non-GAAP CC growth (GAAP %/CC%)\\n\\n## Press release\\n\\n## Business Highlights\\n\\nRevenue in Productivity and Business Processes was $17.0 billion and increased 7% (up 13% in constant currency), with the following business highlights:\\n\\n- Office Commercial products and cloud services revenue increased 7% (up 14% in constant currency) driven by Office 365 Commercial revenue growth of 11% (up 18% in constant currency)\\n- Office Consumer products and cloud services 
revenue decreased 2% (up 3% in constant currency) and Microsoft 365 Consumer subscribers grew to 63.2 million\\n- LinkedIn revenue increased 10% (up 14% in constant currency)\\n- Dynamics products and cloud services revenue increased 13% (up 20% in constant currency) driven by Dynamics 365 revenue growth of 21% (up 29% in constant currency)\\n\\nRevenue in Intelligent Cloud was $21.5 billion and increased 18% (up 24% in constant currency), with the following business highlights:\\n\\n- Server products and cloud services revenue increased 20% (up 26% in constant currency) driven by Azure and other cloud services revenue growth of 31% (up 38% in constant currency)\\n\\nRevenue in More Personal Computing was $14.2 billion and decreased 19% (down 16% in constant currency), with the following business highlights:\\n\\n- Windows OEM revenue decreased 39%\\n- Windows Commercial products and cloud services revenue decreased 3% (up 3% in constant currency) \\n- Xbox content and services revenue decreased 12% (down 8% in constant currency)\\n- Search and news advertising revenue excluding traffic acquisition costs increased 10% (up 15% in constant currency)\\n- Devices revenue decreased 39% (down 34% in constant currency)\\n\\n## Financial statement-MD&A\\n\\nHighlights from the second quarter of fiscal year 2024 compared with the second quarter of fiscal year 2023 included:\\n\\n- Microsoft Cloud revenue increased 24% to $33.7 billion\\n- Office Commercial products and cloud services revenue increased 15% driven by Office 365 Commercial growth of 17%\\n- Office Consumer products and cloud services revenue increased 5% and Microsoft 365 Consumer subscribers grew to 78.4 million\\n- LinkedIn revenue increased 9%\\n- Dynamics products and cloud services revenue increased 21% driven by Dynamics 365 growth of 27%\\n- Server products and cloud services revenue increased 22% driven by Azure and other cloud services growth of 30%\\n- Windows revenue increased 9% with Windows 
original equipment manufacturer licensing (\\\"Windows OEM\\\") revenue growth of 11% and Windows Commercial products and cloud services revenue growth of 9%\\n- Devices revenue decreased 9%\"\n", + "]\n" + ] + } + ], + "source": [ + "%%bash\n", + "cd ..\n", + "# NOTE(review): .env presumably defines CAMBIO_API_KEY - confirm.\n", + "source .env\n", + "APIKEY=\"$CAMBIO_API_KEY\"\n", + "FILE=./examples/sample_data/test2.pdf\n", + "# Expansions below are quoted so a key or path containing spaces or glob characters stays one argument.\n", + "echo \"file/document extraction test:\"\n", + "bash extract_parse.sh \"$APIKEY\" extract \"$FILE\" basic" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "information extraction test:\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " \"\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
Investor Metrics FY23 Q1 FY23 Q2 FY23 Q3 FY23 Q4 FY24 Q1
Office Commercial products and cloud services revenue growth (y/y)7% / 13% 7% 14% 13% / 17%12% / 14%15% / 14%
Office Consumer products and cloud services revenue growth (y/y) 7% 11% (2)% 3% 1% 4% 3% 6% 3% 4%
Office 365 Commercial seat growth (y/y) 14% 12% 11% 11% 10%
Microsoft 365 Consumer subscribers (in millions) 65.1 67.7 70.8 74.9 76.7
Dynamics products and cloud services revenue growth (y/y) 15% / 22%13% 20% 17% / 21%19% / 21%22% / 21%
LinkedIn revenue growth (y/y) 17% / 21%10% / 14%8% 11% 6% 8% 8%
\"\n", + "]\n" + ] + } + ], + "source": [ + "%%bash\n", + "cd ..\n", + "# NOTE(review): .env presumably defines CAMBIO_API_KEY - confirm.\n", + "source .env\n", + "APIKEY=\"$CAMBIO_API_KEY\"\n", + "FILE=./examples/sample_data/test2.pdf\n", + "# Expansions below are quoted so a key or path containing spaces or glob characters stays one argument.\n", + "echo \"information extraction test:\"\n", + "bash extract_parse.sh \"$APIKEY\" parse \"$FILE\" \"Return table under Investor Metrics in JSON format with year as the key and the column as subkeys.\" basic" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "instruction extraction test:\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " [\n", + " {\n", + " \"Office Commercial products and cloud services revenue growth (y/y)\": \"7% / 13%\",\n", + " \"Office Consumer products and cloud services revenue growth (y/y)\": \"7% 11%\",\n", + " \"Office 365 Commercial seat growth (y/y)\": \"14%\",\n", + " \"Microsoft 365 Consumer subscribers (in millions)\": \"65.1\",\n", + " \"Dynamics products and cloud services revenue growth (y/y)\": \"15% / 22%\",\n", + " \"LinkedIn revenue growth (y/y)\": \"17% / 21%\",\n", + " \"Business Highlights\": \"Revenue in Productivity and Business Processes was $17.0 billion and increased 7% (up 13% in constant currency), with the following business highlights:\\nOffice Commercial products and cloud services revenue increased 7% (up 14% in constant currency) driven by Office 365 Commercial revenue growth of 11% (up 18% in constant currency)\\nOffice Consumer products and cloud services revenue decreased 2% (up 3% in constant currency) and Microsoft 365 Consumer subscribers grew to 63.2 million\\nLinkedIn revenue increased 10% (up 14% in constant currency)\\nDynamics products and cloud services revenue increased 13% (up 20% in constant currency) driven by Dynamics 365 revenue growth of 21% (up 29% in constant currency)\",\n", + " \"Financial statement-MD&A\": \"Microsoft Cloud revenue increased 24% to $33.7 billion\\nOffice 
Commercial products and cloud services revenue increased 15% driven by Office 365 Commercial growth of 17%\\nOffice Consumer products and cloud services revenue increased 5% and Microsoft 365 Consumer subscribers grew to 78.4 million\\nLinkedIn revenue increased 9%\\nDynamics products and cloud services revenue increased 21% driven by Dynamics 365 growth of 27%\\nServer products and cloud services revenue increased 22% driven by Azure and other cloud services growth of 30%\\nWindows revenue increased 9% with Windows original equipment manufacturer licensing (\\\"Windows OEM\\\") revenue growth of 11% and Windows Commercial products and cloud services revenue growth of 9%\\nDevices revenue decreased 9%\"\n", + " }\n", + " ]\n", + "]\n" + ] + } + ], + "source": [ + "%%bash\n", + "cd ..\n", + "# NOTE(review): .env presumably defines CAMBIO_API_KEY - confirm.\n", + "source .env\n", + "APIKEY=\"$CAMBIO_API_KEY\"\n", + "FILE=./examples/sample_data/test2.pdf\n", + "# Expansions below are quoted so a key or path containing spaces or glob characters stays one argument.\n", + "echo \"instruction extraction test:\"\n", + "bash extract_parse.sh \"$APIKEY\" instruct \"$FILE\" \"Return table under Investor Metrics in JSON format with year as the key and the column as subkeys.\" advanced" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Done" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "saas", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}