diff --git a/.pnp.cjs b/.pnp.cjs index 83b130f0..e1ff98ba 100755 --- a/.pnp.cjs +++ b/.pnp.cjs @@ -49,6 +49,10 @@ const RAW_RUNTIME_STATE = "name": "@acusti/matchmaking",\ "reference": "workspace:packages/matchmaking"\ },\ + {\ + "name": "@acusti/parsing",\ + "reference": "workspace:packages/parsing"\ + },\ {\ "name": "@acusti/post",\ "reference": "workspace:packages/post"\ @@ -93,6 +97,7 @@ const RAW_RUNTIME_STATE = ["@acusti/dropdown", ["workspace:packages/dropdown"]],\ ["@acusti/input-text", ["virtual:afdae6bed2220ffeb070ee40379649c9baf1f40f92a18dda206ea109ee08e197c8d4ded52ffaeedb5def9932505c08fa82afb8ccafbb0155d514fb4046b48c2a#workspace:packages/input-text", "workspace:packages/input-text"]],\ ["@acusti/matchmaking", ["workspace:packages/matchmaking"]],\ + ["@acusti/parsing", ["workspace:packages/parsing"]],\ ["@acusti/post", ["workspace:packages/post"]],\ ["@acusti/styling", ["virtual:42ba1efe27540225bc0e39545c970733c8d53f6ec1dba8054209f9065065055aa19797297531b5570ac14b89957f918e9364f5540989a288f82948c6bc571b07#workspace:packages/styling", "workspace:packages/styling"]],\ ["@acusti/textual", ["workspace:packages/textual"]],\ @@ -300,6 +305,17 @@ const RAW_RUNTIME_STATE = "linkType": "SOFT"\ }]\ ]],\ + ["@acusti/parsing", [\ + ["workspace:packages/parsing", {\ + "packageLocation": "./packages/parsing/",\ + "packageDependencies": [\ + ["@acusti/parsing", "workspace:packages/parsing"],\ + ["typescript", "patch:typescript@npm%3A5.3.3#optional!builtin::version=5.3.3&hash=e012d7"],\ + ["vitest", "virtual:ae52462715931ea007dc50efba07d133bbd2707991d966b03dd50bc7d26895a233be9457b0dfcacf62c9e7732b6ee7fddb7ea173aaf1a49dde4d8dadb45358ba#npm:1.1.0"]\ + ],\ + "linkType": "SOFT"\ + }]\ + ]],\ ["@acusti/post", [\ ["workspace:packages/post", {\ "packageLocation": "./packages/post/",\ diff --git a/packages/parsing/README.md b/packages/parsing/README.md new file mode 100644 index 00000000..cbd9d7c3 --- /dev/null +++ b/packages/parsing/README.md @@ -0,0 +1,41 @@ +# 
@acusti/parsing + +[![latest version](https://img.shields.io/npm/v/@acusti/parsing?style=for-the-badge)](https://www.npmjs.com/package/@acusti/parsing) +[![maintenance status](https://img.shields.io/npms-io/maintenance-score/@acusti/parsing?style=for-the-badge)](https://npms.io/search?q=%40acusti%2Fparsing) +[![bundle size](https://img.shields.io/bundlephobia/minzip/@acusti/parsing?style=for-the-badge)](https://bundlephobia.com/package/@acusti/parsing) +[![downloads per month](https://img.shields.io/npm/dm/@acusti/parsing?style=for-the-badge)](https://www.npmjs.com/package/@acusti/parsing) + +`@acusti/parsing` exports `asJSON`, a function that takes a string and +attempts to parse it as JSON, returning the resulting JS value, or `null` +if the string defeated all attempts at parsing it. This is especially +useful for generative AI when you prompt an LLM to generate a response in +JSON, because most models are unable to consistently generate valid JSON, +and even when they do, will often have a pre- or post-amble as a part of +the response. 
+ +## Usage + +``` +npm install @acusti/parsing +# or +yarn add @acusti/parsing +``` + +Import `asJSON` (it’s a named export) and pass a string to it: + +````js +import { asJSON } from '@acusti/parsing'; + +asJSON(`Here is the JSON output for the "About Us" page based on the provided props: +{ +"heading": "Our Story", +"subheading": "A Passion for Sourdough" +} +`); +/* results in: +{ + heading: 'Our Story', + subheading: 'A Passion for Sourdough', +} +*/ +```` diff --git a/packages/parsing/package.json b/packages/parsing/package.json new file mode 100644 index 00000000..8f8f1470 --- /dev/null +++ b/packages/parsing/package.json @@ -0,0 +1,40 @@ +{ + "name": "@acusti/parsing", + "version": "0.1.0", + "type": "module", + "sideEffects": false, + "exports": "./dist/index.js", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": [ + "dist", + "src" + ], + "description": "Loosely parse a string as JSON with numerous affordances for syntax errors", + "keywords": [ + "parse", + "parsing", + "json", + "llm", + "genai", + "generative-ai" + ], + "scripts": { + "test": "vitest" + }, + "repository": { + "type": "git", + "url": "https://github.com/acusti/uikit.git", + "directory": "packages/parsing" + }, + "author": "andrew patton (https://www.acusti.ca)", + "license": "Unlicense", + "bugs": { + "url": "https://github.com/acusti/uikit/issues" + }, + "homepage": "https://github.com/acusti/uikit/tree/main/packages/parsing#readme", + "devDependencies": { + "typescript": "^5.3.3", + "vitest": "^1.1.0" + } +} diff --git a/packages/parsing/src/as-json.test.ts b/packages/parsing/src/as-json.test.ts new file mode 100644 index 00000000..91531f29 --- /dev/null +++ b/packages/parsing/src/as-json.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from 'vitest'; + +import { asJSON } from './as-json.js'; + +describe('@acusti/parsing', () => { + describe('asJSON', () => { + it('should convert a LLM response string to a props object', () => { + const response = `\ 
+Here is the JSON output for the "About Us" page based on the provided props: +{ +"heading": "Our Story", +"subheading": "A Passion for Sourdough" +} +`; + expect(asJSON(response)).toEqual({ + heading: 'Our Story', + subheading: 'A Passion for Sourdough', + }); + }); + + it('should strip invalid JSON when the LLM response goes off the rails', () => { + const response = `\ +Here is the JSON output for the "Meet the Team" page: +{ +"callToAction": "Learn More", +"heading": "Meet the Team", +"subheading": "Our bakery is built on the foundation of passionate individuals who are dedicated to creating the best sourdough bread in North Lake Tahoe. Meet the team behind Masa Madre." +[ +"teamMembers": [ +{ +"name": "Jenny Lee", +"role": "Head Baker", +"description": "Jenny is the mastermind behind Masa Madre's delicious sourdough bread. With over 10 years of experience in the baking industry, she brings a wealth of knowledge and expertise to the table. Jenny's passion for sourdough bread is evident in every loaf she creates, and her dedication to using only the finest ingredients has earned her a loyal following of customers." +}, +{ +"name": "Tommy Thompson", +"role": "Baker", +"description": "Tommy is the muscle behind Masa Madre's bakery. With a background in culinary arts, he brings a creative touch to every loaf he bakes. Tommy's attention to detail and commitment to quality has made him an invaluable member of the team." +}, +{ +"name": "Emily Chen", +"role": "Marketing Manager", +"description": "Emily is the marketing genius behind Masa Madre's success. With a background in advertising and a passion for food, she has helped to create a strong brand identity for the bakery. Emily's creativity and attention to detail have been instrumental in building a loyal customer base." 
+ +] + +} +`; + expect(asJSON(response)).toEqual({ + callToAction: 'Learn More', + heading: 'Meet the Team', + subheading: + 'Our bakery is built on the foundation of passionate individuals who are dedicated to creating the best sourdough bread in North Lake Tahoe. Meet the team behind Masa Madre.', + }); + }); + }); +}); diff --git a/packages/parsing/src/as-json.ts b/packages/parsing/src/as-json.ts new file mode 100644 index 00000000..22797ceb --- /dev/null +++ b/packages/parsing/src/as-json.ts @@ -0,0 +1,91 @@ +// Adapted from https://github.com/langchain-ai/langchainjs/blob/215dd52/langchain-core/src/output_parsers/json.ts#L58 +// MIT License +const parsePartialJSON = (text: string) => { + // If the input is undefined/null, return null to indicate failure. + if (text == null) return null; + + // Attempt to parse the string as-is. + try { + return JSON.parse(text); + } catch (error) { + // Pass + } + + // Initialize variables. + let newText = ''; + const stack = []; + let isInsideString = false; + let escaped = false; + + // Process each character in the string one at a time. + for (let char of text) { + if (isInsideString) { + if (char === '"' && !escaped) { + isInsideString = false; + } else if (char === '\n' && !escaped) { + char = '\\n'; // Replace the newline character with the escape sequence. + } else if (char === '\\') { + escaped = !escaped; + } else { + escaped = false; + } + } else { + if (char === '"') { + isInsideString = true; + escaped = false; + } else if (char === '{') { + stack.push('}'); + } else if (char === '[') { + stack.push(']'); + } else if (char === '}' || char === ']') { + if (stack && stack[stack.length - 1] === char) { + stack.pop(); + } else { + // Mismatched closing character; the input is malformed. + return null; + } + } + } + + // Append the processed character to the new string. + newText += char; + } + + // If we're still inside a string at the end of processing, + // we need to close the string. 
+ if (isInsideString) { + newText += '"'; + } + + // Close any remaining open structures in the reverse order that they were opened. + for (let i = stack.length - 1; i >= 0; i -= 1) { + newText += stack[i]; + } + + // Attempt to parse the modified string as JSON. + try { + return JSON.parse(newText); + } catch (error) { + // If we still can't parse the string as JSON, return null to indicate failure. + return null; + } +}; + +type ReturnValue = string | boolean | number | Record | Array; + +export function asJSON(result: string): ReturnValue | null { + // because props are Record, there should only be 1 '{' and 1 '}' + const startJSONIndex = result.indexOf('{'); + let endJSONIndex = result.indexOf('}'); + if (endJSONIndex === -1) { + result += '}'; + endJSONIndex = result.length; + } + result = result.substring(startJSONIndex, endJSONIndex + 1); + // remove any arrays (TODO make this better) + result = result.split('[')[0]; + + return parsePartialJSON(result); + // const props: LayoutProps | null = parsePartialJSON(result); + // return props; +} diff --git a/packages/parsing/src/index.ts b/packages/parsing/src/index.ts new file mode 100644 index 00000000..40a4455b --- /dev/null +++ b/packages/parsing/src/index.ts @@ -0,0 +1 @@ +export { asJSON } from './as-json.js'; diff --git a/packages/parsing/tsconfig.json b/packages/parsing/tsconfig.json new file mode 100644 index 00000000..23afd591 --- /dev/null +++ b/packages/parsing/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src" + }, + "references": [] +} diff --git a/tsconfig.json b/tsconfig.json index 2daca8f2..3d52a9c1 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,7 +16,9 @@ "noUnusedLocals": true, "noUnusedParameters": true, "outDir": "dist", - "paths": { "@acusti/*": ["packages/*/src"] }, + "paths": { + "@acusti/*": ["packages/*/src"] + }, "skipLibCheck": true, "sourceMap": true, "strictNullChecks": true, @@ -32,6 +34,7 @@ { 
"path": "packages/dropdown" }, { "path": "packages/input-text" }, { "path": "packages/matchmaking" }, + { "path": "packages/parsing" }, { "path": "packages/post" }, { "path": "packages/styling" }, { "path": "packages/textual" }, diff --git a/yarn.lock b/yarn.lock index d603bc7f..026ae7ad 100644 --- a/yarn.lock +++ b/yarn.lock @@ -133,6 +133,15 @@ __metadata: languageName: unknown linkType: soft +"@acusti/parsing@workspace:packages/parsing": + version: 0.0.0-use.local + resolution: "@acusti/parsing@workspace:packages/parsing" + dependencies: + typescript: "npm:^5.3.3" + vitest: "npm:^1.1.0" + languageName: unknown + linkType: soft + "@acusti/post@npm:^0.5.1, @acusti/post@workspace:packages/post": version: 0.0.0-use.local resolution: "@acusti/post@workspace:packages/post"