Skip to content

Commit

Permalink
fix: A better assertions test
Browse files: browse the repository at this point in the history
  • Loading branch information
nadeesha committed Dec 7, 2024
1 parent 8985d3f commit 1ae0144
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 100 deletions.
2 changes: 1 addition & 1 deletion sdk-node/src/Inferable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {
validateFunctionSchema,
validateServiceName,
} from "./util";
import { assert, Assertions, assertRun } from "./assertions";
import { Assertions, assertRun } from "./assertions";

// Custom json formatter
debug.formatters.J = (json) => {
Expand Down
112 changes: 15 additions & 97 deletions sdk-node/src/assertions.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Inferable } from "./inferable";
import { z } from "zod";
import assert from "assert";
import { TEST_ENDPOINT } from "./tests/utils";
import { TEST_API_SECRET } from "./tests/utils";

Expand All @@ -11,122 +10,41 @@ describe("assertions", () => {
endpoint: TEST_ENDPOINT,
});

let timesRun = 0;

client.default.register({
name: "fetch",
func: async ({ url }: { url: string }) => {
if (url === "https://news.ycombinator.com/show") {
return `<div class="container">
<h1>Top Posts</h1>
<ul class="post-list">
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42331270" class="post-title">Banan-OS, an Unix-like operating system written from scratch</a></div>
<div class="post-id">ID: 42331270</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42329071" class="post-title">Replace "hub" by "ingest" in GitHub URLs for a prompt-friendly extract</a></div>
<div class="post-id">ID: 42329071</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42333823" class="post-title">Data Connector – Chat with Your Database and APIs</a></div>
<div class="post-id">ID: 42333823</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42332760" class="post-title">Checkmate, a server and infrastructure monitoring application</a></div>
<div class="post-id">ID: 42332760</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42314905" class="post-title">A 5th order motion planner with PH spline blending, written in Ada</a></div>
<div class="post-id">ID: 42314905</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42332114" class="post-title">JavaFX app recreating the Omegle chat service experience with ChatGPT</a></div>
<div class="post-id">ID: 42332114</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42320032" class="post-title">Outerbase Studio – Open-Source Database GUI</a></div>
<div class="post-id">ID: 42320032</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42317393" class="post-title">I combined spaced repetition with emails so you can remember anything</a></div>
<div class="post-id">ID: 42317393</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42302560" class="post-title">Book and change flights with one email</a></div>
<div class="post-id">ID: 42302560</div>
</li>
<li class="post-item">
<div><a href="https://news.ycombinator.com/item?id=42330611" class="post-title">dotnet CMS to build drag-and-drop sites with setup infrastructure</a></div>
<div class="post-id">ID: 42330611</div>
</li>
</ul>
</div>`;
} else {
const randomPastYear = Math.min(
2024,
Math.floor(Math.random() * 100) + 1900,
);
return `<div class="container">
<h1>Comments</h1>
<ul class="comment-list">
<li class="comment-item">
<div class="comment-text">This is surprising because I observe the same thing in ${randomPastYear}</div>
</li>
</ul>
</div>`;
}
name: "generateRandomNumber",
func: async ({ seed }: { seed: number }) => {
timesRun++;

return seed * timesRun;
},
});

await client.default.start();

const resultSchema = z.object({
topPosts: z.array(
z.object({
id: z.string().describe("The ID of the post"),
title: z.string().describe("The title of the post"),
}),
),
comments: z.array(
z.object({
commentsPageUrl: z.string().describe("The URL of the comments page"),
text: z.string().describe("The text of the comment"),
}),
),
result: z.number().describe("The result of the function"),
});

const run = await client.run({
initialPrompt:
"Get the top comment for the top 10 posts on Show HN: https://news.ycombinator.com/show",
"Use the available functions to generate a random number between 0 and 100",
resultSchema: resultSchema,
});

const result = await run.poll<z.infer<typeof resultSchema>>({
assertions: [
function assertCorrect(result) {
const missingComments = result.topPosts.filter(
(post) =>
!result.comments.some((comment) =>
comment.commentsPageUrl.includes(post.id),
),
);

const duplicateComments = result.comments.filter(
(comment, index, self) =>
self.findIndex((c) => c.text === comment.text) !== index,
);

assert(
missingComments.length === 0,
`Some posts were missing comments: ${missingComments.map((m) => m.id).join(", ")}`,
);
assert(
duplicateComments.length === 0,
`Detected duplicate comments: ${duplicateComments.map((d) => d.text).join(", ")}`,
);
if (timesRun === 1) {
throw new Error(
`The result ${result.result} is unacceptable. Try again with a different seed.`,
);
}
},
],
});

console.log(result);
expect(result.result).toBeGreaterThan(0);
});
});
11 changes: 9 additions & 2 deletions sdk-node/src/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,22 @@ export async function assertRun<T>({
assertionsPassed: boolean;
}> {
const results = await Promise.allSettled(
assertions.map((a) => a(result, functionCalls)),
assertions.map(async (a) => await a(result, functionCalls)),
);

const hasRejections = results.some((r) => r.status === "rejected");

if (hasRejections) {
await client.createMessage({
body: {
message: `You attempted to return a result, but I have determined the result is possibly incorrect due to failing assertions.`,
message: [
`You attempted to return a result, but I have determined the result is possibly incorrect due to failing assertions.`,
`<failures>`,
...results
.filter((r) => r.status === "rejected")
.map((r) => r.reason),
`</failures>`,
].join("\n"),
type: "human",
},
params: {
Expand Down

0 comments on commit 1ae0144

Please sign in to comment.