Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OPIK-405] Detect base64 images across the input object #637

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import React, { useMemo } from "react";
import get from "lodash/get";
import isString from "lodash/isString";
import uniq from "lodash/uniq";
import { Span, Trace } from "@/types/traces";
import {
Accordion,
Expand All @@ -25,7 +26,7 @@ const isImageContent = (content?: Partial<ImageContent>) => {
}
};

function extractImageUrls(messages: unknown) {
function extractOpenAIImages(messages: unknown) {
if (!Array.isArray(messages)) return [];

const images: string[] = [];
Expand All @@ -41,17 +42,63 @@ function extractImageUrls(messages: unknown) {
return images;
}

/**
 * Leading base64 characters used to recognise a raw (un-prefixed) image
 * payload, mapped to the image subtype placed in the generated data URL.
 */
const BASE64_PREFIXES_MAP = {
  "/9j/": "jpeg",
  iVBORw0KGgo: "png",
  R0lGODlh: "gif", // base64 of "GIF89a"
  R0lGODdh: "gif", // base64 of "GIF87a"
  Qk: "bmp",
  SUkq: "tiff",
  TU0A: "tiff",
  UklGR: "webp",
} as const;

// One or more base64 alphabet characters followed by optional trailing padding.
const IMAGE_CHARS_REGEX = "[A-Za-z0-9+/]+={0,2}";
// A complete JSON string literal (quotes included) holding a base64 image data URL.
const DATA_IMAGE_PREFIX = `"data:image/[^;]{3,4};base64,${IMAGE_CHARS_REGEX}"`;

// Compiled once at module load instead of on every call. Reusing global
// regexes is safe here because String.prototype.match resets lastIndex.
const RAW_BASE64_MATCHERS = Object.entries(BASE64_PREFIXES_MAP).map(
  ([prefix, extension]) => ({
    extension,
    // Padding ("=") is only valid at the very end of a base64 payload, so it
    // must not be accepted directly after the magic prefix.
    regex: new RegExp(`"${prefix}${IMAGE_CHARS_REGEX}"`, "g"),
  }),
);
const DATA_IMAGE_REGEX = new RegExp(DATA_IMAGE_PREFIX, "g");

/**
 * Finds base64-encoded images anywhere inside `input`.
 *
 * The input is serialised with `JSON.stringify` and scanned for string
 * literals that consist entirely of either
 *  - a raw base64 payload starting with one of the `BASE64_PREFIXES_MAP`
 *    prefixes (returned wrapped as `data:image/<type>;base64,<payload>`), or
 *  - a ready-made `data:image/...;base64,...` URL (returned as is).
 *
 * @param input Value to scan; anything `JSON.stringify` can serialise.
 * @returns Data URLs in detection order: raw payloads first (grouped by
 *   prefix, in map order), then explicit data URLs. Empty when nothing is
 *   found or when the input has no JSON representation (e.g. `undefined`).
 */
function extractInputImages(input: object): string[] {
  // JSON.stringify yields undefined (not a string) for undefined/functions/symbols.
  const serialized: string | undefined = JSON.stringify(input);
  if (typeof serialized !== "string") return [];

  const images: string[] = [];

  // Raw base64 payloads recognised by their magic prefix.
  for (const { extension, regex } of RAW_BASE64_MATCHERS) {
    const matches = serialized.match(regex);
    if (!matches) continue;

    for (const match of matches) {
      // Drop the JSON quotes that anchored the match.
      const base64Image = match.replace(/"/g, "");
      images.push(`data:image/${extension};base64,${base64Image}`);
    }
  }

  // Strings that already are data:image/...;base64,... URLs.
  const dataImageMatches = serialized.match(DATA_IMAGE_REGEX);
  if (dataImageMatches) {
    for (const match of dataImageMatches) {
      images.push(match.replace(/"/g, ""));
    }
  }

  return images;
}

/**
 * Gathers every image URL that can be found in `input`.
 *
 * Combines the images `extractOpenAIImages` finds in `input.messages`
 * (an empty list is used when that property is missing) with the base64
 * images `extractInputImages` finds in the whole object, then removes
 * duplicates with lodash `uniq`.
 */
function extractImageUrls(input: object) {
  const messages = get(input, "messages", []);
  const fromMessages = extractOpenAIImages(messages);
  const fromWholeInput = extractInputImages(input);
  const combined = [...fromMessages, ...fromWholeInput];

  return uniq(combined);
}

/** Props accepted by the InputOutputTab component. */
type InputOutputTabProps = {
// The trace or span to display; its `input` is scanned for images.
data: Trace | Span;
};

const InputOutputTab: React.FunctionComponent<InputOutputTabProps> = ({
data,
}) => {
const imagesUrls = useMemo(
() => extractImageUrls(get(data, ["input", "messages"], [])),
[data],
);
const imagesUrls = useMemo(() => extractImageUrls(data.input), [data.input]);

const hasImages = imagesUrls.length > 0;

Expand Down
Loading