Skip to content

Commit

Permalink
Merge pull request #133 from awaescher/merge
Browse files Browse the repository at this point in the history
Merge improved image handling
  • Loading branch information
awaescher authored Nov 4, 2024
2 parents 764e9c0 + 8203f21 commit 1ccce48
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 42 deletions.
6 changes: 2 additions & 4 deletions demo/Demos/ImageChatConsole.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ public override async Task Run()
continue;
}

var imagesBase64 = imageBytes.Select(Convert.ToBase64String);

// remove paths from the message
foreach (var path in imagePaths)
message = message.Replace(path, "");
Expand All @@ -88,7 +86,7 @@ public override async Task Run()
AnsiConsole.MarkupLine($"[{HintTextColor}]The images were scaled down for the console only, the model gets full versions.[/]");
AnsiConsole.WriteLine();

await foreach (var answerToken in chat.SendAsync(message, [], imagesBase64))
await foreach (var answerToken in chat.SendAsync(message, imageBytes))
AnsiConsole.MarkupInterpolated($"[{AiTextColor}]{answerToken}[/]");
}
else
Expand All @@ -114,4 +112,4 @@ public override async Task Run()
/// </summary>
[GeneratedRegex("(.+)\\/([^\\/]+)")]
private static partial Regex UnixFileRegex();
}
}
9 changes: 7 additions & 2 deletions demo/OllamaApiConsole.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Spectre.Console" Version="0.49.1" />
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.49.1" />
<!--
SixLabors.ImageSharp added explicitly to fix CVE-2024-41131: https://github.com/advisories/GHSA-63p8-c4ww-9cg7
and can be removed once Spectre.Console.ImageSharp uses a version greater than 3.1.4
-->
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.5" />
<PackageReference Include="Spectre.Console" Version="0.49.1" />
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.49.1" />
</ItemGroup>

<ItemGroup>
Expand Down
23 changes: 23 additions & 0 deletions src/ByteArrayExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace OllamaSharp;

/// <summary>
/// Extensions for byte arrays
/// </summary>
public static class ByteArrayExtensions
{
	/// <summary>
	/// Converts a series of bytes to a base64 string
	/// </summary>
	/// <param name="bytes">The bytes to convert to base64</param>
	/// <returns>
	/// The base64 representation of <paramref name="bytes"/>,
	/// or an empty string if <paramref name="bytes"/> is null or contains no bytes
	/// </returns>
	public static string ToBase64(this IEnumerable<byte>? bytes)
	{
		// the parameter is declared nullable, so null is treated like "no bytes" instead of failing inside ToArray()
		if (bytes is null)
			return string.Empty;

		// a byte array can be encoded directly, every other sequence has to be materialized first
		var buffer = bytes as byte[] ?? bytes.ToArray();
		return Convert.ToBase64String(buffer);
	}

	/// <summary>
	/// Converts multiple series of bytes to multiple base64 strings, one for each.
	/// </summary>
	/// <param name="byteArrays">The series of bytes to convert to base64</param>
	/// <returns>
	/// A lazily evaluated sequence with one base64 string per series of bytes,
	/// or null if <paramref name="byteArrays"/> is null
	/// </returns>
	public static IEnumerable<string>? ToBase64(this IEnumerable<IEnumerable<byte>>? byteArrays)
		=> byteArrays?.Select(bytes => bytes.ToBase64());
}
38 changes: 38 additions & 0 deletions src/Chat.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,24 @@ public Chat(IOllamaApiClient client, string systemPrompt = "")
public IAsyncEnumerable<string> SendAsync(string message, CancellationToken cancellationToken = default)
=> SendAsync(message, tools: null, imagesAsBase64: null, cancellationToken);

/// <summary>
/// Sends a message together with images given as raw bytes to the currently selected model and streams its response.
/// The images are converted to base64 before they are handed over to the base64 overload.
/// </summary>
/// <param name="message">The message to send</param>
/// <param name="imagesAsBytes">Images in byte representation to send to the model, no images are sent if this is null</param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
public IAsyncEnumerable<string> SendAsync(string message, IEnumerable<IEnumerable<byte>> imagesAsBytes, CancellationToken cancellationToken = default)
{
	// fall back to an empty list so that the base64 overload never receives null
	IEnumerable<string> encodedImages = imagesAsBytes?.ToBase64() ?? [];

	return SendAsync(message, encodedImages, cancellationToken);
}

/// <summary>
/// Sends a message together with base64 encoded images to the currently selected model and streams its response.
/// No tools are passed along with the message.
/// </summary>
/// <param name="message">The message to send</param>
/// <param name="imagesAsBase64">Base64 encoded images to send to the model</param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
public IAsyncEnumerable<string> SendAsync(string message, IEnumerable<string> imagesAsBase64, CancellationToken cancellationToken = default)
{
	return SendAsync(message, tools: [], imagesAsBase64: imagesAsBase64, cancellationToken);
}

/// <summary>
/// Sends a message to the currently selected model and streams its response
/// </summary>
Expand All @@ -77,6 +95,26 @@ public IAsyncEnumerable<string> SendAsync(string message, IEnumerable<Tool>? too
public IAsyncEnumerable<string> SendAsAsync(ChatRole role, string message, CancellationToken cancellationToken = default)
=> SendAsAsync(role, message, tools: null, imagesAsBase64: null, cancellationToken);

/// <summary>
/// Sends a message in a given role together with images given as raw bytes to the currently selected model and streams its response.
/// The images are converted to base64 before they are handed over to the base64 overload.
/// </summary>
/// <param name="role">The role in which the message should be sent</param>
/// <param name="message">The message to send</param>
/// <param name="imagesAsBytes">Images in byte representation to send to the model, no images are sent if this is null</param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
public IAsyncEnumerable<string> SendAsAsync(ChatRole role, string message, IEnumerable<IEnumerable<byte>> imagesAsBytes, CancellationToken cancellationToken = default)
{
	// fall back to an empty list so that the base64 overload never receives null
	IEnumerable<string> encodedImages = imagesAsBytes?.ToBase64() ?? [];

	return SendAsAsync(role, message, encodedImages, cancellationToken);
}

/// <summary>
/// Sends a message in a given role together with base64 encoded images to the currently selected model and streams its response.
/// No tools are passed along with the message.
/// </summary>
/// <param name="role">The role in which the message should be sent</param>
/// <param name="message">The message to send</param>
/// <param name="imagesAsBase64">Base64 encoded images to send to the model</param>
/// <param name="cancellationToken">The token to cancel the operation with</param>
public IAsyncEnumerable<string> SendAsAsync(ChatRole role, string message, IEnumerable<string> imagesAsBase64, CancellationToken cancellationToken = default)
{
	return SendAsAsync(role, message, tools: [], imagesAsBase64: imagesAsBase64, cancellationToken);
}

/// <summary>
/// Sends a message in a given role to the currently selected model and streams its response
/// </summary>
Expand Down
23 changes: 12 additions & 11 deletions src/MicrosoftAi/AbstractionMapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public static ChatRequest ToOllamaSharpChatRequest(IList<ChatMessage> chatMessag
Stop = options?.StopSequences?.ToArray(),
Temperature = options?.Temperature,
TopP = options?.TopP,
TopK = options?.TopK,
},
Stream = stream,
Template = null,
Expand Down Expand Up @@ -112,7 +113,7 @@ public static ChatRequest ToOllamaSharpChatRequest(IList<ChatMessage> chatMessag
/// <param name="optionSetter">The setter to set the Ollama option if available in the chat options</param>
private static void TryAddOllamaOption<T>(ChatOptions microsoftChatOptions, OllamaOption option, Action<T> optionSetter)
{
if (microsoftChatOptions?.AdditionalProperties?.TryGetValue(option.Name, out var value) ?? false)
if ((microsoftChatOptions?.AdditionalProperties?.TryGetValue(option.Name, out var value) ?? false) && value is not null)
optionSetter((T)value);
}

Expand Down Expand Up @@ -196,27 +197,26 @@ private static IEnumerable<Message> ToOllamaSharpMessages(IList<ChatMessage> cha
yield return new Message
{
Content = cm.Text,
Images = cm.Contents.OfType<DataContent>().Select(ToOllamaImage).Where(s => !string.IsNullOrEmpty(s)).ToArray(),
Images = cm.Contents.OfType<ImageContent>().Select(ToOllamaImage).Where(s => !string.IsNullOrEmpty(s)).ToArray(),
Role = ToOllamaSharpRole(cm.Role),
ToolCalls = cm.Contents.OfType<FunctionCallContent>().Select(ToOllamaSharpToolCall),
};
}
}

/// <summary>
/// Converts a Microsoft.Extensions.AI.<see cref="DataContent"/> to a base64 image string.
/// Converts a Microsoft.Extensions.AI.<see cref="ImageContent"/> to a base64 image string.
/// </summary>
/// <param name="content">The data content to convert.</param>
private static string ToOllamaImage(DataContent content)
private static string ToOllamaImage(ImageContent content)
{
if (content is null || !content.ContainsData)
if (content is null)
return string.Empty;

var isImage = content is ImageContent || content?.MediaType?.StartsWith("image", StringComparison.OrdinalIgnoreCase) == true;
if (isImage)
return content?.Uri ?? ""; // If the content is binary data, converts it to a data: URI with base64 encoding
if (content.ContainsData && content.Data.HasValue)
return Convert.ToBase64String(content.Data.Value.ToArray());

return string.Empty;
throw new NotSupportedException("Images have to be provided as content (byte-Array or base64-string) for Ollama to be used. Other image sources like links are not supported.");
}

/// <summary>
Expand Down Expand Up @@ -285,7 +285,8 @@ public static StreamingChatCompletionUpdate ToStreamingChatCompletionUpdate(Chat
FinishReason = response?.Done == true ? ChatFinishReason.Stop : null,
RawRepresentation = response,
Text = response?.Message?.Content ?? string.Empty,
Role = ToAbstractionRole(response?.Message?.Role)
Role = ToAbstractionRole(response?.Message?.Role),
ModelId = response?.Model
};
}

Expand Down Expand Up @@ -371,7 +372,7 @@ public static ChatMessage ToChatMessage(Message message)
/// <returns>A <see cref="UsageDetails"/> object containing the parsed usage details.</returns>
private static UsageDetails? ParseOllamaChatResponseUsage(ChatDoneResponseStream? response)
{
if (response?.PromptEvalCount is not null || response?.EvalCount is not null)
if (response is not null)
{
return new()
{
Expand Down
20 changes: 9 additions & 11 deletions src/OllamaApiClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -373,15 +373,6 @@ private async Task EnsureSuccessStatusCodeAsync(HttpResponseMessage response)

response.EnsureSuccessStatusCode();
}
/// <summary>
/// Releases the resources used by the <see cref="OllamaApiClient"/> instance.
/// Disposes the internal HTTP client if it was created internally.
/// </summary>
public void Dispose()
{
if (_disposeHttpClient)
_client?.Dispose();
}

#region IChatClient and IEmbeddingGenerator implementation

Expand Down Expand Up @@ -423,8 +414,15 @@ async Task<GeneratedEmbeddings<Embedding<float>>> IEmbeddingGenerator<string, Em
TService? IEmbeddingGenerator<string, Embedding<float>>.GetService<TService>(object? key) where TService : class
=> key is null ? this as TService : null;

/// <inheritdoc/>
void IDisposable.Dispose() => Dispose();
/// <summary>
/// Releases the resources used by the <see cref="OllamaApiClient"/> instance.
/// The HTTP client is only disposed when this instance is flagged to dispose it,
/// otherwise it is left untouched.
/// </summary>
void IDisposable.Dispose()
{
	// nothing to release if this instance is not responsible for disposing the HTTP client
	if (!_disposeHttpClient)
		return;

	_client?.Dispose();
}

#endregion

Expand Down
85 changes: 73 additions & 12 deletions test/AbstractionMapperTests.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using FluentAssertions;
using Microsoft.Extensions.AI;
using Moq;
using NUnit.Framework;
using OllamaSharp;
using OllamaSharp.MicrosoftAi;
Expand All @@ -9,6 +8,9 @@

namespace Tests;

#pragma warning disable CS8602 // Dereference of a possibly null reference.
#pragma warning disable CS8604 // Possible null reference argument.

public partial class AbstractionMapperTests
{
public partial class ToOllamaSharpChatRequestMethod : AbstractionMapperTests
Expand Down Expand Up @@ -110,10 +112,14 @@ public void Maps_Messages()
message.Role.Should().Be(OllamaSharp.Models.Chat.ChatRole.Assistant);
}

/// <summary>
/// Ollama expects images as raw base64 data, without a data URI prefix such as "data:image/png;base64,"
/// </summary>
[Test]
public void Maps_Messages_With_Images()
public void Maps_Base64_Images()
{
const string TRANSPARENT_PIXEL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/wcAAgEBAYkFNgAAAAAASUVORK5CYII=";
const string TRANSPARENT_PIXEL = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/wcAAgEBAYkFNgAAAAAASUVORK5CYII=";
const string TRANSPARENT_PIXEL_WITH_BASE64_META = "data:image/png;base64," + TRANSPARENT_PIXEL;

var chatMessages = new List<Microsoft.Extensions.AI.ChatMessage>
{
Expand All @@ -123,7 +129,7 @@ public void Maps_Messages_With_Images()
AuthorName = "a1",
Contents = [
new TextContent("Make me an image like this, but with beer."),
new ImageContent(TRANSPARENT_PIXEL)],
new ImageContent(TRANSPARENT_PIXEL_WITH_BASE64_META)],
RawRepresentation = null,
Role = Microsoft.Extensions.AI.ChatRole.User
},
Expand All @@ -133,10 +139,10 @@ public void Maps_Messages_With_Images()
AuthorName = "a2",
Contents = [
new TextContent("Interesting idea, here we go:"),
new ImageContent(TRANSPARENT_PIXEL),
new ImageContent(TRANSPARENT_PIXEL),
new ImageContent(TRANSPARENT_PIXEL),
new ImageContent(TRANSPARENT_PIXEL)],
new ImageContent(TRANSPARENT_PIXEL_WITH_BASE64_META),
new ImageContent(TRANSPARENT_PIXEL_WITH_BASE64_META),
new ImageContent(TRANSPARENT_PIXEL_WITH_BASE64_META),
new ImageContent(TRANSPARENT_PIXEL_WITH_BASE64_META)],
RawRepresentation = null,
Role = Microsoft.Extensions.AI.ChatRole.Assistant
},
Expand All @@ -148,13 +154,66 @@ public void Maps_Messages_With_Images()

var message = chatRequest.Messages.ElementAt(0);
message.Role.Should().Be(OllamaSharp.Models.Chat.ChatRole.User);
message.Images.Single().Should().Be(TRANSPARENT_PIXEL);
message.Images.Single().Should().Be(TRANSPARENT_PIXEL); // <- WITHOUT BASE64_META

message = chatRequest.Messages.ElementAt(1);
message.Role.Should().Be(OllamaSharp.Models.Chat.ChatRole.Assistant);
message.Images.Should().HaveCount(4);
}

/// <summary>
/// An image supplied as raw bytes is expected to appear in the Ollama request as its plain base64 encoding
/// </summary>
[Test]
public void Maps_Byte_Array_Images()
{
	// "ABC" encodes to "QUJD" in base64
	var imageBytes = System.Text.Encoding.ASCII.GetBytes("ABC");

	var userMessage = new Microsoft.Extensions.AI.ChatMessage
	{
		AdditionalProperties = [],
		AuthorName = "a1",
		Contents = [
			new TextContent("Make me an image like this, but with beer."),
			new ImageContent(imageBytes)],
		RawRepresentation = null,
		Role = Microsoft.Extensions.AI.ChatRole.User
	};

	var chatMessages = new List<Microsoft.Extensions.AI.ChatMessage> { userMessage };

	var chatRequest = AbstractionMapper.ToOllamaSharpChatRequest(chatMessages, null, stream: true);

	var mappedImage = chatRequest.Messages.Single().Images.Single();
	mappedImage.Should().Be("QUJD");
}

/// <summary>
/// Ollama only accepts images whose content is provided directly (as bytes or as a base64 string).
/// Mapping an image that is only referenced by a link is expected to fail with a <see cref="NotSupportedException"/>.
/// </summary>
[Test]
public void Does_Not_Support_Image_Links()
{
	var linkedImageMessage = new Microsoft.Extensions.AI.ChatMessage
	{
		AdditionalProperties = [],
		AuthorName = "a1",
		Contents = [
			new TextContent("Make me an image like this, but with beer."),
			new ImageContent("https://unsplash.com/sunset.png")],
		RawRepresentation = null,
		Role = Microsoft.Extensions.AI.ChatRole.User
	};

	var chatMessages = new List<Microsoft.Extensions.AI.ChatMessage> { linkedImageMessage };

	void MapAndEnumerate()
	{
		var chatRequest = AbstractionMapper.ToOllamaSharpChatRequest(chatMessages, null, stream: true);

		// the messages have to be accessed to invoke the evaluation of IEnumerable<Message>
		chatRequest.Messages.Should().NotBeEmpty();
	}

	Action mapping = MapAndEnumerate;

	var thrown = mapping.Should().Throw<NotSupportedException>();
	thrown.Which.Message.Should().Contain("Images have to be provided as content");
}

[Test]
public void Maps_Messages_With_Tools()
{
Expand Down Expand Up @@ -187,7 +246,7 @@ public void Maps_Messages_With_Tools()
tool.Function.Parameters.Properties["unit"].Description.Should().Be("The unit to calculate the current temperature to");
tool.Function.Parameters.Properties["unit"].Enum.Should().BeEmpty();
tool.Function.Parameters.Properties["unit"].Type.Should().Be("string");
tool.Function.Parameters.Required.Should().BeEquivalentTo(["city"]);
tool.Function.Parameters.Required.Should().BeEquivalentTo("city");
tool.Function.Parameters.Type.Should().Be("object");
tool.Type.Should().Be("function");
}
Expand Down Expand Up @@ -251,7 +310,6 @@ public void Maps_Options()
chatRequest.Options.VocabOnly.Should().BeNull();
}


[Test]
public void Maps_Ollama_Options()
{
Expand Down Expand Up @@ -464,7 +522,7 @@ public void Maps_Request()

var request = AbstractionMapper.ToOllamaEmbedRequest(values, options);

request.Input.Should().BeEquivalentTo(["Teenage ", " Dirtbag."]);
request.Input.Should().BeEquivalentTo("Teenage ", " Dirtbag.");
request.KeepAlive.Should().BeNull();
request.Model.Should().Be("nomic_embed");
request.Options.Should().BeNull();
Expand Down Expand Up @@ -518,3 +576,6 @@ public void Maps_Response()
}
}
}

#pragma warning restore CS8602 // Dereference of a possibly null reference.
#pragma warning restore CS8604 // Possible null reference argument.
Loading

0 comments on commit 1ccce48

Please sign in to comment.