diff --git a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/ContentView.swift b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/ContentView.swift
index fe18b2af..edefb3ca 100644
--- a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/ContentView.swift
+++ b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/ContentView.swift
@@ -1,27 +1,124 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
 import SwiftUI
+
+struct Message: Identifiable {
+  let id = UUID()
+  var text: String
+  let isUser: Bool
+}
+
 struct ContentView: View {
-  @ObservedObject var tokenUpdater = SharedTokenUpdater.shared
+  @State private var userInput: String = ""
+  @State private var messages: [Message] = []  // Store chat messages locally
+  @State private var isGenerating: Bool = false  // Track token generation state
+  @State private var stats: String = ""  // Token generation stats
 
   var body: some View {
     VStack {
+      // Chat bubbles
       ScrollView {
-        VStack(alignment: .leading) {
-          ForEach(tokenUpdater.decodedTokens, id: \.self) { token in
-            Text(token)
-              .padding(.horizontal, 5)
+        VStack(alignment: .leading, spacing: 20) {
+          ForEach(messages) { message in
+            ChatBubble(text: message.text, isUser: message.isUser)
+              .padding(.horizontal, 20)
+          }
+          if !stats.isEmpty {
+            Text(stats)
+              .font(.footnote)
+              .foregroundColor(.gray)
+              .padding(.horizontal, 20)
+              .padding(.top, 5)
+              .multilineTextAlignment(.center)
           }
         }
-        .padding()
+        .padding(.top, 20)
       }
-      Button("Generate Tokens") {
-        DispatchQueue.global(qos: .background).async {
-          // TODO: add user prompt question UI
-          GenAIGenerator.generate("Who is the current US president?");
+
+      // User input
+      HStack {
+        TextField("Type your message...", text: $userInput)
+          .padding()
+          .background(Color(.systemGray6))
+          .cornerRadius(20)
+          .padding(.horizontal)
+
+        Button(action: {
+          // Check for non-empty input
+          guard !userInput.trimmingCharacters(in: .whitespaces).isEmpty else { return }
+
+          messages.append(Message(text: userInput, isUser: true))
+          messages.append(Message(text: "", isUser: false))  // Placeholder for AI response
+
+          // Clear previously generated tokens
+          SharedTokenUpdater.shared.clearTokens()
+
+          let prompt = userInput
+          userInput = ""
+          isGenerating = true
+
+          DispatchQueue.global(qos: .background).async {
+            GenAIGenerator.generate(prompt)
+          }
+        }) {
+          Image(systemName: "paperplane.fill")
+            .foregroundColor(.white)
+            .padding()
+            .background(isGenerating ? Color.gray : Color.pastelGreen)
+            .clipShape(Circle())
+            .padding(.trailing, 10)
         }
+        .disabled(isGenerating)
+      }
+      .padding(.bottom, 20)
+    }
+    .background(Color(.systemGroupedBackground))
+    .edgesIgnoringSafeArea(.bottom)
+    .onReceive(NotificationCenter.default.publisher(for: NSNotification.Name("TokenGenerationCompleted"))) { _ in
+      isGenerating = false  // Re-enable the send button when token generation is complete
+    }
+    .onReceive(SharedTokenUpdater.shared.$decodedTokens) { tokens in
+      // Update the model response bubble as tokens stream in
+      if let lastIndex = messages.lastIndex(where: { !$0.isUser }) {
+        let combinedText = tokens.joined(separator: "")
+        messages[lastIndex].text = combinedText
+      }
+    }
+    .onReceive(NotificationCenter.default.publisher(for: NSNotification.Name("TokenGenerationStats"))) { notification in
+      if let userInfo = notification.userInfo,
+         let totalTime = userInfo["totalTime"] as? Int,
+         let firstTokenTime = userInfo["firstTokenTime"] as? Int,
+         let tokenCount = userInfo["tokenCount"] as? Int {
+        stats = "Generated \(tokenCount) tokens in \(totalTime) ms. First token in \(firstTokenTime) ms."
+      }
+    }
+  }
+}
+
+struct ChatBubble: View {
+  var text: String
+  var isUser: Bool
+
+  var body: some View {
+    HStack {
+      if isUser {
+        Spacer()
+        Text(text)
+          .padding()
+          .background(Color.pastelGreen)
+          .foregroundColor(.white)
+          .cornerRadius(25)
+          .padding(.horizontal, 10)
+      } else {
+        Text(text)
+          .padding()
+          .background(Color(.systemGray5))
+          .foregroundColor(.black)
+          .cornerRadius(25)
+          .padding(.horizontal, 20)
+        Spacer()
+      }
+    }
+  }
+}
@@ -32,3 +129,8 @@ struct ContentView_Previews: PreviewProvider {
     ContentView()
   }
 }
+
+// Extension for a pastel green color
+extension Color {
+  static let pastelGreen = Color(red: 0.6, green: 0.9, blue: 0.6)
+}
diff --git a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/GenAIGenerator.mm b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/GenAIGenerator.mm
index d430f16a..8a8d75dc 100644
--- a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/GenAIGenerator.mm
+++ b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/GenAIGenerator.mm
@@ -5,45 +5,109 @@
 #include "LocalLLM-Swift.h"
 #include "ort_genai.h"
 #include "ort_genai_c.h"
-
+#include <chrono>
 
 @implementation GenAIGenerator
 
-+ (void)generate:(nonnull NSString*)input_user_question {
-  NSString* llmPath = [[NSBundle mainBundle] resourcePath];
-  const char* modelPath = llmPath.cString;
-
-  auto model = OgaModel::Create(modelPath);
-  auto tokenizer = OgaTokenizer::Create(*model);
-
-  NSString* promptString = [NSString stringWithFormat:@"<|user|>\n%@<|end|>\n<|assistant|>", input_user_question];
-  const char* prompt = [promptString UTF8String];
-
-  auto sequences = OgaSequences::Create();
-  tokenizer->Encode(prompt, *sequences);
+typedef std::chrono::high_resolution_clock Clock;
+typedef std::chrono::time_point<Clock> TimePoint;
 
-  auto params = OgaGeneratorParams::Create(*model);
-  params->SetSearchOption("max_length", 200);
-  params->SetInputSequences(*sequences);
-
-  // Streaming Output to generate token by token
-  auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer);
-
-  auto generator = OgaGenerator::Create(*model, *params);
++ (void)generate:(nonnull NSString*)input_user_question {
+  NSLog(@"Starting token generation...");
+
+  NSString* llmPath = [[NSBundle mainBundle] resourcePath];
+  const char* modelPath = llmPath.cString;
+
+  // Log model creation
+  NSLog(@"Creating model...");
+  auto model = OgaModel::Create(modelPath);
+  if (!model) {
+    NSLog(@"Failed to create model.");
+    return;
+  }
+
+  NSLog(@"Creating tokenizer...");
+  auto tokenizer = OgaTokenizer::Create(*model);
+  if (!tokenizer) {
+    NSLog(@"Failed to create tokenizer.");
+    return;
+  }
+
+  auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer);
+
+  // Construct the prompt
+  NSString* promptString = [NSString stringWithFormat:@"<|user|>\n%@<|end|>\n<|assistant|>", input_user_question];
+  const char* prompt = [promptString UTF8String];
+
+  NSLog(@"Encoding prompt...");
+  auto sequences = OgaSequences::Create();
+  tokenizer->Encode(prompt, *sequences);
+
+  // Log parameters
+  NSLog(@"Setting generator parameters...");
+  auto params = OgaGeneratorParams::Create(*model);
+  params->SetSearchOption("max_length", 200);
+  params->SetInputSequences(*sequences);
+
+  NSLog(@"Creating generator...");
+  auto generator = OgaGenerator::Create(*model, *params);
+
+  bool isFirstToken = true;
+  TimePoint startTime = Clock::now();
+  TimePoint firstTokenTime;
+  int tokenCount = 0;
+
+  NSLog(@"Starting token generation loop...");
+  while (!generator->IsDone()) {
+    generator->ComputeLogits();
+    generator->GenerateNextToken();
+
+    if (isFirstToken) {
+      NSLog(@"First token generated.");
generated."); + firstTokenTime = Clock::now(); + isFirstToken = false; + } + + // Get the sequence data + const int32_t* seq = generator->GetSequenceData(0); + size_t seq_len = generator->GetSequenceCount(0); + + // Decode the new token + const char* decode_tokens = tokenizer_stream->Decode(seq[seq_len - 1]); + + // Check for decoding failure + if (!decode_tokens) { + NSLog(@"Token decoding failed."); + break; + } + + NSLog(@"Decoded token: %s", decode_tokens); + tokenCount++; + + // Convert token to NSString and update UI on the main thread + NSString* decodedTokenString = [NSString stringWithUTF8String:decode_tokens]; + [SharedTokenUpdater.shared addDecodedToken:decodedTokenString]; + } - while (!generator->IsDone()) { - generator->ComputeLogits(); - generator->GenerateNextToken(); - const int32_t* seq = generator->GetSequenceData(0); - size_t seq_len = generator->GetSequenceCount(0); - const char* decode_tokens = tokenizer_stream->Decode(seq[seq_len - 1]); + TimePoint endTime = Clock::now(); + auto totalDuration = std::chrono::duration_cast(endTime - startTime).count(); + auto firstTokenDuration = std::chrono::duration_cast(firstTokenTime - startTime).count(); + + NSLog(@"Token generation completed. Total time: %lld ms, First token time: %lld ms, Total tokens: %d", totalDuration, firstTokenDuration, tokenCount); - NSLog(@"Decoded tokens: %s", decode_tokens); + NSDictionary *stats = @{ + @"totalTime": @(totalDuration), + @"firstTokenTime": @(firstTokenDuration), + @"tokenCount": @(tokenCount) + }; - // Add decoded token to SharedTokenUpdater - NSString* decodedTokenString = [NSString stringWithUTF8String:decode_tokens]; - [SharedTokenUpdater.shared addDecodedToken:decodedTokenString]; - } + // notify main thread that token generation is complete + dispatch_async(dispatch_get_main_queue(), ^{ + [[NSNotificationCenter defaultCenter] postNotificationName:@"TokenGenerationCompleted" object:nil]; + [[NSNotificationCenter defaultCenter] postNotificationName:@"TokenGenerationStats" object:nil userInfo:stats]; + }); + NSLog(@"Token generation completed."); } + @end diff --git a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/README.md b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/README.md index 8680e606..a9438224 100644 --- a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/README.md +++ b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/README.md @@ -106,4 +106,6 @@ Upon app launching, Xcode will automatically copy and install the model files fr **Note**: The current app only sets up with a simple initial prompt question, you can adjust/try your own or refine the UI based on requirements. -***Notice:*** The current Xcode project runs on iOS 16.6, feel free to adjust latest iOS/build for lates iOS versions accordingly. \ No newline at end of file +***Notice:*** The current Xcode project runs on iOS 16.6, feel free to adjust latest iOS/build for lates iOS versions accordingly. 
+
+![Simulator Screenshot - iPhone 16](Simulator%20Screenshot%20-%20iPhone%2016.png)
\ No newline at end of file
diff --git a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/SharedTokenUpdater.swift b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/SharedTokenUpdater.swift
index a4680041..260a9154 100644
--- a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/SharedTokenUpdater.swift
+++ b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/SharedTokenUpdater.swift
@@ -14,4 +14,10 @@ import Foundation
       self.decodedTokens.append(token)
     }
   }
+
+  @objc func clearTokens() {
+    DispatchQueue.main.async {
+      self.decodedTokens.removeAll()
+    }
+  }
 }
diff --git a/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/Simulator Screenshot - iPhone 16.png b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/Simulator Screenshot - iPhone 16.png
new file mode 100644
index 00000000..95448ef7
Binary files /dev/null and b/mobile/examples/phi-3/ios/LocalLLM/LocalLLM/Simulator Screenshot - iPhone 16.png differ
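
For anyone consuming this change from elsewhere in the app, below is a minimal sketch of the notification contract the diff introduces. The notification names ("TokenGenerationCompleted", "TokenGenerationStats") and the userInfo keys ("totalTime", "firstTokenTime", "tokenCount") come straight from the diff; the GenerationStatsObserver class itself is hypothetical, shown only to illustrate observing these notifications outside ContentView.

import Foundation

// Hypothetical observer; mirrors what ContentView.onReceive does in the diff above.
final class GenerationStatsObserver {
  private var observationTokens: [NSObjectProtocol] = []

  init() {
    let center = NotificationCenter.default

    // Posted from GenAIGenerator.mm on the main queue when generation finishes.
    observationTokens.append(center.addObserver(
      forName: NSNotification.Name("TokenGenerationCompleted"), object: nil, queue: .main
    ) { _ in
      print("Generation finished; safe to accept new input.")
    })

    // Carries the timing dictionary built in GenAIGenerator.mm.
    observationTokens.append(center.addObserver(
      forName: NSNotification.Name("TokenGenerationStats"), object: nil, queue: .main
    ) { note in
      guard let info = note.userInfo,
            let total = info["totalTime"] as? Int,
            let first = info["firstTokenTime"] as? Int,
            let count = info["tokenCount"] as? Int else { return }
      print("Generated \(count) tokens in \(total) ms (first token after \(first) ms)")
    })
  }

  deinit {
    observationTokens.forEach { NotificationCenter.default.removeObserver($0) }
  }
}

Because GenAIGenerator.mm posts both notifications via dispatch_async onto the main queue, delivery happens on the main thread, so observers can update UI state directly.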