From b0ce86574d65e8473d497da437fa90c36adae816 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 6 Jul 2023 14:47:45 -0700 Subject: [PATCH 01/18] Adjust FeedAOutputToB --- docs/get-started/with-csharp.md | 68 +++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/docs/get-started/with-csharp.md b/docs/get-started/with-csharp.md index d0b441fcfc3e6..60999922cffc6 100644 --- a/docs/get-started/with-csharp.md +++ b/docs/get-started/with-csharp.md @@ -73,31 +73,57 @@ In some scenarios, you may want to reuse input/output tensors. This often happen ### Chaining: Feed model A's output(s) as input(s) to model B ```cs -InferenceSession session1, session2; // let's say 2 sessions are initialized - -Tensor input = new DenseTensor(new[] { 1, inputDimension }); // let's say data is fed into the Tensor objects -var inputs1 = new List() - { - NamedOnnxValue.CreateFromTensor("name1", input) - }; -// session1 inference -using (var outputs1 = session1.Run(inputs1)) -{ - // get intermediate value - var input2 = outputs1.First(); - - // modify the name of the ONNX value - input2.Name = "name2"; - - // create input list for session2 - var inputs2 = new List() { input2 }; +using Microsoft.ML.OnnxRuntime.Tensors; +using Microsoft.ML.OnnxRuntime; - // session2 inference - using (var results = session2.Run(inputs2)) +namespace Samples +{ + class FeedModelAToModelB { - // manipulate the results + static void Program() + { + const string modelAPath = "./modelA.onnx"; + const string modelBPath = "./modelB.onnx"; + using InferenceSession session1 = new InferenceSession(modelAPath); + using InferenceSession session2 = new InferenceSession(modelBPath); + + // Illustration only + float[] inputData = { 1, 2, 3, 4 }; + long[] inputShape = { 1, 4 }; + + using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(inputData, inputShape); + + // Create input data for session. Request all outputs in this case. 
+ var inputs1 = new Dictionary + { + { "input", inputOrtValue } + }; + + using var runOptions = new RunOptions(); + + // session1 inference + using (var outputs1 = session1.Run(runOptions, inputs1, session1.OutputNames)) + { + // get intermediate value + var outputToFeed = outputs1.First(); + + // modify the name of the ONNX value + // create input list for session2 + var inputs2 = new Dictionary + { + { "inputNameForModelB", outputToFeed } + }; + + // session2 inference + using (var results = session2.Run(runOptions, inputs2, session2.OutputNames)) + { + // manipulate the results + } + } + } } } + ``` ### Multiple inference runs with fixed sized input(s) and output(s) From 540577c95cdc09b953753e55ec668bce74dfce92 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 6 Jul 2023 15:03:31 -0700 Subject: [PATCH 02/18] Update references to FixedBuffer class --- docs/get-started/with-csharp.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/get-started/with-csharp.md b/docs/get-started/with-csharp.md index 60999922cffc6..51d687428afb0 100644 --- a/docs/get-started/with-csharp.md +++ b/docs/get-started/with-csharp.md @@ -16,7 +16,7 @@ nav_order: 4 ## Install the Nuget Packages with the .NET CLI ```bash -dotnet add package Microsoft.ML.OnnxRuntime --version 1.2.0 +dotnet add package Microsoft.ML.OnnxRuntime --version 1.16.0 dotnet add package System.Numerics.Tensors --version 0.1.0 ``` @@ -127,18 +127,26 @@ namespace Samples ``` ### Multiple inference runs with fixed sized input(s) and output(s) -If the model have fixed sized inputs and outputs of numeric tensors, you can use "FixedBufferOnnxValue" to accelerate the inference speed. By using "FixedBufferOnnxValue", the container objects only need to be allocated/disposed one time during multiple InferenceSession.Run() calls. This avoids some overhead which may be beneficial for smaller models where the time is noticeable in the overall running time. 
+If the model has fixed sized inputs and outputs of numeric tensors, +use the preferable **OrtValue** and its API to accelerate the inference speed and minimize data transfer. +**OrtValue** class makes it possible to reuse the underlying buffer for the input and output tensors. +It pins the managed buffers and makes use of them for inference. It also provides direct access +to the native buffers for outputs. You can also preallocate `OrtValue` for outputs or create it on top +of the existing buffers. +This avoids some overhead which may be beneficial for smaller models +where the time is noticeable in the overall running time. - - +Keep in mind that **OrtValue** class, like many other classes in OnnxRuntime C# API, is **IDisposable**. +It needs to be properly disposed to either unpin the managed buffers or release the native buffers +to avoid memory leaks. ## Running on GPU (Optional) If using the GPU package, simply use the appropriate SessionOptions when creating an InferenceSession. ```cs int gpuDeviceId = 0; // The GPU device ID to execute on -var session = new InferenceSession("model.onnx", SessionOptions.MakeSessionOptionWithCudaProvider(gpuDeviceId)); +using var gpuSessionOptions = SessionOptions.MakeSessionOptionWithCudaProvider(gpuDeviceId); +using var session = new InferenceSession("model.onnx", gpuSessionOptions); ``` # ONNX Runtime C# API {: .no_toc } From b605d29391638c3907c61d30ae323c601490fab9 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 7 Jul 2023 11:32:01 -0700 Subject: [PATCH 03/18] Add BertTokenizer example --- .../csharp/bert-nlp-csharp-console-app.md | 119 +++++++++--------- 1 file changed, 61 insertions(+), 58 deletions(-) diff --git a/docs/tutorials/csharp/bert-nlp-csharp-console-app.md b/docs/tutorials/csharp/bert-nlp-csharp-console-app.md index 70c958806dda8..118b95a7d187f 100644 --- a/docs/tutorials/csharp/bert-nlp-csharp-console-app.md +++ b/docs/tutorials/csharp/bert-nlp-csharp-console-app.md @@ -136,8 +136,8 @@ Now that we 
have tested the model in Python its time to build it out in C#. The ### Install the Nuget Packages - Install the Nuget packages `BERTTokenizers`, `Microsoft.ML.OnnxRuntime`, `Microsoft.ML.OnnxRuntime.Managed`, `Microsoft.ML` ```PowerShell -dotnet add package Microsoft.ML.OnnxRuntime --version 1.12.0 -dotnet add package Microsoft.ML.OnnxRuntime.Managed --version 1.12.0 +dotnet add package Microsoft.ML.OnnxRuntime --version 1.16.0 +dotnet add package Microsoft.ML.OnnxRuntime.Managed --version 1.16.0 dotnet add package dotnet add package Microsoft.ML dotnet add package dotnet add package BERTTokenizers --version 1.1.0 ``` @@ -159,7 +159,7 @@ using System; namespace MyApp // Note: actual namespace depends on the project name. { - internal class Program + internal class BertTokenizeProgram { static void Main(string[] args) { @@ -169,10 +169,10 @@ namespace MyApp // Note: actual namespace depends on the project name. } ``` ### Create the BertInput class for encoding -- Add the `BertInput` class +- Add the `BertInput` struct ```csharp - public class BertInput + public struct BertInput { public long[] InputIds { get; set; } public long[] AttentionMask { get; set; } @@ -205,83 +205,86 @@ namespace MyApp // Note: actual namespace depends on the project name. }; ``` -### Create the Tensors -- Create the `ConvertToTensor` function. Set the shape of the Tensor `new[] { 1, inputDimension }` and the values to be added to the `NamedOnnxValue` input list. - -```csharp - public static Tensor ConvertToTensor(long[] inputArray, int inputDimension) - { - // Create a tensor with the shape the model is expecting. Here we are sending in 1 batch with the inputDimension as the amount of tokens. - Tensor input = new DenseTensor(new[] { 1, inputDimension }); - - // Loop through the inputArray (InputIds, AttentionMask and TypeIds) - for (var i = 0; i < inputArray.Length; i++) - { - // Add each to the input Tenor result. - // Set index and array value of each input Tensor. 
- input[0,i] = inputArray[i]; - } - return input; - } -``` - ### Create the `input` of `List` that is needed for inference -- Get the model, call the `ConvertToTensor` function to create the tensor and create the list of `NamedOnnxValue` input variables for inferencing. +- Get the model, create 3 OrtValues on top of the input buffers and wrap them into a Dictionary to feed into a Run(). + Beware that almost all of the Onnxruntime classes wrap native data structures, and, therefore, must be disposed + to prevent memory leaks. ```csharp // Get path to model to create inference session. var modelPath = @"C:\code\bert-nlp-csharp\BertNlpTest\BertNlpTest\bert-large-uncased-finetuned-qa.onnx"; - // Create input tensor. + using var runOptions = new RunOptions(); + using var session = new InferenceSession(modelPath); - var input_ids = ConvertToTensor(bertInput.InputIds, bertInput.InputIds.Length); - var attention_mask = ConvertToTensor(bertInput.AttentionMask, bertInput.InputIds.Length); - var token_type_ids = ConvertToTensor(bertInput.TypeIds, bertInput.InputIds.Length); + // Create input tensors over the input data. + using var inputIdsOrtValue = OrtValue.CreateTensorValueFromMemory(bertInput.InputIds, + new long[] { 1, bertInput.InputIds.Length }); + using var attMaskOrtValue = OrtValue.CreateTensorValueFromMemory(bertInput.AttentionMask, + new long[] { 1, bertInput.AttentionMask.Length }); - // Create input data for session. - var input = new List { NamedOnnxValue.CreateFromTensor("input_ids", input_ids), - NamedOnnxValue.CreateFromTensor("input_mask", attention_mask), - NamedOnnxValue.CreateFromTensor("segment_ids", token_type_ids) }; + using var typeIdsOrtValue = OrtValue.CreateTensorValueFromMemory(bertInput.TypeIds, + new long[] { 1, bertInput.TypeIds.Length }); + // Create input data for session. Request all outputs in this case. 
+ var inputs = new Dictionary + { + { "input_ids", inputIdsOrtValue }, + { "input_mask", attMaskOrtValue }, + { "segment_ids", typeIdsOrtValue } + }; ``` ### Run Inference - Create the `InferenceSession`, run the inference and print out the result. ```csharp - // Create an InferenceSession from the Model Path. - var session = new InferenceSession(modelPath); - // Run session and send the input data in to get inference output. - var output = session.Run(input); + using var output = session.Run(runOptions, inputs, session.OutputNames); ``` ### Postprocess the `output` and print the result - Here we get the index for the start position (`startLogit`) and end position (`endLogits`). Then we take the original `tokens` of the input sentence and get the vocabulary value for the token ids predicted. ```csharp - // Call ToList on the output. - // Get the First and Last item in the list. - // Get the Value of the item and cast as IEnumerable to get a list result. - List startLogits = (output.ToList().First().Value as IEnumerable).ToList(); - List endLogits = (output.ToList().Last().Value as IEnumerable).ToList(); - - // Get the Index of the Max value from the output lists. - var startIndex = startLogits.ToList().IndexOf(startLogits.Max()); - var endIndex = endLogits.ToList().IndexOf(endLogits.Max()); - - // From the list of the original tokens in the sentence - // Get the tokens between the startIndex and endIndex and convert to the vocabulary from the ID of the token. - var predictedTokens = tokens - .Skip(startIndex) - .Take(endIndex + 1 - startIndex) - .Select(o => tokenizer.IdToToken((int)o.VocabularyIndex)) - .ToList(); - - // Print the result. - Console.WriteLine(String.Join(" ", predictedTokens)); + // Get the Index of the Max value from the output lists. + // We intentionally do not copy to an array or to a list to employ algorithms. + // Hopefully, more algos will be available in the future for spans. 
+ // so we can directly read from native memory and do not duplicate data that + // can be large for some models + // Local function + int GetMaxValueIndex(ReadOnlySpan span) + { + float maxVal = span[0]; + int maxIndex = 0; + for (int i = 1; i < span.Length; ++i) + { + var v = span[i]; + if (v > maxVal) + { + maxVal = v; + maxIndex = i; + } + } + return maxIndex; + } + + var startLogits = output[0].GetTensorDataAsSpan(); + int startIndex = GetMaxValueIndex(startLogits); + + var endLogits = output[output.Count - 1].GetTensorDataAsSpan(); + int endIndex = GetMaxValueIndex(endLogits); + + var predictedTokens = tokens + .Skip(startIndex) + .Take(endIndex + 1 - startIndex) + .Select(o => tokenizer.IdToToken((int)o.VocabularyIndex)) + .ToList(); + + // Print the result. + Console.WriteLine(String.Join(" ", predictedTokens)); ``` ## Deploy with Azure Web App From f328180a97cf1e61f4798bebfd8e0152da904bde Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 7 Jul 2023 11:35:30 -0700 Subject: [PATCH 04/18] Adjust gpuSessionOptions sample --- docs/tutorials/csharp/csharp-gpu.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/csharp/csharp-gpu.md b/docs/tutorials/csharp/csharp-gpu.md index 9fc1e3b67dd00..f8edcceba6a48 100644 --- a/docs/tutorials/csharp/csharp-gpu.md +++ b/docs/tutorials/csharp/csharp-gpu.md @@ -47,7 +47,9 @@ torch.cuda.is_available() - Now you can enable GPU in the C# ONNX Runtime API with the following code: ```cs -var session = new InferenceSession(modelPath, SessionOptions.MakeSessionOptionWithCudaProvider(0)); +// keep in mind almost all of the classes are disposable. 
+using var gpuSessionOptions = SessionOptions.MakeSessionOptionWithCudaProvider(0); +using var session = new InferenceSession(modelPath, gpuSessionOptions); ``` ## Checkout more C# ONNX Runtime resources From 12aec533a793a0b6b943669bdff4d955b6aea44b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 7 Jul 2023 12:00:17 -0700 Subject: [PATCH 05/18] Re-work resnet50 example --- docs/tutorials/csharp/resnet50_csharp.md | 29 ++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/docs/tutorials/csharp/resnet50_csharp.md b/docs/tutorials/csharp/resnet50_csharp.md index 4bc11b1ad5887..d7cd3bf5a1ddd 100644 --- a/docs/tutorials/csharp/resnet50_csharp.md +++ b/docs/tutorials/csharp/resnet50_csharp.md @@ -77,9 +77,10 @@ Note, we're doing a centered crop resize to preserve aspect ratio. Next, we will preprocess the image according to the [requirements of the model](https://github.com/onnx/models/tree/master/vision/classification/resnet#preprocessing): ```cs -Tensor input = new DenseTensor(new[] { 1, 3, 224, 224 }); +// We use DenseTensor for multi-dimensional access to populate the image data var mean = new[] { 0.485f, 0.456f, 0.406f }; var stddev = new[] { 0.229f, 0.224f, 0.225f }; +DenseTensor processedImage = new(new[] { 1, 3, 224, 224 }); image.ProcessPixelRows(accessor => { for (int y = 0; y < accessor.Height; y++) { Span pixelSpan = accessor.GetRowSpan(y); for (int x = 0; x < accessor.Width; x++) { - input[0, 0, y, x] = ((pixelSpan[x].R / 255f) - mean[0]) / stddev[0]; - input[0, 1, y, x] = ((pixelSpan[x].G / 255f) - mean[1]) / stddev[1]; - input[0, 2, y, x] = ((pixelSpan[x].B / 255f) - mean[2]) / stddev[2]; + processedImage[0, 0, y, x] = ((pixelSpan[x].R / 255f) - mean[0]) / stddev[0]; + processedImage[0, 1, y, x] = ((pixelSpan[x].G / 255f) - mean[1]) / stddev[1]; + processedImage[0, 2, y, x] = ((pixelSpan[x].B / 255f) - mean[2]) / stddev[2]; } } }); ``` Here, we're 
creating a Tensor of the required size `(batch-size, channels, heigh Next, we will create the inputs to the model: ```cs -var inputs = new List +// Pin tensor buffer and create a OrtValue with native tensor that makes use of +// DenseTensor buffer directly. This avoids extra data copy within OnnxRuntime. +// It will be unpinned on ortValue disposal +using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance, + processedImage.Buffer, new long[] { 1, 3, 224, 224 }); + +var inputs = new Dictionary { - NamedOnnxValue.CreateFromTensor("data", input) -}; + { "data", inputOrtValue } +} + ``` To check the input node names for an ONNX model, you can use [Netron](https://github.com/lutzroeder/netron) to visualise the model and see input/output names. In this case, this model has `data` as the input node name. @@ -116,7 +124,8 @@ Next, we will create an inference session and run the input through it: ```cs using var session = new InferenceSession(modelFilePath); -using IDisposableReadOnlyCollection results = session.Run(inputs); +using var runOptions = new RunOptions(); +using IDisposableReadOnlyCollection results = session.Run(runOptions, inputs, session.OutputNames); ``` ### Postprocess output @@ -124,7 +133,9 @@ using IDisposableReadOnlyCollection results = session. 
Next, we will need to postprocess the output to get the softmax vector, as this is not handled by the model itself: ```cs -IEnumerable output = results.First().AsEnumerable(); +// We copy results to array only to apply algorithms, otherwise data can be accessed directly +// from the native buffer via ReadOnlySpan or Span +var output = results[0].GetTensorDataAsSpan().ToArray(); float sum = output.Sum(x => (float)Math.Exp(x)); IEnumerable softmax = output.Select(x => (float)Math.Exp(x) / sum); ``` From f7566cb533a2ec1362ebb5398bee979ca258ceb9 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 7 Jul 2023 12:23:23 -0700 Subject: [PATCH 06/18] Adjust FasterRCNN C# --- docs/tutorials/csharp/fasterrcnn_csharp.md | 39 ++++++++++++++-------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/docs/tutorials/csharp/fasterrcnn_csharp.md b/docs/tutorials/csharp/fasterrcnn_csharp.md index ff58124fce0d1..0c9714187c862 100644 --- a/docs/tutorials/csharp/fasterrcnn_csharp.md +++ b/docs/tutorials/csharp/fasterrcnn_csharp.md @@ -73,8 +73,10 @@ Next, we will preprocess the image according to the [requirements of the model]( ```cs var paddedHeight = (int)(Math.Ceiling(image.Height / 32f) * 32f); var paddedWidth = (int)(Math.Ceiling(image.Width / 32f) * 32f); -Tensor input = new DenseTensor(new[] { 3, paddedHeight, paddedWidth }); var mean = new[] { 102.9801f, 115.9465f, 122.7717f }; +//Preprocessing image +//We use Tensor for multi-dimensional access +DenseTensor input = new(new[] { 3, paddedHeight, paddedWidth }); image.ProcessPixelRows(accessor => { for (int y = paddedHeight - accessor.Height; y < accessor.Height; y++) @@ -97,10 +99,16 @@ Here, we're creating a Tensor of the required size `(channels, paddedHeight, pad Next, we will create the inputs to the model: ```cs -var inputs = new List +// Pin tensor memory and use it directly +// It will be unpinned on ortValue disposal +using var inputOrtValue = 
OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance, + input.Buffer, new long[] { 3, paddedHeight, paddedWidth }); + +var inputs = new Dictionary { - NamedOnnxValue.CreateFromTensor("image", input) + { "image", inputOrtValue } }; + ``` To check the input node names for an ONNX model, you can use [Netron](https://github.com/lutzroeder/netron) to visualise the model and see input/output names. In this case, this model has `image` as the input node name. @@ -111,7 +119,9 @@ Next, we will create an inference session and run the input through it: ```cs using var session = new InferenceSession(modelFilePath); -using IDisposableReadOnlyCollection results = session.Run(inputs); +using var runOptions = new RunOptions(); +using IDisposableReadOnlyCollection results = session.Run(runOptions, inputs, session.OutputNames); + ``` ### Postprocess output @@ -119,22 +129,23 @@ using IDisposableReadOnlyCollection results = session. Next, we will need to postprocess the output to get boxes and associated label and confidence scores for each box: ```cs -var resultsArray = results.ToArray(); -float[] boxes = resultsArray[0].AsEnumerable().ToArray(); -long[] labels = resultsArray[1].AsEnumerable().ToArray(); -float[] confidences = resultsArray[2].AsEnumerable().ToArray(); +var boxesSpan = results[0].GetTensorDataAsSpan(); +var labelsSpan = results[1].GetTensorDataAsSpan(); +var confidencesSpan = results[2].GetTensorDataAsSpan(); + +const float minConfidence = 0.7f; var predictions = new List(); -var minConfidence = 0.7f; -for (int i = 0; i < boxes.Length - 4; i += 4) + +for (int i = 0; i < boxesSpan.Length - 4; i += 4) { var index = i / 4; - if (confidences[index] >= minConfidence) + if (confidencesSpan[index] >= minConfidence) { predictions.Add(new Prediction { - Box = new Box(boxes[i], boxes[i + 1], boxes[i + 2], boxes[i + 3]), - Label = LabelMap.Labels[labels[index]], - Confidence = confidences[index] + Box = new Box(boxesSpan[i], boxesSpan[i + 1], boxesSpan[i + 2], 
boxesSpan[i + 3]), + Label = LabelMap.Labels[labelsSpan[index]], + Confidence = confidencesSpan[index] }); } } From fe3193745a1d7191672f7a4fa1edcd6fd9e7399d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 28 Jul 2023 16:53:48 -0700 Subject: [PATCH 07/18] build --- build2023.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build2023.html b/build2023.html index c0efa97a0d3d7..b74078fd47f7b 100644 --- a/build2023.html +++ b/build2023.html @@ -86,4 +86,4 @@

Repos

-f + From d8ef9fdddb6d3d7203d1ecfa260d392a7c683cc4 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 23 Aug 2023 17:02:26 -0700 Subject: [PATCH 08/18] Update examples --- docs/tutorials/csharp/fasterrcnn_csharp.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/tutorials/csharp/fasterrcnn_csharp.md b/docs/tutorials/csharp/fasterrcnn_csharp.md index 0c9714187c862..689f11dabf18a 100644 --- a/docs/tutorials/csharp/fasterrcnn_csharp.md +++ b/docs/tutorials/csharp/fasterrcnn_csharp.md @@ -74,8 +74,9 @@ Next, we will preprocess the image according to the [requirements of the model]( var paddedHeight = (int)(Math.Ceiling(image.Height / 32f) * 32f); var paddedWidth = (int)(Math.Ceiling(image.Width / 32f) * 32f); var mean = new[] { 102.9801f, 115.9465f, 122.7717f }; -//Preprocessing image -//We use Tensor for multi-dimensional access + +// Preprocessing image +// We use DenseTensor for multi-dimensional access DenseTensor input = new(new[] { 3, paddedHeight, paddedWidth }); image.ProcessPixelRows(accessor => { @@ -94,15 +95,20 @@ image.ProcessPixelRows(accessor => Here, we're creating a Tensor of the required size `(channels, paddedHeight, paddedWidth)`, accessing the pixel values, preprocessing them and finally assigning them to the tensor at the appropriate indicies. 
+ ### Setup inputs -Next, we will create the inputs to the model: +// Pin DenseTensor memory and use it directly in the OrtValue tensor +// It will be unpinned on ortValue disposal ```cs -// Pin tensor memory and use it directly -// It will be unpinned on ortValue disposal using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance, input.Buffer, new long[] { 3, paddedHeight, paddedWidth }); +``` + +Next, we will create the inputs to the model: + +```cs var inputs = new Dictionary { From e8dfb1f34a482edbe493b4e19f2932e4032821fc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 23 Aug 2023 17:31:36 -0700 Subject: [PATCH 09/18] Update C# get started --- docs/get-started/with-csharp.md | 76 ++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/docs/get-started/with-csharp.md b/docs/get-started/with-csharp.md index 51d687428afb0..530c51c04d52e 100644 --- a/docs/get-started/with-csharp.md +++ b/docs/get-started/with-csharp.md @@ -42,28 +42,82 @@ This is an [Azure Function](https://azure.microsoft.com/services/functions/) exa string requestBody = await new StreamReader(req.Body).ReadToEndAsync(); dynamic data = JsonConvert.DeserializeObject(requestBody); - review = review ?? data?.review; + review ??= data.review; + Debug.Assert(!string.IsNullOrEmpty(review), "Expecting a string with a content"); // Get path to model to create inference session. - var modelPath = "./model.onnx"; + const string modelPath = "./model.onnx"; + + // Create an InferenceSession from the Model Path. + // Creating and loading sessions are expensive per request. 
+ // They better be cached + using var session = new InferenceSession(modelPath); // create input tensor (nlp example) - var inputTensor = new DenseTensor(new string[] { review }, new int[] { 1, 1 }); + using var inputOrtValue = OrtValue.CreateTensorWithEmptyStrings(OrtAllocator.DefaultInstance, new long[] { 1, 1 }); + inputOrtValue.StringTensorSetElementAt(review, 0); - // Create input data for session. - var input = new List { NamedOnnxValue.CreateFromTensor("input", inputTensor) }; + // Create input data for session. Request all outputs in this case. + var inputs = new Dictionary + { + { "input", inputOrtValue } + }; - // Create an InferenceSession from the Model Path. - var session = new InferenceSession(modelPath); + using var runOptions = new RunOptions(); + + // We are getting a sequence of maps as output. We are interested in the first element (map) of the sequence. + // That result is a Sequence of Maps, and we only need the first map from there. + using var outputs = session.Run(runOptions, inputs, session.OutputNames); + Debug.Assert(outputs.Count > 0, "Expecting some output"); + + // We want the last output, which is the sequence of maps + var lastOutput = outputs[outputs.Count - 1]; + + // Optional code to check the output type + { + var outputTypeInfo = lastOutput.GetTypeInfo(); + Debug.Assert(outputTypeInfo.OnnxType == OnnxValueType.ONNX_TYPE_SEQUENCE, "Expecting a sequence"); + + var sequenceTypeInfo = outputTypeInfo.SequenceTypeInfo; + Debug.Assert(sequenceTypeInfo.ElementType.OnnxType == OnnxValueType.ONNX_TYPE_MAP, "Expecting a sequence of maps"); + } - // Run session and send input data in to get inference output. Call ToList then get the Last item. Then use the AsEnumerable extension method to return the Value result as an Enumerable of NamedOnnxValue. 
- var output = session.Run(input).ToList().Last().AsEnumerable(); + var elementsNum = lastOutput.GetValueCount(); + Debug.Assert(elementsNum > 0, "Expecting a non empty sequence"); + + // Get the first map in sequence + using var firstMap = lastOutput.GetValue(0, OrtAllocator.DefaultInstance); + + // Optional code just checking + { + // Maps always have two elements, keys and values + // We are expecting this to be a map of strings to floats + var mapTypeInfo = firstMap.GetTypeInfo().MapTypeInfo; + Debug.Assert(mapTypeInfo.KeyType == TensorElementType.String, "Expecting keys to be strings"); + Debug.Assert(mapTypeInfo.ValueType.OnnxType == OnnxValueType.ONNX_TYPE_TENSOR, "Values are in the tensor"); + Debug.Assert(mapTypeInfo.ValueType.TensorTypeAndShapeInfo.ElementDataType == TensorElementType.Float, "Result map value is float"); + } + + var inferenceResult = new Dictionary(); + // Let use the visitor to read map keys and values + // Here keys and values are represented with the same number of corresponding entries + // string -> float + firstMap.ProcessMap((keys, values) => { + // Access native buffer directly + var valuesSpan = values.GetTensorDataAsSpan(); + + var entryCount = (int)keys.GetTensorTypeAndShape().ElementCount; + inferenceResult.EnsureCapacity(entryCount); + for (int i = 0; i < entryCount; ++i) + { + inferenceResult.Add(keys.GetStringElement(i), valuesSpan[i]); + } + }, OrtAllocator.DefaultInstance); - // From the Enumerable output create the inferenceResult by getting the First value and using the AsDictionary extension method of the NamedOnnxValue. - var inferenceResult = output.First().AsDictionary(); // Return the inference result as json. 
return new JsonResult(inferenceResult); + } ``` ## Reuse input/output tensor buffers From 265a83cc9403bf2dfb8a75b93a4e7c2af9150f0f Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 7 Sep 2023 18:03:43 -0700 Subject: [PATCH 10/18] Fix stable diffusion --- .../csharp/bert-nlp-csharp-console-app.md | 2 +- .../csharp/stable-diffusion-csharp.md | 128 +++++++++++------- 2 files changed, 82 insertions(+), 48 deletions(-) diff --git a/docs/tutorials/csharp/bert-nlp-csharp-console-app.md b/docs/tutorials/csharp/bert-nlp-csharp-console-app.md index 118b95a7d187f..ac42131dace19 100644 --- a/docs/tutorials/csharp/bert-nlp-csharp-console-app.md +++ b/docs/tutorials/csharp/bert-nlp-csharp-console-app.md @@ -205,7 +205,7 @@ namespace MyApp // Note: actual namespace depends on the project name. }; ``` -### Create the `input` of `List` that is needed for inference +### Create the `inputs` of `name -> OrtValue` pairs as required for inference - Get the model, create 3 OrtValues on top of the input buffers and wrap them into a Dictionary to feed into a Run(). Beware that almost all of the Onnxruntime classes wrap native data structures, and, therefore, must be disposed diff --git a/docs/tutorials/csharp/stable-diffusion-csharp.md b/docs/tutorials/csharp/stable-diffusion-csharp.md index ae0681916d1df..8474ab24958d2 100644 --- a/docs/tutorials/csharp/stable-diffusion-csharp.md +++ b/docs/tutorials/csharp/stable-diffusion-csharp.md @@ -136,33 +136,51 @@ make a picture of green tree with flowers aroundit and a red sky ```csharp public static int[] TokenizeText(string text) { - // Create Tokenizer and tokenize the sentence. 
- var tokenizerOnnxPath = Directory.GetCurrentDirectory().ToString() + ("\\text_tokenizer\\custom_op_cliptok.onnx"); - - // Create session options for custom op of extensions - var sessionOptions = new SessionOptions(); - var customOp = "ortextensions.dll"; - sessionOptions.RegisterCustomOpLibraryV2(customOp, out var libraryHandle); - - // Create an InferenceSession from the onnx clip tokenizer. - var tokenizeSession = new InferenceSession(tokenizerOnnxPath, sessionOptions); - var inputTensor = new DenseTensor(new string[] { text }, new int[] { 1 }); - var inputString = new List { NamedOnnxValue.CreateFromTensor("string_input", inputTensor) }; - // Run session and send the input data in to get inference output. - var tokens = tokenizeSession.Run(inputString); - var inputIds = (tokens.ToList().First().Value as IEnumerable).ToArray(); - Console.WriteLine(String.Join(" ", inputIds)); - // Cast inputIds to Int32 - var InputIdsInt = inputIds.Select(x => (int)x).ToArray(); - var modelMaxLength = 77; - // Pad array with 49407 until length is modelMaxLength - if (InputIdsInt.Length < modelMaxLength) - { - var pad = Enumerable.Repeat(49407, 77 - InputIdsInt.Length).ToArray(); - InputIdsInt = InputIdsInt.Concat(pad).ToArray(); - } - return InputIdsInt; + // Create Tokenizer and tokenize the sentence. + var tokenizerOnnxPath = Directory.GetCurrentDirectory().ToString() + ("\\text_tokenizer\\custom_op_cliptok.onnx"); + + // Create session options for custom op of extensions + using var sessionOptions = new SessionOptions(); + var customOp = "ortextensions.dll"; + sessionOptions.RegisterCustomOpLibraryV2(customOp, out var libraryHandle); + + // Create an InferenceSession from the onnx clip tokenizer. 
+ using var tokenizeSession = new InferenceSession(tokenizerOnnxPath, sessionOptions); + + // Create input tensor from text + using var inputTensor = OrtValue.CreateTensorWithEmptyStrings(OrtAllocator.DefaultInstance, new long[] { 1 }); + inputTensor.StringTensorSetElementAt(text.AsSpan(), 0); + + var inputs = new Dictionary + { + { "string_input", inputTensor } + }; + + // Run session and send the input data in to get inference output. + using var runOptions = new RunOptions(); + using var tokens = tokenizeSession.Run(runOptions, inputs, tokenizeSession.OutputNames); + + var inputIds = tokens[0].GetTensorDataAsSpan(); + + // Cast inputIds to Int32 + var InputIdsInt = new int[inputIds.Length]; + for(int i = 0; i < inputIds.Length; i++) + { + InputIdsInt[i] = (int)inputIds[i]; + } + + Console.WriteLine(String.Join(" ", InputIdsInt)); + + var modelMaxLength = 77; + // Pad array with 49407 until length is modelMaxLength + if (InputIdsInt.Length < modelMaxLength) + { + var pad = Enumerable.Repeat(49407, 77 - InputIdsInt.Length).ToArray(); + InputIdsInt = InputIdsInt.Concat(pad).ToArray(); + } + return InputIdsInt; } + ``` ```text @@ -186,26 +204,43 @@ The text encoder creates the text embedding which is trained to encode the text - Text Embedding: A vector of numbers that represents the text prompt created from the tokenization result. The text embedding is created by the `text_encoder` model. ```csharp -public static DenseTensor TextEncoder(int[] tokenizedInput) -{ - // Create input tensor. - var input_ids = TensorHelper.CreateTensor(tokenizedInput, new[] { 1, tokenizedInput.Count() }); - - var input = new List { NamedOnnxValue.CreateFromTensor("input_ids", input_ids) }; - - var textEncoderOnnxPath = Directory.GetCurrentDirectory().ToString() + ("\\text_encoder\\model.onnx"); - - var encodeSession = new InferenceSession(textEncoderOnnxPath); - // Run inference. 
- var encoded = encodeSession.Run(input); - - var lastHiddenState = (encoded.ToList().First().Value as IEnumerable).ToArray(); - var lastHiddenStateTensor = TensorHelper.CreateTensor(lastHiddenState.ToArray(), new[] { 1, 77, 768 }); - - return lastHiddenStateTensor; - -} + public static OrtValue TextEncoder(int[] tokenizedInput) + { + // Create input tensor. OrtValue will not copy, will read from managed memory + using var input_ids = OrtValue.CreateTensorValueFromMemory(tokenizedInput, + new long[] { 1, tokenizedInput.Count() }); + + var textEncoderOnnxPath = Directory.GetCurrentDirectory().ToString() + ("\\text_encoder\\model.onnx"); + + using var encodeSession = new InferenceSession(textEncoderOnnxPath); + + // Pre-allocate the output so it goes to a managed buffer + // we know the shape + var lastHiddenState = new float[1 * 77 * 768]; + using var outputOrtValue = OrtValue.CreateTensorValueFromMemory(lastHiddenState, new long[] { 1, 77, 768 }); + try + { + string[] input_names = { "input_ids" }; + OrtValue[] inputs = { input_ids }; + + string[] output_names = { encodeSession.OutputNames[0] }; + OrtValue[] outputs = { outputOrtValue }; + + // Run inference. 
+ using var runOptions = new RunOptions(); + encodeSession.Run(runOptions, input_names, inputs, output_names, outputs); + + return outputOrtValue; + } + catch(Exception ex) + { + // Dispose on error + outputOrtValue.Dispose(); + throw; + } + } ``` + ```text torch.Size([1, 77, 768]) tensor([[[-0.3884, 0.0229, -0.0522, ..., -0.4899, -0.3066, 0.0675], @@ -259,7 +294,6 @@ For each inference step the latent image is duplicated to create the tensor shap ```csharp // Create Inference Session var unetSession = new InferenceSession(modelPath, options); -var input = new List(); for (int t = 0; t < timesteps.Length; t++) { @@ -270,7 +304,7 @@ for (int t = 0; t < timesteps.Length; t++) latentModelInput = scheduler.ScaleInput(latentModelInput, timesteps[t]); // Create model input of text embeddings, scaled latent image and timestep - input = CreateUnetModelInput(textEmbeddings, latentModelInput, timesteps[t]); + var input = CreateUnetModelInput(textEmbeddings, latentModelInput, timesteps[t]); // Run Inference var output = unetSession.Run(input); From 8a3d37ba3e02195c34598f4be56b9b2b794f0cd5 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 7 Sep 2023 18:07:59 -0700 Subject: [PATCH 11/18] Fix Inference Loop --- .../csharp/stable-diffusion-csharp.md | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/docs/tutorials/csharp/stable-diffusion-csharp.md b/docs/tutorials/csharp/stable-diffusion-csharp.md index 8474ab24958d2..a850a2e451624 100644 --- a/docs/tutorials/csharp/stable-diffusion-csharp.md +++ b/docs/tutorials/csharp/stable-diffusion-csharp.md @@ -292,26 +292,37 @@ var latents = GenerateLatentSample(batchSize, height, width,seed, scheduler.Init For each inference step the latent image is duplicated to create the tensor shape of (2,4,64,64), it is then scaled and inferenced with the unet model. The output tensors (2,4,64,64) are split and guidance is applied. 
The resulting tensor is then sent into the `LMSDiscreteScheduler` step as part of the denoising process and the resulting tensor from the scheduler step is returned and the loop completes again until the `num_inference_steps` is reached. ```csharp +var modelPath = Directory.GetCurrentDirectory().ToString() + ("\\unet\\model.onnx"); +var scheduler = new LMSDiscreteScheduler(); +var timesteps = scheduler.SetTimesteps(numInferenceSteps); + +var seed = new Random().Next(); +var latents = GenerateLatentSample(batchSize, height, width, seed, scheduler.InitNoiseSigma); + // Create Inference Session -var unetSession = new InferenceSession(modelPath, options); +using var options = new SessionOptions(); +using var unetSession = new InferenceSession(modelPath, options); + +var latentInputShape = new int[] { 2, 4, height / 8, width / 8 }; +var splitTensorsShape = new int[] { 1, 4, height / 8, width / 8 }; for (int t = 0; t < timesteps.Length; t++) { // torch.cat([latents] * 2) - var latentModelInput = TensorHelper.Duplicate(latents.ToArray(), new[] { 2, 4, height / 8, width / 8 }); - + var latentModelInput = TensorHelper.Duplicate(latents.ToArray(), latentInputShape); + // Scale the input latentModelInput = scheduler.ScaleInput(latentModelInput, timesteps[t]); - + // Create model input of text embeddings, scaled latent image and timestep var input = CreateUnetModelInput(textEmbeddings, latentModelInput, timesteps[t]); - + // Run Inference - var output = unetSession.Run(input); - var outputTensor = (output.ToList().First().Value as DenseTensor); + using var output = unetSession.Run(input); + var outputTensor = output[0].Value as DenseTensor; // Split tensors from 2,4,64,64 to 1,4,64,64 - var splitTensors = TensorHelper.SplitTensor(outputTensor, new[] { 1, 4, height / 8, width / 8 }); + var splitTensors = TensorHelper.SplitTensor(outputTensor, splitTensorsShape); var noisePred = splitTensors.Item1; var noisePredText = splitTensors.Item2; @@ -321,6 +332,7 @@ for (int t = 0; t < 
timesteps.Length; t++) // LMS Scheduler Step latents = scheduler.Step(noisePred, timesteps[t], latents); } + ``` ## Postprocess the `output` with the VAEDecoder After the inference loop is complete, the resulting tensor is scaled and then sent to the `vae_decoder` model to decode the image. Lastly the decoded image tensor is converted to an image and saved to disc. From e476deec418aa0fa7d3b5cc994476cee54038040 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 13 Sep 2023 15:00:33 -0700 Subject: [PATCH 12/18] Adjust Basic --- docs/tutorials/csharp/basic_csharp.md | 134 +++++++++++++++--- docs/tutorials/csharp/csharp-gpu.md | 2 +- .../csharp/stable-diffusion-csharp.md | 28 ++-- 3 files changed, 128 insertions(+), 36 deletions(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index 029e5d00cf2e7..085444004ab90 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -3,34 +3,134 @@ nav_exclude: true --- # C# Tutorial: Basic -Here is simple tutorial for getting started with running inference on an existing ONNX model for a given input data. The model is typically trained using any of the well-known training frameworks and exported into the ONNX format. +Here is simple tutorial for getting started with running inference on an existing ONNX model for a given input data. +The model is typically trained using any of the well-known training frameworks and exported into the ONNX format. + +`NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` are the classes to be deprecated. + +The new `OrtValue` based API is a recommended approach. The `OrtValue` API generates less garbage and is more performant. +Some scenarios indicated 4x performance improvement over the previous API and significantly less garbage. + +`DenseTensor` class can still be used for multi-dimensional access to the data since the new `Slice` based API feature +only a 1-D index. 
However, some reported a slow performance when using `DenseTensor` class multi-dimensional access. One can then create an OrtValue on top +of the tensors data. + +`ShapeUtils` class provides some help to deal with multi-dimensional indices for OrtValues. + +`OrtValue` based API provides direct native memory access in a type safe manner using `ReadOnlySpan` and `Span` stack bases structures. +OrtValue is a universal container that can hold different ONNX types, such as tensors, maps, and sequences. +It always existed in the onnxruntime library, but was not exposed in the C# API. + +As before, `OrtValues` can be created directly on top of the managed `unmanaged` arrays. Thus, onnxruntime will directly use +managed buffers for input. + +If output shapes are known, one can pre-allocate `OrtValue` on top of the managed or unmanaged allocations and supply +those OrtValues to be used as outputs. + +Character data is represented as UTF-16 string objects in C# will still need to be copied and converted to UTF-8 to the native +memory. However, that conversion is now more optimized and is done in a single pass without intermediate byte arrays. +The same applies to string `OrtValue` tensors returned as outputs. Character based API now operates on Spans, ReadOnlySpans, +and ReadOnlyMemory objects. This adds flexibility to the API and allows to avoid unnecessary copies. + +Except some of the above deprecated API classes, nearly all of C# API classes are `IDisposable`. +Meaning they need to be promptly disposed after use, otherwise you will get memory leaks. +Because OrtValues are used to hold tensor data, the sizes of the leaks can be huge. They are likely +to accumulate with each `Run` call, as each inference call requires input OrtValues and returns output OrtValues. +Do not hold your breath for finalizers which are not guaranteed to ever run, and if they do, they do it +when it is too late. 
+ +Not disposing `OrtValue` that was created on top of the managed buffer would result in +that buffer pinned in memory indefinitely. Such a buffer can not be garbage collected or moved in memory. + +`OrtValue`s that were created on top of the native onnxruntime memory should also be disposed of promptly. +Otherwise, the native memory will not be deallocated. GC can not operate on native memory or any other native resources. +OrtValues returned by `Run()` usually hold native memory. + +The `using` statement is a convenient way to ensure that the objects are disposed. +`InferenceSession` can be a long lived object and a member of another class. It eventually must also need to be disposed. +The hosting class would have to be made disposable to achieve this. + +OrtValue API also provides visitor like API to walk ONNX maps and sequences. +This is a more efficient way to access Onnxruntime data. To start scoring using the model, open a session using the `InferenceSession` class, passing in the file path to the model as a parameter. +```cs +using var session = new InferenceSession("model.onnx"); +``` + +Once a session is created, you can execute queries using the `Run` method of the `InferenceSession` object. 
```cs -var session = new InferenceSession("model.onnx"); + +float[] sourceData; // assume your data is loaded into a flat float array +long[] dimensions; // and the dimensions of the input is stored here + +// Create a OrtValue on top of the sourceData array +using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(sourceData, dimensions); + +var inputs = new Dictionary { + { "name1", inputOrtValue } +}; + + +using var runOptions = new RunOptions(); + +// Pass inputs and request the first output +// Note that the output is a disposable collection that holds OrtValues +using var output = session.Run(runOptions, inputs, session.OutputNames[0]); + +var output_0 = output[0]; + +// Assuming the output contains a tensor of float data, you can access it as follows +// Returns Span which points directly to native memory. +var outputData = output_0.GetTensorDataAsSpan(); + +// If you are interested in more information about output, request its type and shape +// Assuming it is a tensor +// This is not disposable, will be GCed +// There you can request Shape, ElementDataType, etc +var tensorTypeAndShape = output_0.GetTensorTypeAndShape(); + +``` +You can still use `Tensor` class for data manipulation if you have existing code that does it. +Then create `OrtValue` on top of Tensor buffer. + +```cs + +// Create and manipulate the data using tensor interface +Tensor t1 = new DenseTensor(sourceData, dimensions); + +// One minor inconvenience is that Tensor class operates on `int` dimensions and indices. +// OrtValue dimensions are `long`. This is required, because `OrtValue` talks directly to +// Ort API and the library uses long dimensions. + +// Convert dims to long[] +var shape = Array.Convert(dimensions, Convert.ToInt64); + +using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance, + t1.Buffer, shape); + ``` -Once a session is created, you can execute queries using the `Run` method of the `InferenceSession` object. 
Currently, only `Tensor` type of input and outputs are supported. The results of the `Run` method are represented as a collection of .Net `Tensor` objects (as defined in [System.Numerics.Tensor](https://www.nuget.org/packages/System.Numerics.Tensors)). +Here is a way to populate a string tensor. Strings can not be mapped, and must be copy/converted to native memory. +To that end we pre-allocate a native tensor of empty strings with specified dimensions, and then +set individual strings by index. + ```cs -Tensor t1, t2; // let's say data is fed into the Tensor objects -var inputs = new List() - { - NamedOnnxValue.CreateFromTensor("name1", t1), - NamedOnnxValue.CreateFromTensor("name2", t2) - }; -using (var results = session.Run(inputs)) + +string[] strs = { "Hello", "Ort", "World" }; +long[] shape = { 1, 1, 3 }; +var elementsNum = ShapeUtils.GetSizeForShape(shape); + +using var strTensor = OrtValue.CreateTensorWithEmptyStrings(OrtAllocator.DefaultInstance, shape); + +for (long i = 0; i < elementsNum; ++i) { - // manipulate the results + strTensor.StringTensorSetElementAt(strs[i].AsSpan(), i); } + ``` -You can load your input data into Tensor objects in several ways. A simple example is to create the Tensor from arrays. 
-```cs -float[] sourceData; // assume your data is loaded into a flat float array -int[] dimensions; // and the dimensions of the input is stored here -Tensor t1 = new DenseTensor(sourceData, dimensions); -``` diff --git a/docs/tutorials/csharp/csharp-gpu.md b/docs/tutorials/csharp/csharp-gpu.md index f8edcceba6a48..a7dd199073f7a 100644 --- a/docs/tutorials/csharp/csharp-gpu.md +++ b/docs/tutorials/csharp/csharp-gpu.md @@ -25,7 +25,7 @@ See this table for supported versions: | ONNX Runtime Version | CUDA Toolkit Version | cuDNN Version| |----------------------|----------------------|--------------| -| 1.13 - 1.14 | 11.6 | 8.5.0.96 | +| 1.13 - 1.16 | 11.6 | 8.5.0.96 | | 1.9 - 1.12 | 11.4 | 8.2.2.26 | NOTE: Full table can be found [here](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements) diff --git a/docs/tutorials/csharp/stable-diffusion-csharp.md b/docs/tutorials/csharp/stable-diffusion-csharp.md index a850a2e451624..5313fe7d43692 100644 --- a/docs/tutorials/csharp/stable-diffusion-csharp.md +++ b/docs/tutorials/csharp/stable-diffusion-csharp.md @@ -204,7 +204,7 @@ The text encoder creates the text embedding which is trained to encode the text - Text Embedding: A vector of numbers that represents the text prompt created from the tokenization result. The text embedding is created by the `text_encoder` model. ```csharp - public static OrtValue TextEncoder(int[] tokenizedInput) + public static float[] TextEncoder(int[] tokenizedInput) { // Create input tensor. 
OrtValue will not copy, will read from managed memory using var input_ids = OrtValue.CreateTensorValueFromMemory(tokenizedInput, @@ -218,26 +218,18 @@ The text encoder creates the text embedding which is trained to encode the text // we know the shape var lastHiddenState = new float[1 * 77 * 768]; using var outputOrtValue = OrtValue.CreateTensorValueFromMemory(lastHiddenState, new long[] { 1, 77, 768 }); - try - { - string[] input_names = { "input_ids" }; - OrtValue[] inputs = { input_ids }; - string[] output_names = { encodeSession.OutputNames[0] }; - OrtValue[] outputs = { outputOrtValue }; + string[] input_names = { "input_ids" }; + OrtValue[] inputs = { input_ids }; - // Run inference. - using var runOptions = new RunOptions(); - encodeSession.Run(runOptions, input_names, inputs, output_names, outputs); + string[] output_names = { encodeSession.OutputNames[0] }; + OrtValue[] outputs = { outputOrtValue }; - return outputOrtValue; - } - catch(Exception ex) - { - // Dispose on error - outputOrtValue.Dispose(); - throw; - } + // Run inference. + using var runOptions = new RunOptions(); + encodeSession.Run(runOptions, input_names, inputs, output_names, outputs); + + return lastHiddenState; } ``` From 6f2867498372489b5f35360d42beb88579921af3 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 20 Sep 2023 15:02:15 -0700 Subject: [PATCH 13/18] Better basics --- docs/tutorials/csharp/basic_csharp.md | 37 ++++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index 085444004ab90..149e7b0bd2e71 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -6,14 +6,16 @@ nav_exclude: true Here is simple tutorial for getting started with running inference on an existing ONNX model for a given input data. The model is typically trained using any of the well-known training frameworks and exported into the ONNX format. 
-`NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` are the classes to be deprecated. +Note, that the following classed `NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` that have been used +for some time are going to be deprecated in the future. They are still supported, but are not recommended for new code. The new `OrtValue` based API is a recommended approach. The `OrtValue` API generates less garbage and is more performant. Some scenarios indicated 4x performance improvement over the previous API and significantly less garbage. +It provides uniform access to data via `ReadOnlySpan` and `Span` structures, regardless of its location, managed or unmanaged. -`DenseTensor` class can still be used for multi-dimensional access to the data since the new `Slice` based API feature -only a 1-D index. However, some reported a slow performance when using `DenseTensor` class multi-dimensional access. One can then create an OrtValue on top -of the tensors data. +`DenseTensor` class can still be used for multi-dimensional access to the data since the new `Span` based API feature +only a 1-D index. However, some reported a slow performance when using `DenseTensor` class multi-dimensional access. +One can then create an OrtValue on top of the tensors data. `ShapeUtils` class provides some help to deal with multi-dimensional indices for OrtValues. @@ -21,34 +23,39 @@ of the tensors data. OrtValue is a universal container that can hold different ONNX types, such as tensors, maps, and sequences. It always existed in the onnxruntime library, but was not exposed in the C# API. -As before, `OrtValues` can be created directly on top of the managed `unmanaged` arrays. Thus, onnxruntime will directly use -managed buffers for input. +As before, `OrtValues` can be created directly on top of the managed `unmanaged` (struct based blittable types) arrays. +Read MS documentation on `blittable` data types. 
onnxruntime C# API allows use of managed buffers for input or output. If output shapes are known, one can pre-allocate `OrtValue` on top of the managed or unmanaged allocations and supply those OrtValues to be used as outputs. -Character data is represented as UTF-16 string objects in C# will still need to be copied and converted to UTF-8 to the native +Character data is represented as UTF-16 string objects in C#. It will still need to be copied and converted to UTF-8 to the native memory. However, that conversion is now more optimized and is done in a single pass without intermediate byte arrays. -The same applies to string `OrtValue` tensors returned as outputs. Character based API now operates on Spans, ReadOnlySpans, -and ReadOnlyMemory objects. This adds flexibility to the API and allows to avoid unnecessary copies. +The same applies to string `OrtValue` tensors returned as outputs. Character based API now operates on `Span`, +`ReadOnlySpan`, and `ReadOnlyMemory` objects. This adds flexibility to the API and allows to avoid unnecessary copies. Except some of the above deprecated API classes, nearly all of C# API classes are `IDisposable`. -Meaning they need to be promptly disposed after use, otherwise you will get memory leaks. +Meaning they need to be disposed after use, otherwise you will get memory leaks. Because OrtValues are used to hold tensor data, the sizes of the leaks can be huge. They are likely to accumulate with each `Run` call, as each inference call requires input OrtValues and returns output OrtValues. Do not hold your breath for finalizers which are not guaranteed to ever run, and if they do, they do it when it is too late. +This includes `SessionOptions`, `RunOptions`, `InferenceSession`, `OrtValue`. Run() calls return `IDisposableCollection` +that allows to dispose all of the containing objects in one statement or `using`. This is because these objects +own some native resource, often a native object. 
+ Not disposing `OrtValue` that was created on top of the managed buffer would result in that buffer pinned in memory indefinitely. Such a buffer can not be garbage collected or moved in memory. `OrtValue`s that were created on top of the native onnxruntime memory should also be disposed of promptly. -Otherwise, the native memory will not be deallocated. GC can not operate on native memory or any other native resources. -OrtValues returned by `Run()` usually hold native memory. +Otherwise, the native memory will not be deallocated. OrtValues returned by `Run()` usually hold native memory. + +GC can not operate on native memory or any other native resources. -The `using` statement is a convenient way to ensure that the objects are disposed. +The `using` statement or a block is a convenient way to ensure that the objects are disposed. `InferenceSession` can be a long lived object and a member of another class. It eventually must also need to be disposed. -The hosting class would have to be made disposable to achieve this. +This means, the containing class also would have to be made disposable to achieve this. OrtValue API also provides visitor like API to walk ONNX maps and sequences. This is a more efficient way to access Onnxruntime data. @@ -99,7 +106,7 @@ Then create `OrtValue` on top of Tensor buffer. ```cs // Create and manipulate the data using tensor interface -Tensor t1 = new DenseTensor(sourceData, dimensions); +DenseTensor t1 = new DenseTensor(sourceData, dimensions); // One minor inconvenience is that Tensor class operates on `int` dimensions and indices. // OrtValue dimensions are `long`. 
This is required, because `OrtValue` talks directly to From 0c060e3d8bafc861311413f205e8e8eda3f014c4 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 20 Sep 2023 15:52:58 -0700 Subject: [PATCH 14/18] Make basic_cshar show up --- docs/tutorials/csharp/basic_csharp.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index 149e7b0bd2e71..51f549d13e10f 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -1,5 +1,10 @@ --- -nav_exclude: true +title: C# Tutorial: Basic +description: Basic usage of C# API +parent: Inference with C# +grand_parent: Tutorials +has_children: false +nav_order: 1 --- # C# Tutorial: Basic From 87561cb2e127ff5d539690e7e34e0083eda6ea86 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 20 Sep 2023 16:21:26 -0700 Subject: [PATCH 15/18] Make nave_order: 0 --- docs/tutorials/csharp/basic_csharp.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index 51f549d13e10f..bf03ce25d8863 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -4,8 +4,10 @@ description: Basic usage of C# API parent: Inference with C# grand_parent: Tutorials has_children: false -nav_order: 1 +nav_order: 0 --- + + # C# Tutorial: Basic Here is simple tutorial for getting started with running inference on an existing ONNX model for a given input data. 
From a7cb18f445cd03444f3d96e7bce45a9706074e5d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 20 Sep 2023 16:46:06 -0700 Subject: [PATCH 16/18] Fix navigation --- docs/tutorials/csharp/basic_csharp.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index bf03ce25d8863..504ee9e12c9bb 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -4,19 +4,19 @@ description: Basic usage of C# API parent: Inference with C# grand_parent: Tutorials has_children: false -nav_order: 0 +nav_order: 1 --- # C# Tutorial: Basic -Here is simple tutorial for getting started with running inference on an existing ONNX model for a given input data. -The model is typically trained using any of the well-known training frameworks and exported into the ONNX format. +Here is a simple tutorial for getting started with running inference on an existing ONNX model for a given input data. +The model is typically trained using any of the well-known training frameworks and then exported into the ONNX format. -Note, that the following classed `NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` that have been used -for some time are going to be deprecated in the future. They are still supported, but are not recommended for new code. +Note, that the following classed `NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` are going +to be deprecated in the future. They are not recommended for new code. -The new `OrtValue` based API is a recommended approach. The `OrtValue` API generates less garbage and is more performant. +The new `OrtValue` based API is the recommended approach. The `OrtValue` API generates less garbage and is more performant. Some scenarios indicated 4x performance improvement over the previous API and significantly less garbage. 
It provides uniform access to data via `ReadOnlySpan` and `Span` structures, regardless of its location, managed or unmanaged. @@ -34,9 +34,9 @@ As before, `OrtValues` can be created directly on top of the managed `unmanaged` Read MS documentation on `blittable` data types. onnxruntime C# API allows use of managed buffers for input or output. If output shapes are known, one can pre-allocate `OrtValue` on top of the managed or unmanaged allocations and supply -those OrtValues to be used as outputs. +those OrtValues to be used as outputs. Due to this fact, the need for `IOBinding` is greatly diminished. -Character data is represented as UTF-16 string objects in C#. It will still need to be copied and converted to UTF-8 to the native +String data is represented as UTF-16 string objects in C#. It will still need to be copied and converted to UTF-8 to the native memory. However, that conversion is now more optimized and is done in a single pass without intermediate byte arrays. The same applies to string `OrtValue` tensors returned as outputs. Character based API now operates on `Span`, `ReadOnlySpan`, and `ReadOnlyMemory` objects. This adds flexibility to the API and allows to avoid unnecessary copies. 
From a9d30764e8a4d9df475ead76f52b3296aa5fcb66 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 25 Sep 2023 14:44:46 -0700 Subject: [PATCH 17/18] Adjust page title --- docs/tutorials/csharp/basic_csharp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index 504ee9e12c9bb..24a21e79a3551 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -1,5 +1,5 @@ --- -title: C# Tutorial: Basic +title: Basic C# Tutorial description: Basic usage of C# API parent: Inference with C# grand_parent: Tutorials From 4f03dc24c5ed3964152efd76e7a911c216726d28 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 26 Sep 2023 10:22:59 -0700 Subject: [PATCH 18/18] Fix a typo --- docs/tutorials/csharp/basic_csharp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/csharp/basic_csharp.md b/docs/tutorials/csharp/basic_csharp.md index 24a21e79a3551..6b8014b8e1e7d 100644 --- a/docs/tutorials/csharp/basic_csharp.md +++ b/docs/tutorials/csharp/basic_csharp.md @@ -13,7 +13,7 @@ nav_order: 1 Here is a simple tutorial for getting started with running inference on an existing ONNX model for a given input data. The model is typically trained using any of the well-known training frameworks and then exported into the ONNX format. -Note, that the following classed `NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` are going +Note, that the following classes `NamedOnnxValue`, `DisposableNamedOnnxValue`, `FixedBufferOnnxValue` are going to be deprecated in the future. They are not recommended for new code. The new `OrtValue` based API is the recommended approach. The `OrtValue` API generates less garbage and is more performant.