Merge pull request #835 from xmos/yolov8

Add yolov8 classification example

panickal-xmos authored Sep 18, 2023
2 parents 43de020 + 386594d commit 215f672
Showing 9 changed files with 402 additions and 0 deletions.
33 changes: 33 additions & 0 deletions examples/app_yolov8_classification/Makefile
@@ -0,0 +1,33 @@
TARGET = XCORE-AI-EXPLORER
APP_NAME =

APP_FLAGS = -report \
            -O3 -mcmodel=large -fxscope -Wno-xcore-fptrgroup \
            -g \
            -lquadflash \
            -DSHARED_TENSOR_ARENA

# *******************************************************
# Include AI Tools library and headers
# *******************************************************
# The following variables are defined for AI Tools library
# and should be included when building the app.
# XMOS_AITOOLSLIB_DEFINITIONS
# XMOS_AITOOLSLIB_INCLUDES
# XMOS_AITOOLSLIB_LIBRARIES
#
ifeq ($(XMOS_AITOOLSLIB_PATH),)
$(error Path to XMOS AI Tools library and headers not set correctly!)
endif
include ${XMOS_AITOOLSLIB_PATH}/buildfiles/aitoolslib.make

XCC_FLAGS = $(APP_FLAGS) $(XMOS_AITOOLSLIB_DEFINITIONS) $(XMOS_AITOOLSLIB_INCLUDES)
XCC_CPP_FLAGS = $(APP_FLAGS) -std=c++14 $(XMOS_AITOOLSLIB_DEFINITIONS) $(XMOS_AITOOLSLIB_INCLUDES)
XCC_MAP_FLAGS = $(APP_FLAGS) $(XMOS_AITOOLSLIB_LIBRARIES)

#=============================================================================
# The following part of the Makefile includes the common build infrastructure
# for compiling XMOS applications. You should not need to edit below here.

XMOS_MAKE_PATH ?= ../..
include $(XMOS_MAKE_PATH)/xcommon/module_xcommon/build/Makefile.common
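The library and headers above ship with the xmos-ai-tools Python package. One common way to set XMOS_AITOOLSLIB_PATH before invoking xmake, assuming the package is installed in the active Python environment, is:

    export XMOS_AITOOLSLIB_PATH=$(python -c "import xmos_ai_tools.runtime as rt; import os; print(os.path.dirname(rt.__file__))")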
18 changes: 18 additions & 0 deletions examples/app_yolov8_classification/README.rst
@@ -0,0 +1,18 @@
YOLOv8 classification example with flash
========================================

Please consult the `installation guide <../../docs/rst/flow.rst>`_ for how to install the tools.

To compile and run this example, follow these steps::

    python obtain_and_build_yolov8.py
    xmake
    xflash --target XCORE-AI-EXPLORER --data xcore_flash_binary.out
    xrun --xscope bin/app_yolov8_classification.xe

In this example, we run inference on the model with a sample image of a LION.
Running the example should print::

    Correct - Inferred class is LION!

The same lion image is saved in raw format as ``lion.bin`` for inference on the host interpreter, and as a header file in ``src/lion.h`` for inference on device.
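
For reference, a raw input like ``lion.bin`` can be produced in a few lines of Python. This is a minimal sketch, not part of the example sources; the source image name ``lion.jpg`` and the use of Pillow are assumptions::

    from PIL import Image
    import numpy as np

    # Resize a source photo to the model's 160x160 input and dump the raw
    # RGB bytes (160 * 160 * 3 = 76800 bytes, one uint8 per channel value).
    img = Image.open("lion.jpg").convert("RGB").resize((160, 160))
    np.asarray(img, dtype=np.uint8).tofile("lion.bin")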
141 changes: 141 additions & 0 deletions examples/app_yolov8_classification/lion.bin

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions examples/app_yolov8_classification/obtain_and_build_yolov8.py
@@ -0,0 +1,80 @@
from xmos_ai_tools import xformer
from xmos_ai_tools.xinterpreters import xcore_tflm_host_interpreter
from ultralytics import YOLO
import numpy as np

HEIGHT, WIDTH = 160, 160
TFLITE_MODEL_PATH = "yolov8n-cls_saved_model/yolov8n-cls_full_integer_quant.tflite"
OPT_MODEL_PATH = "src/model.tflite"
OPT_PARAMS_PATH = "src/model_flash.params"
NAMING_PREFIX = "model_"

###############################################
# Creating and converting a YOLOv8-cls model  #
###############################################

# Load a model
model = YOLO('yolov8n-cls.pt') # load an official model

# Export the model
_format = "tflite"

model.export(
    format=_format,
    imgsz=(HEIGHT, WIDTH),
    int8=True,
)
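
# With int8=True, ultralytics writes a full-integer quantized TFLite model
# into the yolov8n-cls_saved_model/ directory; that file is the
# TFLITE_MODEL_PATH defined above.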

# Convert the model to XCore-optimized TFLite via xformer.
# There are various ways to configure the compiler to optimize the model;
# operator splitting isn't documented yet. This configuration works well
# here, but reach out if you need assistance with other complex models.
xformer.convert(
    TFLITE_MODEL_PATH,
    OPT_MODEL_PATH,
    {
        "xcore-flash-image-file": OPT_PARAMS_PATH,
        "xcore-thread-count": "5",
        "xcore-naming-prefix": NAMING_PREFIX,
    },
)
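
# "xcore-flash-image-file" moves the model's learned parameters into a
# separate .params file so they can be streamed from flash at runtime instead
# of being held in RAM; "xcore-thread-count" sets how many hardware threads
# the compiled operators may use.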

# Generate flash binary
xformer.generate_flash(
    output_file="xcore_flash_binary.out",
    model_files=[OPT_MODEL_PATH],
    param_files=[OPT_PARAMS_PATH],
)
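
# xcore_flash_binary.out is the image that xflash programs into the board's
# QSPI flash (see the README); at runtime the flash server on tile[0] serves
# the model and its parameters from it.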

#######################################################################
# Running the model on xcore host interpreter with sample input image #
#######################################################################

# Sample image of a lion (ImageNet class 291)
with open("lion.bin", "rb") as f:
    data = f.read()

input_array = np.frombuffer(data, dtype=np.uint8)
# input image values are in the range 0 to 255
# we subtract 128 to change to -128 to 127 for int8
input_array = (input_array - 128).astype(np.int8)
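# (The subtraction wraps in uint8 arithmetic, so casting the result to int8
# yields exactly value - 128 in the range -128..127.)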


interpreter = xcore_tflm_host_interpreter()
interpreter.set_model(model_path=OPT_MODEL_PATH, params_path=OPT_PARAMS_PATH)
interpreter.allocate_tensors()

# interpreter.get_input_details() and interpreter.get_output_details()
# return a list with one entry per model input/output.
# This model has only a single input and output, so we unwrap them.
(input_details,) = interpreter.get_input_details()
(output_details,) = interpreter.get_output_details()

input_data = input_array.astype(input_details["dtype"])
input_data = np.reshape(input_data, input_details["shape"])
interpreter.set_tensor(input_details["index"], input_data)

# Inference
interpreter.invoke()
detections = interpreter.get_tensor(output_details["index"])
print(f"Inferred imagenet class = {detections.argmax()}")
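
As an optional sanity check beyond the single argmax, the top-scoring classes can be listed as well. This is a minimal sketch, assuming the output tensor is a single vector of per-class scores:

    # Show the five highest-scoring ImageNet class indices, best first.
    top5 = np.argsort(detections.flatten())[-5:][::-1]
    print(f"Top-5 imagenet classes = {top5.tolist()}")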
10 changes: 10 additions & 0 deletions examples/app_yolov8_classification/requirements.txt
@@ -0,0 +1,10 @@
# python_version 3.8
nvidia-pyindex
ultralytics
onnx
onnxsim>=0.4.33
onnxruntime
onnx2tf>=1.15.4
sng4onnx>=1.0.1
onnx_graphsurgeon>=0.3.26
tflite_support
23 changes: 23 additions & 0 deletions examples/app_yolov8_classification/src/config.xscope
@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>

<!-- ======================================================= -->
<!-- The 'ioMode' attribute on the xSCOPEconfig -->
<!-- element can take the following values: -->
<!-- "none", "basic", "timed" -->
<!-- -->
<!-- The 'type' attribute on Probe -->
<!-- elements can take the following values: -->
<!-- "STARTSTOP", "CONTINUOUS", "DISCRETE", "STATEMACHINE" -->
<!-- -->
<!-- The 'datatype' attribute on Probe -->
<!-- elements can take the following values: -->
<!-- "NONE", "UINT", "INT", "FLOAT" -->
<!-- ======================================================= -->

<xSCOPEconfig ioMode="basic" enabled="true">

<!-- For example: -->
<!-- <Probe name="Probe Name" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/> -->
<!-- From the target code, call: xscope_int(PROBE_NAME, value); -->

</xSCOPEconfig>
1 change: 1 addition & 0 deletions examples/app_yolov8_classification/src/lion.h

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions examples/app_yolov8_classification/src/main.xc
@@ -0,0 +1,45 @@
#include "flash_server.h"
#include <stdio.h>
#include <platform.h>
#include <quadflash.h>
#include <stdint.h>

#define NUMBER_OF_MODELS 1
#define NFLASH_SPECS 1

fl_QuadDeviceSpec flash_spec[NFLASH_SPECS] = {
    FL_QUADDEVICE_DEFAULT // FL_QUADDEVICE_MACRONIX_MX25R6435FM2IH0
};

on tile[0]: fl_QSPIPorts qspi = {
    PORT_SQI_CS,
    PORT_SQI_SCLK,
    PORT_SQI_SIO,
    XS1_CLKBLK_2
};

extern void model_init(chanend f);
extern void inference();
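
// tile[0] owns the QSPI flash ports and runs the flash server; tile[1] runs
// inference and accesses the model data in flash only through the c_flash
// channel, bracketing its use with FLASH_SERVER_INIT and FLASH_SERVER_QUIT.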

int main(void) {
  chan c_flash[1];

  par {
    on tile[0] : {
      flash_t headers[NUMBER_OF_MODELS];
      flash_server(c_flash, headers, NUMBER_OF_MODELS, qspi, flash_spec, 1);
    }

    on tile[1] : {
      unsafe {
        c_flash[0] <: FLASH_SERVER_INIT;
        model_init(c_flash[0]);

        inference();

        c_flash[0] <: FLASH_SERVER_QUIT;
      }
    }
  }
  return 0;
}
51 changes: 51 additions & 0 deletions examples/app_yolov8_classification/src/support.cpp
@@ -0,0 +1,51 @@
#include <platform.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <xcore/channel.h>
#include "model.tflite.h"
#include <lion.h>

// The sample input image is initialized at the beginning of the tensor arena.
// Before we run inference, the input image is copied to the input tensor
// location in the tensor arena.
// With this optimization, we don't need an extra array to store the input
// image. The sample input image is of a LION and is 160x160x3 = 76800 bytes.
uint8_t tensor_arena[LARGEST_TENSOR_ARENA_SIZE] __attribute__((aligned(8))) =
LION_IMAGE;
#define LION_CLASS 291

void init(unsigned flash_data) { model_init((void *)flash_data); }

void run() {
  int8_t *p = model_input(0)->data.int8;
  // Copy the input image into the input tensor location.
  // The input image values are between 0 and 255;
  // adjust them to be between -128 and 127.
  for (int i = 0; i < model_input_size(0); ++i) {
    p[i] = tensor_arena[i] - 128;
  }

  model_invoke();

  int maxIndex = -1;
  int max = -129; // below the int8 minimum, so the first score always updates max
  int8_t *out = model_output(0)->data.int8;
  for (int i = 0; i < model_output_size(0); ++i) {
    if (out[i] > max) {
      max = out[i];
      maxIndex = i;
    }
  }
  if (maxIndex == LION_CLASS) {
    printf("\nCorrect - Inferred class is LION!\n");
  } else {
    printf("\nIncorrect class!\n");
  }
}
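
// main.xc is compiled as XC, which can only call functions with C linkage,
// so these thin wrappers expose init() and run() to it.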

extern "C" {
void model_init(unsigned flash_data) { init(flash_data); }

void inference() { run(); }
}
