xmos · panickal-xmos · Sep 12, 2024 · Sep 5, 2024 · Sep 6, 2024 · Sep 6, 2024
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -18,6 +18,8 @@ def dailyDeviceTest = {
         sh "pytest examples/app_mobilenetv2"
     }
     runPytestDevice("8x8/test_broadcast", "-n 1 --tc 1", "broadcast_1")
+    runPytestDevice("8x8/test_concatenate", "-n 1 --tc 1", "concat_1")
+    runPytestDevice("8x8/test_concatenate", "-n 1 --tc 5", "concat_5")
     runPytestDevice("8x8/test_mean", "-n 1 --tc 1", "mean_1")
     runPytestDevice("8x8/test_lstm", "-n 1 --tc 1", "lstm_1")
     runPytestDevice("8x8/test_lstm", "-n 1", "lstm_5")

diff --git a/integration_tests/models/8x8/test_broadcast/test_broadcast_6.mlir b/integration_tests/models/8x8/test_broadcast/test_broadcast_6.mlir
@@ -0,0 +1,5 @@
+func.func @main(%arg0: tensor<1x127x1x1x!quant.uniform<i8:f32, 0.0078426999971270561:-1>> {tf_saved_model.index_path = ["input_2"]}) -> (tensor<1x127x127x1x!quant.uniform<i8:f32, 0.0078426999971270561:-1>> {tf_saved_model.index_path = ["tf.broadcast_to_1"]}) attributes {tf.entry_function = {inputs = "serving_default_input_2:0", outputs = "PartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<4xi32>, value = dense<[1, 127, 127, 1]> : tensor<4xi32>} : () -> tensor<4xi32>
+  %1 = "tfl.broadcast_to"(%arg0, %0) : (tensor<1x127x1x1x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>, tensor<4xi32>) -> tensor<1x127x127x1x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>
+  return %1 : tensor<1x127x127x1x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>
+}
diff --git a/integration_tests/models/8x8/test_broadcast/test_broadcast_6.tflite b/integration_tests/models/8x8/test_broadcast/test_broadcast_6.tflite
diff --git a/integration_tests/models/8x8/test_broadcast/test_broadcast_7.mlir b/integration_tests/models/8x8/test_broadcast/test_broadcast_7.mlir
@@ -0,0 +1,5 @@
+func.func @main(%arg0: tensor<1x127x1x2x!quant.uniform<i8:f32, 0.0078426999971270561:-1>> {tf_saved_model.index_path = ["input_2"]}) -> (tensor<1x127x127x2x!quant.uniform<i8:f32, 0.0078426999971270561:-1>> {tf_saved_model.index_path = ["tf.broadcast_to_1"]}) attributes {tf.entry_function = {inputs = "serving_default_input_2:0", outputs = "PartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<4xi32>, value = dense<[1, 127, 127, 2]> : tensor<4xi32>} : () -> tensor<4xi32>
+  %1 = "tfl.broadcast_to"(%arg0, %0) : (tensor<1x127x1x2x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>, tensor<4xi32>) -> tensor<1x127x127x2x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>
+  return %1 : tensor<1x127x127x2x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>
+}
diff --git a/integration_tests/models/8x8/test_broadcast/test_broadcast_7.tflite b/integration_tests/models/8x8/test_broadcast/test_broadcast_7.tflite
diff --git a/integration_tests/models/8x8/test_concatenate/generate.py b/integration_tests/models/8x8/test_concatenate/generate.py
@@ -4,31 +4,44 @@
 
 i = 0
 
+
 def generate_concatenate_model(input_shapes, axis):
     dtype = tf.int8
-    input_data = [tf.keras.Input(shape=input_shape, dtype=dtype, batch_size=1) for input_shape in input_shapes]
+    input_data = [
+        tf.keras.Input(shape=input_shape, dtype=dtype, batch_size=1)
+        for input_shape in input_shapes
+    ]
     concatenated_output = tf.concat(input_data, axis=axis)
     model = tf.keras.Model(inputs=input_data, outputs=concatenated_output)
     converter = tfl.TFLiteConverter.from_keras_model(model)
     if dtype == tf.int8 or dtype == tf.int16:
+
         def representative_dataset_gen():
             for _ in range(100):
-                yield [np.random.uniform(low=-127, high=127, size=shp).astype(dtype.as_numpy_dtype) for shp in input_shapes]
+                yield [
+                    np.random.uniform(low=-127, high=127, size=shp).astype(
+                        dtype.as_numpy_dtype
+                    )
+                    for shp in input_shapes
+                ]
+
         converter.optimizations = [tf.lite.Optimize.DEFAULT]
         converter.representative_dataset = representative_dataset_gen
         if dtype == tf.int8:
             converter.target_spec.supported_ops = [tfl.OpsSet.TFLITE_BUILTINS_INT8]
         else:
-            converter.target_spec.supported_ops = [tfl.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
+            converter.target_spec.supported_ops = [
+                tfl.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+            ]
         converter.inference_input_type = dtype
         converter.inference_output_type = dtype
     tflite_model = converter.convert()
     global i
-    model_name = f'test_concatenate_{i}.tflite'
-    i+=1
-    with open(model_name, 'wb') as f:
+    model_name = f"test_concatenate_{i}.tflite"
+    i += 1
+    with open(model_name, "wb") as f:
         f.write(tflite_model)
-    print(f'Model saved: {model_name}')
+    print(f"Model saved: {model_name}")
 
 
 generate_concatenate_model([(64), (64)], 0)  # 0
@@ -42,5 +55,4 @@ def representative_dataset_gen():
 generate_concatenate_model([(2, 6, 5, 2)] * 9, 3)  # 6
 generate_concatenate_model([(2, 6, 5, 2)] * 16, 3)  # 6
 generate_concatenate_model([(2, 6, 5, 2)] * 33, 3)  # 6
-generate_concatenate_model([(2, 6, 5, 2)] * 40, 3)  # 6
-
+generate_concatenate_model([(2, 6, 5, 1)] * 40, 3)  # 11
diff --git a/integration_tests/models/8x8/test_concatenate/test_concatenate_11.tflite b/integration_tests/models/8x8/test_concatenate/test_concatenate_11.tflite
diff --git a/third_party/lib_nn b/third_party/lib_nn
diff --git a/third_party/lib_tflite_micro b/third_party/lib_tflite_micro
+2 −0		lib_nn/api/vpu_memset_256.h
+15 −19		lib_nn/src/asm/dequantize_int16.S
+1 −0		lib_nn/src/asm/mul_elementwise.S
+55 −5		lib_nn/src/c/dequantize_int16.c
+2 −2		lib_nn/src/c/dequantize_int16_transform.c
+39 −7		lib_tflite_micro/src/tflite-xcore-kernels/xcore_broadcast.cc
+77 −11		lib_tflite_micro/src/tflite-xcore-kernels/xcore_concat.cc
+1 −1		repos.list