Skip to content

Commit

Permalink
Merge pull request #12 from nKandel/master
Browse files Browse the repository at this point in the history
Tensorflow 2 support added with custom node feature

Reviewed-By: Roger Dev <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Sep 15, 2023
2 parents 7a48af7 + 530ff40 commit 41299e5
Show file tree
Hide file tree
Showing 90 changed files with 9,251 additions and 262 deletions.
333 changes: 268 additions & 65 deletions GNNI.ecl

Large diffs are not rendered by default.

336 changes: 161 additions & 175 deletions Internal/Keras.ecl

Large diffs are not rendered by default.

18 changes: 14 additions & 4 deletions Internal/TensExtract.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ IMPORT PYTHON3 as PYTHON;
IMPORT $.^ AS GNN;
IMPORT GNN.Tensor;
IMPORT Std.System.Thorlib;

IMPORT GNN.Utils;
nodeId := Thorlib.node();
nNodes := Thorlib.nodes();

Expand All @@ -21,7 +21,8 @@ MAX_SLICE := POWER(2, 24);
* @see Tensor.AlignTensors
*/
EXPORT DATASET(t_Tensor) TensExtract(DATASET(t_Tensor) tens, UNSIGNED pos,
UNSIGNED datcount) := FUNCTION
UNSIGNED datcount, INTEGER limitNodes=0) := FUNCTION

// Python embed function to do most of the heavy lifting.
STREAMED DATASET(t_Tensor) extract(STREAMED DATASET(t_Tensor) tens,
UNSIGNED pos, UNSIGNED datcount, nodeid, nNodes, maxslice) := EMBED(Python: activity)
Expand Down Expand Up @@ -179,5 +180,14 @@ EXPORT DATASET(t_Tensor) TensExtract(DATASET(t_Tensor) tens, UNSIGNED pos,
# END OF getResults()
return getResults()
ENDEMBED; // Extract
RETURN SORT(extract(tens, pos-1, datcount, nodeId, nNodes, MAX_SLICE), wi, sliceId, LOCAL);
END;

effNodes := Utils.getEffNodesNumber(limitNodes);

extractedData0 := extract(tens, pos-1, datcount, nodeId, nNodes, MAX_SLICE);
extractedDataD := DISTRIBUTE(extractedData0, nodeId % effNodes); // ROUNDUP(Thorlib.nodes() / effNodes)

extractDataD1 := Project(NOCOMBINE(extractedDataD), TRANSFORM(RECORDOF(LEFT), SELF.nodeId:=nodeId, SELF:=LEFT));
extractedData := IF(limitNodes=0, extractedData0, extractDataD1);

RETURN SORT(extractedData, wi, sliceId, LOCAL);
END;
9 changes: 9 additions & 0 deletions OBTTests/ecl/ClassicTestModified.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,13 @@ Test := PROJECT(TestSet, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1
TrainInd := NORMALIZE(Train, featureCount, TRANSFORM(TensData,
SELF.indexes := [LEFT.id, COUNTER],
SELF.value := LEFT.x[COUNTER]));

OUTPUT(TrainInd, NAMED('TrainInd'));
TrainDep := NORMALIZE(Train, 1, TRANSFORM(TensData,
SELF.indexes := [LEFT.id, COUNTER],
SELF.value := LEFT.y));
OUTPUT(TrainDep, NAMED('TrainDep'));


// Form a Tensor from the tensor data. This packs the data into 'slices' that can contain dense
// or sparse portions of the Tensor. If the tensor is small, it will fit into a single slice.
Expand All @@ -108,6 +112,11 @@ TrainDep := NORMALIZE(Train, 1, TRANSFORM(TensData,
TrainIndTensor:= Tensor.R4.MakeTensor([0, featureCount], TrainInd);
TrainDepTensor := Tensor.R4.MakeTensor([0, 1], TrainDep);

OUTPUT(TrainIndTensor, NAMED('TrainIndTensor'));
OUTPUT(TrainDepTensor, NAMED('TrainDepTensor'));



TestInd := NORMALIZE(test, featureCount, TRANSFORM(TensData,
SELF.indexes := [LEFT.id, COUNTER],
SELF.value := LEFT.x[COUNTER]));
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ using Keras. This includes Classical (Dense) Neural Networks as
well as Convolutional and Recursive Networks (such as LSTM), or any combination
of the above.

GNN currently supports both Tensorflow 1.x and Tensorflow 2.x versions. It also supports the use of
GNN currently supports Tensorflow 2.x versions. It also supports the use of
GPUs in conjunction with Tensorflow, with certain
restrictions in the supported topology. Specifically:
- All servers in a cluster must have the same GPU configuration
Expand Down Expand Up @@ -50,8 +50,11 @@ The folder Test/HARTests
contains tests that show how to create more sophisticated Convolutional and
Recurrent networks.

The folder Test/PretrainedModelTest
contains tests that show how to use the pre-trained models.

## OTHER DOCUMENTATION
Programmer Documentation is available at:
[HPCC Machine Learning Library](http://hpccsystems.com/download/free-modules/machine-learning-library)
A tutorial on installing and running GNN is available at:
[Generalized Neural Network Blog](http://hpccsystems.com/blog/gnn-bundle)
[Generalized Neural Network Blog](http://hpccsystems.com/blog/gnn-bundle)
6 changes: 4 additions & 2 deletions Test/ClassicTest.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ train0 := DATASET(trainCount, TRANSFORM(trainRec,
);
// Be sure to compute Y in a second step. Otherwise, the RANDOM() will be executed twice and the Y will be based
// on different values than those assigned to X. This is an ECL quirk that is not easy to fix.
train := PROJECT(train0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
train := PROJECT(train0, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
OUTPUT(train, NAMED('trainData'));

// Build the test data. Same process as the training data.
Expand All @@ -72,7 +73,8 @@ test0 := DATASET(testCount, TRANSFORM(trainRec,
SELF.y := 0)
);

test := PROJECT(test0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
test := PROJECT(test0, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));

// Break the training and test data into X (independent) and Y (dependent) data sets. Format as Tensor Data.
trainX0 := NORMALIZE(train, featureCount, TRANSFORM(TensData,
Expand Down
10 changes: 7 additions & 3 deletions Test/ClassificationTest.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ train0 := DATASET(trainCount, TRANSFORM(trainRec,
);
// Be sure to compute Y in a second step. Otherwise, the RANDOM() will be executed twice and the Y will be based
// on different values than those assigned to X. This is an ECL quirk that is not easy to fix.
train := PROJECT(train0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
train := PROJECT(train0, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
OUTPUT(train, NAMED('trainData'));

// Build the test data. Same process as the training data.
Expand All @@ -90,7 +91,8 @@ test0 := DATASET(testCount, TRANSFORM(trainRec,
SELF.y := [])
);

test := PROJECT(test0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
test := PROJECT(test0, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFunc(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));

// Break the training and test data into X (independent) and Y (dependent) data sets.
// Format as NumericField data.
Expand Down Expand Up @@ -179,4 +181,6 @@ OUTPUT(metrics, NAMED('metrics'));
preds := GNNI.PredictNF(mod2, testX);

OUTPUT(testY, ALL, NAMED('testDat'));
OUTPUT(preds, NAMED('predictions'));
OUTPUT(preds, NAMED('predictions'));

OUTPUT(IF(metrics[2].value>0.95, 'Pass', 'Fail'), NAMED('Accuracy'));
2 changes: 1 addition & 1 deletion Test/ExtractTest.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
/**
* Test the TensorExtract module
*/
IMPORT Python;
IMPORT Python3 as Python;
IMPORT $.^ AS GNN;
IMPORT GNN.Tensor;
IMPORT GNN.Internal.Types AS iTypes;
Expand Down
12 changes: 8 additions & 4 deletions Test/FuncModelTest.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ train0R := DATASET(trainCount, TRANSFORM(trainRecR,
);
// Be sure to compute Y in a second step. Otherwise, the RANDOM() will be executed twice and the Y will be based
// on different values than those assigned to X. This is an ECL quirk that is not easy to fix.
trainR := PROJECT(train0R, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
trainR := PROJECT(train0R, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
OUTPUT(trainR, NAMED('trainDataR'));

// Build the test data. Same process as the training data.
Expand All @@ -71,7 +72,8 @@ test0R := DATASET(testCount, TRANSFORM(trainRecR,
SELF.y := 0)
);

testR := PROJECT(test0R, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
testR := PROJECT(test0R, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));

// Break the training and test data into X (independent) and Y (dependent) data sets. Format as Tensor Data.
trainX0R := NORMALIZE(trainR, featureCount, TRANSFORM(TensData,
Expand Down Expand Up @@ -132,7 +134,8 @@ train0C := DATASET(trainCount, TRANSFORM(trainRecC,
);
// Be sure to compute Y in a second step. Otherwise, the RANDOM() will be executed twice and the Y will be based
// on different values than those assigned to X. This is an ECL quirk that is not easy to fix.
trainC := PROJECT(train0C, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
trainC := PROJECT(train0C, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
OUTPUT(trainC, NAMED('trainDataC'));

// Build the test data. Same process as the training data.
Expand All @@ -146,7 +149,8 @@ test0C := DATASET(testCount, TRANSFORM(trainRecC,
SELF.y := [])
);

testC := PROJECT(test0C, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
testC := PROJECT(test0C, TRANSFORM(
RECORDOF(LEFT), SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));

// Break the training and test data into X (independent) and Y (dependent) data sets.
// Format as NumericField data.
Expand Down
16 changes: 12 additions & 4 deletions Test/MultiModel.ecl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ trainR0 := DATASET(trainCount, TRANSFORM(trainRecR,
);
// Be sure to compute Y in a second step. Otherwise, the RANDOM() will be executed twice and the Y will be based
// on different values than those assigned to X. This is an ECL quirk that is not easy to fix.
trainR := PROJECT(trainR0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
trainR := PROJECT(trainR0, TRANSFORM(RECORDOF(LEFT),
SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]),
SELF := LEFT));
OUTPUT(trainR, NAMED('trainDataR'));

// Build the test data. Same process as the training data.
Expand All @@ -75,7 +77,9 @@ testR0 := DATASET(testCount, TRANSFORM(trainRecR,
SELF.y := 0)
);

testR := PROJECT(testR0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
testR := PROJECT(testR0, TRANSFORM(RECORDOF(LEFT),
SELF.y := targetFuncR(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]),
SELF := LEFT));

// Break the training and test data into X (independent) and Y (dependent) data sets. Format as Tensor Data.
trainRX0 := NORMALIZE(trainR, featureCount, TRANSFORM(TensData,
Expand Down Expand Up @@ -210,7 +214,9 @@ trainC0 := DATASET(trainCount, TRANSFORM(trainRecC,
);
// Be sure to compute Y in a second step. Otherwise, the RANDOM() will be executed twice and the Y will be based
// on different values than those assigned to X. This is an ECL quirk that is not easy to fix.
trainC := PROJECT(trainC0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
trainC := PROJECT(trainC0, TRANSFORM(RECORDOF(LEFT),
SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]),
SELF := LEFT));
OUTPUT(trainC, NAMED('trainData'));

// Build the test data. Same process as the training data.
Expand All @@ -224,7 +230,9 @@ testC0 := DATASET(testCount, TRANSFORM(trainRecC,
SELF.y := [])
);

testC := PROJECT(testC0, TRANSFORM(RECORDOF(LEFT), SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]), SELF := LEFT));
testC := PROJECT(testC0, TRANSFORM(RECORDOF(LEFT),
SELF.y := targetFuncC(LEFT.x[1], LEFT.x[2], LEFT.x[3], LEFT.x[4], LEFT.x[5]),
SELF := LEFT));

// Break the training and test data into X (independent) and Y (dependent) data sets.
// Format as NumericField data.
Expand Down
110 changes: 110 additions & 0 deletions Test/PretrainedModelTest/ConvNeXtBase.ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*##############################################################################
## HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems. All rights reserved.
############################################################################## */
/*
About this test:
Test the usability of Pre-trained Model ConvNeXtBase.
Reference: https://www.tensorflow.org/api_docs/python/tf/keras/applications/convnext/ConvNeXtBase
Input shape = (224, 224, 3)
Note: The outputs of convnext.preprocess_input are integers
Results:
class probability
tusker 9.285942077636719
African_elephant 8.67857837677002
Indian_elephant 3.142804145812988
*/

IMPORT Python3 AS Python;
IMPORT $.^ AS GNN;
IMPORT GNN.GNNI;
IMPORT GNN.Tensor;
IMPORT GNN.Internal AS int;
IMPORT GNN.Internal.Types AS iTypes;
IMPORT Std.System.Thorlib;
IMPORT STD;

// Local shorthand for types declared in GNN.Internal.Types and GNN.Tensor.
// NOTE(review): kString, kStrType and t_Tensor are not referenced anywhere else in this test.
kString := iTypes.kString;
kStrType := iTypes.kStrType;
t_Tensor := Tensor.R4.t_Tensor;
// TensData is the per-cell record (indexes, value) that is handed to Tensor.R4.MakeTensor below.
TensData := Tensor.R4.TensData;

// Layout of the test data file: one record per image (the test uses an image of an elephant).
// The field order describes the stored file layout, so it must not be rearranged.
imageRecord := RECORD
STRING filename;
DATA image;
// first 4 bytes contain the length of the image data
// NOTE(review): the note above presumably describes the `image` DATA field rather than
// RecPos, which is a virtual field and is not stored in the file - confirm.
UNSIGNED8 RecPos{virtual(fileposition)};
END;

// Read the image records from the logical file '~le::elephant' and echo them to the
// workunit under the result name 'elephant'.
imageData := DATASET('~le::elephant',imageRecord,FLAT);
OUTPUT(imageData, NAMED('elephant'));
// String view of the first image's bytes.
// NOTE(review): `result` is not referenced anywhere else in this file.
result := (STRING)(imageData[1].image);

/**
  * Decode an encoded image and flatten it into the list of values fed to the model.
  *
  * The image is forced to three channels (RGB) and resized to 224 x 224, so the
  * returned set always has 224 * 224 * 3 entries in row, column, channel order.
  * Without the RGB conversion a grayscale, palette or RGBA input would yield an
  * array of a different shape and break the index arithmetic applied to the result.
  *
  * @param byte_array The raw bytes of the encoded image file.
  * @return The flattened pixel values after convnext.preprocess_input.
  */
SET OF INTEGER hexToNparry(DATA byte_array):= EMBED(Python)
  import io
  import numpy as np
  from PIL import Image
  try:
    import tensorflow as tf # V2.x
  except ImportError:
    # Only a genuinely missing package is reported as "not found"; any other
    # import-time failure propagates with its original message.
    assert 1 == 0, 'tensorflow not found'
  bytes_data = bytes(byte_array)
  image = Image.open(io.BytesIO(bytes_data))
  # Normalise the colour mode before resizing so the array is always (224, 224, 3).
  image = image.convert('RGB')
  image = image.resize((224,224))
  I_array = np.array(image)
  # NOTE(review): Keras documents convnext.preprocess_input as a pass-through, which
  # would explain why the values stay integers - confirm against the installed version.
  I_array = tf.keras.applications.convnext.preprocess_input(I_array)
  return I_array.flatten().tolist()
ENDEMBED;

// A single value taken from the flattened image array.
valueRec := RECORD
  INTEGER value;
END;

// The same value tagged with its 0-based offset in the flattened array.
idValueRec := RECORD
  UNSIGNED8 id;
  INTEGER value;
END;

// Dimensions of the image array: imgSide x imgSide x imgChannels.
imgSide := 224;
imgChannels := 3;

// Attach the 0-based flat offset (record counter minus one) to each value.
idValueRec tagOffset(valueRec px, UNSIGNED8 pos) := TRANSFORM
  SELF.id := pos - 1;
  SELF.value := px.value;
END;

// Turn a flat offset back into 1-based [record, row, column, channel] tensor indexes.
// The record index is always 1 because a single image is processed.
TensData toCell(idValueRec px) := TRANSFORM
  SELF.indexes := [1,
                   TRUNCATE(px.id / (imgSide * imgChannels)) + 1,
                   TRUNCATE(px.id / imgChannels) % imgSide + 1,
                   px.id % imgChannels + 1];
  SELF.value := px.value;
END;

imageNpArray := hexToNparry(imageData[1].image);
flatValues := DATASET(imageNpArray, valueRec);
taggedValues := PROJECT(flatValues, tagOffset(LEFT, COUNTER));
tensorCells := PROJECT(taggedValues, toCell(LEFT));
x := Tensor.R4.MakeTensor([0, imgSide, imgSide, imgChannels], tensorCells);

// Open a GNN session and define the model from a single Keras expression that
// instantiates ConvNeXtBase with the "imagenet" weights.
// NOTE(review): the meaning of the argument 1 to GetSession is not visible here - confirm.
sess := GNNI.GetSession(1);
modelDef := ['''applications.convnext.ConvNeXtBase(weights = "imagenet")'''];
mod := GNNI.DefineModel(sess, modelDef);

// Run the image tensor through the model, then unpack the resulting tensor
// into per-cell (indexes, value) records.
predTensor := GNNI.Predict(mod, x);
preds := Tensor.R4.GetData(predTensor);

// One decoded prediction: the class label and the score the model assigned to it.
// This is the row layout returned by decodePredictions.
predictRes := RECORD
STRING class;
REAL4 probability;
END;

/**
  * Decode the model output into the highest scoring class labels.
  *
  * The tensor cells are scattered into a single row of 1000 class scores, which
  * is then passed to Keras' convnext decode_predictions.
  *
  * @param preds The prediction cells (indexes, value); the second index is used as
  *              the 1-based class position. Cells that are absent leave a score of 0.
  *              All cells are written to the same row, so a single sample is assumed.
  * @param topK  How many of the best classes to return (default 3). If fewer classes
  *              are decoded than requested, only the decoded ones are returned.
  * @return One predictRes row (class, probability) per returned class, in the order
  *         produced by decode_predictions.
  */
DATASET(predictRes) decodePredictions(DATASET(TensData) preds, INTEGER topK = 3) := EMBED(Python)
  try:
    from tensorflow.keras.applications.convnext import decode_predictions
  except ImportError:
    # Only a genuinely missing package is reported as "not found"; any other
    # import-time failure propagates with its original message.
    assert 1 == 0, 'tensorflow not found'
  import numpy as np
  predsNp = np.zeros((1, 1000))
  for pred in preds:
    # pred[0] is the index list and pred[1] the value of the cell.
    predsNp[0, pred[0][1]-1] = pred[1]
  res = decode_predictions(predsNp, top=topK)[0]
  # Each entry of res is (class id, class name, score). Bound the loop by len(res)
  # so that an oversized topK cannot index past the end of the decoded list.
  return [(res[i][1], res[i][2]) for i in range(min(topK, len(res)))]
ENDEMBED;

// Publish the decoded classes (decodePredictions' default topK of 3) as the 'predictions' result.
OUTPUT(decodePredictions(preds), NAMED('predictions'));
Loading

0 comments on commit 41299e5

Please sign in to comment.