Skip to content

Commit

Permalink
Optimize reduce (#1245)
Browse files Browse the repository at this point in the history
* [ntt.x86] Refactor reduce kernel

* [ntt.x86] Add u_reduce

* Apply code-format changes

* [ntt.x86] Optimize u_reduce

* Apply code-format changes

* Update actions

* Remove packed softmax/layernorm packRule

* Support reduce with keepdims=False

* Apply code-format changes

* Avoid infinite recursion for tree_reduce

---------

Co-authored-by: sunnycase <[email protected]>
  • Loading branch information
sunnycase and sunnycase authored Sep 13, 2024
1 parent b72ba28 commit 9fc10cb
Show file tree
Hide file tree
Showing 29 changed files with 621 additions and 1,064 deletions.
6 changes: 3 additions & 3 deletions .github/disable-workflows/compiler-python-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ jobs:
run: python -m cibuildwheel --output-dir wheelhouse

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
if: runner.os == 'Windows'
with:
name: nncase-python-windows
path: ${{github.workspace}}/wheelhouse
if-no-files-found: error

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
if: runner.os == 'Linux'
with:
name: nncase-python-linux
path: ${{github.workspace}}/wheelhouse
if-no-files-found: error

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
if: runner.os == 'Macos'
with:
name: nncase-python-macos
Expand Down
2 changes: 1 addition & 1 deletion .github/disable-workflows/runtime-k210.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
run: cmake --install . --prefix ../install

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncaseruntime-k210
path: ${{github.workspace}}/install
Expand Down
4 changes: 2 additions & 2 deletions .github/disable-workflows/runtime-linux-x64-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ jobs:
run: ${{github.workspace}}/install/bin/benchnncase > benchnncase.log

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncaseruntime-linux-x64-gcc
path: ${{github.workspace}}/install
if-no-files-found: error

- name: Upload Benchmark Result
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncasebenchmark-linux-x64-gcc
path: ${{github.workspace}}/benchnncase.log
Expand Down
4 changes: 2 additions & 2 deletions .github/disable-workflows/runtime-macos-x64-appleclang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ jobs:
run: ${{github.workspace}}/install/bin/benchnncase > benchnncase.log

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncaseruntime-macos-x64-appleclang
path: ${{github.workspace}}/install
if-no-files-found: error

- name: Upload Benchmark Result
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncasebenchmark-macos-x64-appleclang
path: ${{github.workspace}}/benchnncase.log
Expand Down
4 changes: 2 additions & 2 deletions .github/disable-workflows/runtime-win-x64-msvc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ jobs:
run: .\install\bin\benchnncase.exe > benchnncase.log

- name: Upload a Build Artifact
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncaseruntime-win-x64-msvc
path: ${{github.workspace}}/install
if-no-files-found: error

- name: Upload Benchmark Result
uses: actions/upload-artifact@v2.2.2
uses: actions/upload-artifact@v4
with:
name: nncasebenchmark-win-x64-msvc
path: ${{github.workspace}}/benchnncase.log
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/compiler-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
cmake --install build/${{matrix.config.buildType}} --prefix install
- name: Upload nncase Native Build Artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncase-native-${{matrix.config.name}}
path: ${{github.workspace}}/install
Expand Down Expand Up @@ -97,7 +97,7 @@ jobs:
${{ runner.os }}-nuget-
- name: Install nncase native Artifact
uses: actions/download-artifact@v2.0.9
uses: actions/download-artifact@v4
with:
name: nncase-native-${{matrix.config.name}}
path: ${{github.workspace}}/install
Expand Down Expand Up @@ -139,15 +139,15 @@ jobs:
dotnet-coverage merge -o coverage.unit.xml -f cobertura -r coverage/*.xml
- name: Upload Coverage
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: matrix.config.name == 'x86_64-linux'
with:
name: nncase-coverage-unit
path: coverage.unit.xml
if-no-files-found: error

- name: Upload nncase Build Artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncase-${{matrix.config.name}}
path: ${{github.workspace}}/src/Nncase.Compiler/bin/${{matrix.config.buildType}}/net${{matrix.dotnet-version}}/${{matrix.config.rid}}/publish
Expand Down Expand Up @@ -185,13 +185,13 @@ jobs:
cache-dependency-path: '**/requirements.test.txt'

- name: Install nncase native Artifact
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: nncase-native-${{matrix.config.name}}
path: ${{github.workspace}}/install

- name: Install nncase
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: nncase-${{matrix.config.name}}
path: ${{github.workspace}}/install
Expand Down Expand Up @@ -259,7 +259,7 @@ jobs:
dotnet-coverage merge -o coverage.integration.xml -f cobertura -r coverage/*.xml
- name: Upload Coverage
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: matrix.config.name == 'x86_64-linux'
with:
name: nncase-coverage-integration
Expand All @@ -283,13 +283,13 @@ jobs:
dotnet-version: "8.0"

- name: Download Unit Test Coverage
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: nncase-coverage-unit
path: ${{github.workspace}}/coverage

- name: Download Integration Test Coverage
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: nncase-coverage-integration
path: ${{github.workspace}}/coverage
Expand All @@ -314,7 +314,7 @@ jobs:
reportgenerator -reports:coverage.xml -targetdir:"coveragereport" -reporttypes:Html
- name: Upload Coverage Report
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncase-coverage-report
path: coveragereport
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/compiler-python-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
dotnet publish src/Nncase.Compiler -c ${{matrix.config.buildType}} --no-restore --sc false -r ${{matrix.config.rid}}
- name: Upload nncase Build Artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncase-${{matrix.config.name}}
path: ${{github.workspace}}/src/Nncase.Compiler/bin/${{matrix.config.buildType}}/net${{matrix.dotnet-version}}/${{matrix.config.rid}}/publish
Expand Down Expand Up @@ -69,7 +69,7 @@ jobs:
dotnet-version: ${{matrix.dotnet-version}}

- name: Install nncase
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: nncase-${{matrix.config.name}}
path: ${{github.workspace}}/install
Expand Down Expand Up @@ -97,7 +97,7 @@ jobs:
run: python -m cibuildwheel --output-dir wheelhouse

- name: Upload nncase-python Build Artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncase-python-${{matrix.config.name}}
path: ${{github.workspace}}/wheelhouse
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/runtime-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ jobs:
# cat benchnncase.log

- name: Upload nncaseruntime Build Artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncaseruntime-${{matrix.config.name}}
path: ${{github.workspace}}/install
if-no-files-found: error

#- name: Upload nncaseruntime Benchmark
# uses: actions/upload-artifact@v3
# uses: actions/upload-artifact@v4
# with:
# name: nncaseruntime-benchmark-${{matrix.config.name}}
# path: ${{github.workspace}}/benchnncase.log
Expand Down Expand Up @@ -134,7 +134,7 @@ jobs:
ctest -C ${{matrix.config.buildType}} --test-dir src/Native/test/ctest --output-on-failure -j4
- name: Upload nncaseruntime Build Artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: nncaseruntime-${{matrix.config.name}}
path: ${{github.workspace}}/install
Expand Down
4 changes: 2 additions & 2 deletions modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ public static string ToC(this PrimType primType) =>
{
ReduceOp.Min => "min",
ReduceOp.Max => "max",
ReduceOp.Sum => "add",
ReduceOp.Sum => "sum",
ReduceOp.Mean => "mean",
ReduceOp.Prod => "mul",
ReduceOp.Prod => "prod",
_ => throw new NotImplementedException(),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ protected override CSymbol VisitCall(Call expr)
IndentScope.Writer.Write($"pad<{string.Join(",", pad.Paddings)}>({Visit(args[0]).Name}, {Visit(args[1]).Name}, {args[0].CheckedDataType.ToC()} {{ {pad.PadValue} }} );\n");
break;
case TIR.CPU.Reduce reduce:
IndentScope.Writer.Write($"reduce<ops::{reduce.ReduceOp.ToC()}>({Visit(args[0]).Name}, {Visit(args[1]).Name}, fixed_shape<{string.Join(",", reduce.Axis)}>{{}}, fixed_shape<{string.Join(",", reduce.PackedAxes)}>{{}}, fixed_shape<{string.Join(",", reduce.PadedNums)}>{{}});\n");
IndentScope.Writer.Write($"reduce_{reduce.ReduceOp.ToC()}<fixed_shape<{string.Join(",", reduce.Axis)}>, fixed_shape<{string.Join(",", reduce.PackedAxes)}>, fixed_shape<{string.Join(",", reduce.PadedNums)}>>({Visit(args[0]).Name}, {Visit(args[1]).Name});\n");
break;
case TIR.CPU.ReduceArg reduceArg:
IndentScope.Writer.Write($"reduce_arg<ops::{reduceArg.ReduceArgOp.ToC()[4..]}, {reduceArg.Axis}, {reduceArg.SelectLastIndex.ToString().ToLower(System.Globalization.CultureInfo.CurrentCulture)}, {reduceArg.KeepDims.ToString().ToLower(System.Globalization.CultureInfo.CurrentCulture)}>({Visit(args[0]).Name}, {Visit(args[1]).Name}, fixed_shape<>{{}}, fixed_shape<>{{}});\n");
Expand Down
118 changes: 0 additions & 118 deletions modules/Nncase.Modules.CPU/Passes/Rules/CPU/PackRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,52 +35,6 @@ public PackRule(int rank, int lane)
public override Expr? GetReplace(IMatchResult result, RunPassContext options) => throw new NotImplementedException();
}

/// <summary>
/// Pack rule for softmax calls on float inputs with an integral scalar axis.
/// Each candidate pads the input with negative infinity, packs it, applies
/// <c>PackedSoftmax</c>, then unpacks and slices the result back to the input shape.
/// </summary>
public class PackSoftmax : PackRule
{
    public PackSoftmax(int rank, int lane)
        : base(rank, lane)
    {
    }

    /// <summary>
    /// Gets the pattern that captures the softmax call as "target", its float
    /// operand as "input" and its integral scalar axis as "axis".
    /// </summary>
    public override Pattern Pattern { get; } = IsSoftmax(
        "target",
        IsWildcard("input") with { TypePattern = IsFloat() },
        IsWildcard("axis") with { TypePattern = IsIntegralScalar() });

    /// <summary>
    /// Builds one candidate per single packed axis and, when <c>Rank</c> is greater
    /// than one, one additional candidate per ordered axis pair (i, j) with i &lt; j.
    /// Candidates whose packed softmax fails type inference are dropped.
    /// </summary>
    /// <param name="result">Match result holding the "input" and "axis" captures.</param>
    /// <param name="context">Pass context (not used by this rule).</param>
    /// <returns>The list of replacement candidates, possibly empty.</returns>
    public override List<Expr> GetReplaceCandidates(IMatchResult result, RunPassContext context)
    {
        var candidates = new List<Expr>();
        var source = (Expr)result["input"];
        var softmaxAxis = ((TensorConst)result["axis"]).Value.ToScalar<int>();
        var originalShape = source.CheckedShape.ToValueArray();

        // Builds a single pad -> pack -> softmax -> unpack -> slice chain and keeps it
        // only when the packed softmax has a valid checked type.
        void TryAddCandidate(int[] packedAxes, int[] lanes)
        {
            var padded = PackUtility.PadForPack(source, originalShape, packedAxes, lanes, float.NegativeInfinity, out var pads);
            var packedInput = IR.F.CPU.Pack(padded, lanes, packedAxes);
            var packedSoftmax = IR.F.CPU.PackedSoftmax(packedInput, softmaxAxis, packedAxes);
            if (packedSoftmax.CheckedType is InvalidType)
            {
                return;
            }

            var unpacked = IR.F.CPU.Unpack(packedSoftmax, lanes, packedAxes);
            candidates.Add(PackUtility.SliceForPack(unpacked, originalShape, pads));
        }

        for (int first = 0; first < source.CheckedShape.Count; first++)
        {
            TryAddCandidate(new[] { first }, new[] { Lane });

            // Two-axis packing is only attempted when the rule was created with Rank > 1.
            if (Rank <= 1)
            {
                continue;
            }

            for (int second = first + 1; second < source.CheckedShape.Count; second++)
            {
                TryAddCandidate(new[] { first, second }, new[] { Lane, Lane });
            }
        }

        return candidates;
    }
}

public sealed class PackResizeImage : PackRule
{
public PackResizeImage(int rank, int lane)
Expand Down Expand Up @@ -190,78 +144,6 @@ void AddCandidate(int[] packedAxes, int[] lanes)
}
}

/// <summary>
/// Pack rule for layer-norm calls whose input, scale and bias are float.
/// Each candidate packs the input (and, where the packed axes fall at or after the
/// layer-norm axis, the scale and bias), applies <c>PackedLayerNorm</c>, then unpacks
/// and slices the result back to the input shape.
/// </summary>
public sealed class PackLayerNorm : PackRule
{
    public PackLayerNorm(int rank, int lane)
        : base(rank, lane)
    {
    }

    /// <summary>
    /// Gets the pattern that captures any layer-norm call as "target" together with
    /// its float "input", "scale" and "bias" operands.
    /// </summary>
    public override Pattern Pattern { get; } = IsLayerNorm(
        "target",
        _ => true,
        IsWildcard("input") with { TypePattern = IsFloat() },
        IsWildcard("scale") with { TypePattern = IsFloat() },
        IsWildcard("bias") with { TypePattern = IsFloat() });

    /// <summary>
    /// Builds one candidate per single packed axis and, when <c>Rank</c> is greater
    /// than one, one additional candidate per ordered axis pair (i, j) with i &lt; j.
    /// Candidates that would need input padding, or whose packed layer-norm fails
    /// type inference, are dropped.
    /// </summary>
    /// <param name="result">Match result holding the "target", "input", "scale" and "bias" captures.</param>
    /// <param name="context">Pass context (not used by this rule).</param>
    /// <returns>The list of replacement candidates, possibly empty.</returns>
    public override List<Expr> GetReplaceCandidates(IMatchResult result, RunPassContext context)
    {
        var candidates = new List<Expr>();
        var layerNorm = (IR.NN.LayerNorm)result["target"];
        var source = (Expr)result["input"];
        var scale = (Expr)result["scale"];
        var bias = (Expr)result["bias"];
        var sourceShape = source.CheckedShape.ToValueArray();
        var paramShape = scale.CheckedShape.ToValueArray();

        void TryAddCandidate(int[] packedAxes, int[] lanes)
        {
            var paddedInput = PackUtility.PadForPack(source, sourceShape, packedAxes, lanes, 0f, out var inputPads);
            var packedInput = IR.F.CPU.Pack(paddedInput, lanes, packedAxes);

            // TODO: support paddings.
            if (inputPads.Any(pad => pad > 0))
            {
                return;
            }

            // Packed axes at or after the layer-norm axis, re-based so that the
            // layer-norm axis becomes axis 0 of scale/bias.
            var paramAxes = packedAxes.Where(axis => axis >= layerNorm.Axis).Select(axis => axis - layerNorm.Axis).ToArray();

            // Pads a scale/bias tensor and packs it only when at least one of its axes is packed.
            Expr PackParameter(Expr parameter)
            {
                var prepared = PackUtility.PadForPack(parameter, paramShape, paramAxes, lanes, 0f, out _);
                if (paramAxes.Length > 0)
                {
                    prepared = IR.F.CPU.Pack(prepared, Enumerable.Repeat(Lane, paramAxes.Length).ToArray(), paramAxes);
                }

                return prepared;
            }

            var packedScale = PackParameter(scale);
            var packedBias = PackParameter(bias);

            var packedNorm = IR.F.CPU.PackedLayerNorm(packedInput, packedScale, packedBias, layerNorm.Axis, layerNorm.Epsilon, layerNorm.UseMean, packedAxes, inputPads);
            if (packedNorm.CheckedType is InvalidType)
            {
                return;
            }

            var unpacked = IR.F.CPU.Unpack(packedNorm, lanes, packedAxes);
            candidates.Add(PackUtility.SliceForPack(unpacked, sourceShape, inputPads));
        }

        for (int first = 0; first < source.CheckedShape.Count; first++)
        {
            TryAddCandidate(new[] { first }, new[] { Lane });

            // Two-axis packing is only attempted when the rule was created with Rank > 1.
            if (Rank <= 1)
            {
                continue;
            }

            for (int second = first + 1; second < source.CheckedShape.Count; second++)
            {
                TryAddCandidate(new[] { first, second }, new[] { Lane, Lane });
            }
        }

        return candidates;
    }
}

public sealed class PackMatMul : PackRule
{
public PackMatMul(int rank, int lane)
Expand Down
2 changes: 0 additions & 2 deletions modules/Nncase.Modules.CPU/Targets/CPUTarget.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ public void RegisterTargetDependentAfterQuantPass(IPassManager passManager, Comp
// todo config it in the target options.
var rank = 1;
var lane = System.Runtime.Intrinsics.Vector256.IsHardwareAccelerated ? 8 : 4;
p.Add<Passes.Rules.CPU.PackSoftmax>(rank, lane);
p.Add<Passes.Rules.CPU.PackSwish>(rank, lane);
p.Add<Passes.Rules.CPU.PackLayerNorm>(rank, lane);
p.Add<Passes.Rules.CPU.PackResizeImage>(rank, lane);
p.Add<Passes.Rules.CPU.PackMatMul>(rank, lane);
p.Add<Passes.Rules.CPU.PackConv2D>(rank, lane);
Expand Down
Loading

0 comments on commit 9fc10cb

Please sign in to comment.