Skip to content

Commit

Permalink
Added Python module isolation-forest-onnx which can convert an isolat…
Browse files Browse the repository at this point in the history
…ion forest model to ONNX format. (#53)

* Added Python module isolation-forest-onnx which can convert an isolation forest model to ONNX format.

* Added setuptools dev dependency for venv.
  • Loading branch information
jverbus authored Sep 3, 2024
1 parent 73a4a45 commit ae6efe5
Show file tree
Hide file tree
Showing 24 changed files with 72,527 additions and 63 deletions.
56 changes: 53 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,60 @@
.gradle
# Gradle and Build directories
.gradle/
build/

# macOS system files
.DS_Store
.idea
spark-warehouse

# IDE configurations
.idea/
*.ipr
*.iml
*.iws
.vscode/
.sublime-*

# Python artifacts and caches
*.egg
*.egg-info/
*.pyc
*.pyo
__pycache__/
.cache/
.tox*
.venv*
.env
.envrc
.direnv/
.mypy_cache/
pinned.txt
/*/*pinned.txt

# Test-related files
.coverage
TEST-*.xml
coverage.xml
/htmlcov/

# Python build artifacts
dist/
/build/
/*/dist/
/*/build/
/MANIFEST
/*/MANIFEST
/*/activate

# Configuration files
product-spec.json
/*/product-spec.json
config/
/config/external/
/*/config

# Miscellaneous project files
spark-warehouse
version.txt

# Project-specific paths
isolation-forest/bin
isolation-forest-onnx/venv
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ Outlier Detection Datasets (ODDS)
This project includes datasets from the Outlier Detection Datasets (ODDS)
repository (http://odds.cs.stonybrook.edu/).

- Dataset at isolation-forest/src/test/resources/mammography.csv
- Dataset at isolation-forest/src/test/resources/shuttle.csv
- Dataset at isolation-forest/src/test/resources/mammography.csv and isolation-forest-onnx/test/resources/mammography.csv
- Dataset at isolation-forest/src/test/resources/shuttle.csv and isolation-forest-onnx/test/resources/shuttle.csv

Your use of these datasets is subject to the citation requirements
outlined here: http://odds.cs.stonybrook.edu/about-odds/
Expand Down
8 changes: 4 additions & 4 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ terms than as set forth above. In addition, such third party code may also
depend on and load multiple tiers of dependencies.

========================================================================
Dataset at isolation-forest/src/test/resources/mammography.csv
Dataset at isolation-forest/src/test/resources/mammography.csv and isolation-forest-onnx/test/resources/mammography.csv
========================================================================
Shebuti Rayana (2016). ODDS Library [http://odds.cs.stonybrook.edu].
Stony Brook, NY: Stony Brook University, Department of Computer Science.
Dataset from http://odds.cs.stonybrook.edu/mammography-dataset/
Dataset from https://odds.cs.stonybrook.edu/mammography-dataset/

The original Mammography (Woods et al., 1993) data set was made available by the courtesy of
Aleksandar Lazarevic. This dataset is publicly available in openML (https://www.openml.org/d/310)
under Public Domain Mark 1.0.

========================================================================
Dataset at isolation-forest/src/test/resources/shuttle.csv
Dataset at isolation-forest/src/test/resources/shuttle.csv and isolation-forest-onnx/test/resources/shuttle.csv
========================================================================
Shebuti Rayana (2016). ODDS Library [http://odds.cs.stonybrook.edu].
Stony Brook, NY: Stony Brook University, Department of Computer Science.
Dataset from http://odds.cs.stonybrook.edu/shuttle-dataset/
Dataset from https://odds.cs.stonybrook.edu/shuttle-dataset/

Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
Irvine, CA: University of California, School of Information and Computer Science.
Expand Down
6 changes: 0 additions & 6 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
buildscript {
repositories {
jcenter()
maven {
url "https://plugins.gradle.org/m2/"
}
Expand All @@ -24,11 +23,6 @@ allprojects {
group = "com.linkedin.isolation-forest"

repositories {
jcenter()
mavenCentral()
}
}

task clean(type: Delete) {
delete "build"
}
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
37 changes: 17 additions & 20 deletions gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
Expand Down Expand Up @@ -82,6 +82,7 @@ esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
Expand Down Expand Up @@ -125,10 +126,11 @@ if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

JAVACMD=`cygpath --unix "$JAVACMD"`

# We build the pattern for arguments to be converted via cygpath
Expand All @@ -154,19 +156,19 @@ if $cygwin ; then
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
i=`expr $i + 1`
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi

Expand All @@ -175,14 +177,9 @@ save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi

exec "$JAVACMD" "$@"
27 changes: 8 additions & 19 deletions gradlew.bat
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem http://www.apache.org/licenses/LICENSE-2.0
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
Expand All @@ -29,6 +29,9 @@ if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

Expand All @@ -37,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Expand All @@ -51,7 +54,7 @@ goto fail
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init
if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
Expand All @@ -61,28 +64,14 @@ echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
Expand Down
1 change: 1 addition & 0 deletions isolation-forest-onnx/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include version.txt
124 changes: 124 additions & 0 deletions isolation-forest-onnx/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Apply the 'base' plugin. This script configures the `clean` task and hooks
// into the `check` and `build` tasks further down, which the base plugin supplies.
plugins {
id 'base'
}

// Location of the Python virtual environment; file() resolves the relative
// path against this project's directory.
def venvDir = file('venv') // Directory for the virtual environment

// Writes the Gradle project version into version.txt in the project directory.
// Per the task description, setup.py reads this file to obtain the package version.
task createVersionFile {
    description = 'Create version.txt file for package version used in setup.py'

    def versionFile = file("${projectDir}/version.txt")

    // Track the version as an input and the file as an output so Gradle can
    // skip the task when neither changed. The closure defers reading the
    // version until Gradle actually needs the value.
    inputs.property('version', { project.version.toString() })
    outputs.file(versionFile)

    doLast {
        versionFile.parentFile.mkdirs()
        // project.version is typed as Object; convert explicitly so that a
        // non-String version object is still written as its text form.
        versionFile.text = project.version.toString()
    }
}

// Creates the Python virtual environment in venvDir by invoking `python3 -m venv`.
task createVenv(type: Exec) {
    description = 'Create a Python virtual environment'
    executable 'python3'
    args '-m', 'venv', venvDir
}

// Installs everything listed in requirements-dev.txt using the virtual
// environment's own pip, so the venv must exist first.
task installDependencies(type: Exec) {
    description = 'Install Python dependencies into the virtual environment'
    dependsOn createVenv

    executable "${venvDir}/bin/pip"
    args 'install', '-r', 'requirements-dev.txt'
}

// Runs `setup.py sdist bdist_wheel` with the virtual environment's interpreter.
// Requires the dev dependencies to be installed and version.txt to be written.
task buildPythonPackage(type: Exec) {
    description = 'Build the Python source distribution and wheel'
    dependsOn createVersionFile, installDependencies

    executable "${venvDir}/bin/python"
    args 'setup.py', 'sdist', 'bdist_wheel'
}

// Task to publish the Python package to PyPI.
// Authenticates with an API token taken from the PYPI_TOKEN environment
// variable (twine reads it through TWINE_USERNAME / TWINE_PASSWORD).
task publishPythonPackage(type: Exec) {
    description = 'Upload the Python package to PyPI'
    group = 'upload'

    dependsOn buildPythonPackage

    def pypiToken = System.getenv('PYPI_TOKEN')

    // Fail with an actionable message when the token is missing, instead of
    // handing a null value to the process environment.
    doFirst {
        if (!pypiToken) {
            throw new GradleException("PYPI_TOKEN environment variable must be set to run '${name}'")
        }
    }

    environment 'TWINE_USERNAME', '__token__'
    // Never put null into the environment map; the doFirst check above aborts
    // the task before the empty placeholder could be used.
    environment 'TWINE_PASSWORD', pypiToken ?: ''
    // Exec does not go through a shell, so 'dist/*' reaches twine unexpanded.
    // NOTE(review): this relies on twine expanding the glob itself - confirm.
    commandLine "${venvDir}/bin/twine", 'upload', 'dist/*'
}

// Uploads the built package to the Test PyPI index.
// Authenticates with an API token taken from the TEST_PYPI_TOKEN environment
// variable (twine reads it through TWINE_USERNAME / TWINE_PASSWORD).
task publishPythonPackageToTestPyPI(type: Exec) {
    description = 'Upload the Python package to Test PyPI'
    group = 'upload'

    dependsOn buildPythonPackage

    def testPypiToken = System.getenv('TEST_PYPI_TOKEN')

    // Fail with an actionable message when the token is missing, instead of
    // handing a null value to the process environment.
    doFirst {
        if (!testPypiToken) {
            throw new GradleException("TEST_PYPI_TOKEN environment variable must be set to run '${name}'")
        }
    }

    environment 'TWINE_USERNAME', '__token__'
    // Never put null into the environment map; the doFirst check above aborts
    // the task before the empty placeholder could be used.
    environment 'TWINE_PASSWORD', testPypiToken ?: ''
    // Exec does not go through a shell, so 'dist/*' reaches twine unexpanded.
    // NOTE(review): this relies on twine expanding the glob itself - confirm.
    commandLine "${venvDir}/bin/twine", 'upload', '--repository-url', 'https://test.pypi.org/legacy/', 'dist/*'
}

// Runs the pytest suite under `test` with the venv interpreter and writes a
// JUnit-style XML report beneath build/reports/tests.
task test(type: Exec) {
    group = 'verification'
    description = 'Run Python tests using pytest'

    dependsOn installDependencies

    // Inputs/outputs for Gradle's up-to-date check
    inputs.dir 'src'
    inputs.dir 'test'
    outputs.dir 'build/reports/tests'

    // Make the package sources importable by the tests
    environment 'PYTHONPATH', "${projectDir}/src"

    executable "${venvDir}/bin/python"
    args '-m', 'pytest', 'test', '--junitxml=build/reports/tests/TEST-pytest.xml'
}

// Runs pytest under coverage.py inside the activated virtual environment, then
// produces a plain-text report and an HTML report under build/reports/coverage.
task runCoverage(type: Exec) {
    description = 'Run tests with coverage'
    group = 'verification'

    dependsOn installDependencies
    environment 'PYTHONPATH', "${projectDir}/src"

    // Define the coverage report directory
    def coverageReportDir = file("${buildDir}/reports/coverage")
    def coverageFile = file("${coverageReportDir}/.coverage")
    def coverageReportFile = file("${coverageReportDir}/coverage_report.txt")

    // These are absolute paths spliced into `bash -c` strings below, so each
    // one is single-quoted to keep a checkout path containing spaces or shell
    // metacharacters from being split or interpreted by the shell.
    def activateVenv = ". '${venvDir}/bin/activate'"

    // Create the directory if it doesn't exist
    doFirst {
        if (!coverageReportDir.exists()) {
            coverageReportDir.mkdirs()
        }
    }

    commandLine 'bash', '-c', "${activateVenv} && coverage run --data-file='${coverageFile}' -m pytest"
    doLast {
        // Generate the coverage report and output it to the specified file
        exec {
            commandLine 'bash', '-c', "${activateVenv} && coverage report --data-file='${coverageFile}' > '${coverageReportFile}'"
        }
        // Generate HTML coverage report
        exec {
            commandLine 'bash', '-c', "${activateVenv} && coverage html -d '${coverageReportDir}/html_report' --data-file='${coverageFile}'"
        }
    }
}

// Runs flake8 from inside the activated virtual environment.
task runFlake8(type: Exec) {
    description = 'Run flake8 linting'
    group = 'verification'

    dependsOn installDependencies
    // venvDir is an absolute path spliced into a shell string; single-quote it
    // so a checkout path containing spaces is not split by bash.
    commandLine 'bash', '-c', ". '${venvDir}/bin/activate' && flake8"
}

// Runs mypy over the `src` tree from inside the activated virtual environment.
task runMypy(type: Exec) {
    description = 'Run mypy type checks'
    group = 'verification'

    dependsOn installDependencies
    // venvDir is an absolute path spliced into a shell string; single-quote it
    // so a checkout path containing spaces is not split by bash.
    commandLine 'bash', '-c', ". '${venvDir}/bin/activate' && mypy src"
    inputs.dir file("src")
    // NOTE(review): the command above is not told to write anything here; this
    // output declaration appears to exist only to enable up-to-date checks - confirm.
    outputs.dir file("build/reports/mypy")
}

// Extra Python artefacts for `clean` to remove:
//   venv        - virtual environment for Python
//   dist        - Python distribution directory
//   version.txt - version file used for setup.py
clean {
    delete 'venv', 'dist', 'version.txt'
}

// Hook the Python tasks into the lifecycle tasks: `check` runs the tests,
// linting, coverage and type checks; `build` produces the Python package.
check.dependsOn([test, runFlake8, runCoverage, runMypy])
build.dependsOn(buildPythonPackage)
Loading

0 comments on commit ae6efe5

Please sign in to comment.