diff --git a/.github/workflows/build_and_python_import_on_push.yml b/.github/workflows/build_and_python_import_on_push.yml deleted file mode 100644 index b3753c36..00000000 --- a/.github/workflows/build_and_python_import_on_push.yml +++ /dev/null @@ -1,27 +0,0 @@ -on: [ push ] - -name: build and python import - -jobs: - build_and_python_import: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v2 - - name: Install required packages - run: sudo apt-get update && sudo apt-get install -y - git - bash - cmake - make - g++ - python3-dev - python3-distutils - autoconf - build-essential - libjemalloc-dev - - name: Init and update submodules - run: git submodule update --init --recursive - - name: Build odgi - run: cmake -H. -Bbuild && cmake --build build -- -j 2 - - name: Test python import - run: export LD_PRELOAD=/lib/x86_64-linux-gnu/libjemalloc.so.2 && cd lib && ls -l && python3.8 -c 'import odgi; g = odgi.graph()' diff --git a/.gitignore b/.gitignore index ceed94b9..b6ae1c93 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ include/ docs/sphinx_build docs/sphinx_build_man docs/_build +Testing/ diff --git a/CMakeLists.txt b/CMakeLists.txt index d459878c..cc92edf7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,10 +101,10 @@ endif() # Function to invoke doctests function(add_pydoctest TEST_FILE) add_test( - NAME ${TEST_FILE} + NAME pydoctest_${TEST_FILE} COMMAND python3 -m doctest -o NORMALIZE_WHITESPACE -o REPORT_UDIFF python/${TEST_FILE}.md WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test) - set_tests_properties(${TEST_FILE} PROPERTIES ENVIRONMENT "PYTHONPATH=${PROJECT_SOURCE_DIR}/lib;LD_LIBRARY_PATH=$ENV{LIBRARY_PATH};LD_PRELOAD=${PRELOAD}") + set_tests_properties(pydoctest_${TEST_FILE} PROPERTIES ENVIRONMENT "PYTHONPATH=${PROJECT_SOURCE_DIR}/lib;LD_LIBRARY_PATH=$ENV{LIBRARY_PATH};LD_PRELOAD=${PRELOAD}") endfunction() if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # untested @@ -820,6 +820,7 @@ else (NOT PIC) set_target_properties(odgi_ffi 
PROPERTIES OUTPUT_NAME "odgi_ffi") install(TARGETS odgi_ffi LIBRARY DESTINATION lib) add_pydoctest(odgi_ffi) + add_pydoctest(odgi_performance) # Build original Python module pybind11_add_module(odgi_pybind11 "${CMAKE_SOURCE_DIR}/src/pythonmodule.cpp") @@ -828,6 +829,11 @@ else (NOT PIC) target_link_libraries(odgi_pybind11 PRIVATE "${CMAKE_SOURCE_DIR}/lib/libodgi.a" "${odgi_LIBS}") set_target_properties(odgi_pybind11 PROPERTIES OUTPUT_NAME "odgi") install(TARGETS odgi_pybind11 LIBRARY DESTINATION lib) + add_test( + NAME pythonmodule + COMMAND python3 -c "import odgi; g = odgi.graph()" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test) + set_tests_properties(pythonmodule PROPERTIES ENVIRONMENT "PYTHONPATH=${PROJECT_SOURCE_DIR}/lib;LD_LIBRARY_PATH=$ENV{LIBRARY_PATH};LD_PRELOAD=${PRELOAD}") endif (NOT PIC) diff --git a/guix.scm b/guix.scm index 8eb705a9..758c1a30 100644 --- a/guix.scm +++ b/guix.scm @@ -6,16 +6,16 @@ ;; ;; guix build -f guix.scm --target=aarch64-linux ;; -;; To get a development container (emacs shell will work) +;; To get a development container (inside emacs shell will work) ;; -;; guix shell -C -D -f guix.scm +;; guix shell -C -D -f guix.scm -- bash --init-file <(echo "ln -s /bin/sh /bin/bash") ;; ;; and build ;; ;; find -name CMakeCache.txt|xargs rm -v ;; cd build ;; cmake -DCMAKE_BUILD_TYPE=Debug .. -;; cmake --build . --verbose +;; cmake --build . --verbose -- -j 14 && ctest . --verbose ;; ;; For the tests you may need /usr/bin/env. In a container create it with ;; @@ -89,8 +89,8 @@ ; ("lodepng" ,lodepng) later! 
("openmpi" ,openmpi) ("python" ,python) - ; ("sdsl-lite" ,sdsl-lite) - ; ("libdivsufsort" ,libdivsufsort) + ("sdsl-lite" ,sdsl-lite) + ("libdivsufsort" ,libdivsufsort) )) (native-inputs `(("pkg-config" ,pkg-config) @@ -99,6 +99,10 @@ `(#:phases (modify-phases %standard-phases + (add-after 'unpack 'symlink-bash + (lambda _ + (symlink "/bin/sh" "/bin/bash") ; (symlink TARGET LINK-NAME): create /bin/bash pointing at /bin/sh + #t)) ;; This stashes our build version in the executable (add-after 'unpack 'set-version (lambda _ diff --git a/scripts/test_binary.sh b/scripts/test_binary.sh index 3a50552c..bce17f99 100755 --- a/scripts/test_binary.sh +++ b/scripts/test_binary.sh @@ -44,4 +44,4 @@ if [[ $ret -eq 0 ]]; then else echo "[binary_tester] FAILED: At least one binary test for odgi untangle failed." exit 1 -fi \ No newline at end of file +fi diff --git a/test/python/odgi_ffi.md b/test/python/odgi_ffi.md index a9cf8efc..bc354bb8 100644 --- a/test/python/odgi_ffi.md +++ b/test/python/odgi_ffi.md @@ -4,6 +4,7 @@ The odgi toolkit for pangenomics comes with a simple "C" foreign function interface (FFI) that can be used from any computer language. The header file for the C-API can be found [here](https://github.com/pjotrp/odgi/blob/master/src/odgi-api.h). +This C-API is covered by the `odgi-ffi` module. In this document we walk through the low-level API using the Python `odgi_ffi` module that comes with odgi. Note that odgi also has an older high-level Python API `import odgi` that is somewhat obsolete. Instead you should probably use below `import odgi_ffi` lower level API to construct your own library. diff --git a/test/python/odgi_performance.md b/test/python/odgi_performance.md new file mode 100644 index 00000000..5384525d --- /dev/null +++ b/test/python/odgi_performance.md @@ -0,0 +1,61 @@ +% -*- coding: utf-8 -*- + +# ODGI Python performance + +Here we showcase the new `odgi_ffi` module that is faster than the original `odgi` python module. Both are shipped with odgi now.
+ +Python bindings will always be slower than C++ or Rust, because much low-level copying is going on and Python keeps track of typing. Still, Python can be useful as a tool for exploring graphs. With the newer `odgi_ffi` module we are using values instead of implicit C references. + +Let's time loading a graph and traversing it 100 times + +```python +>>> import time +>>> from odgi_ffi import * + +>>> graph = odgi_load_graph("DRB1-3123_sorted.og") + +>>> odgi_get_node_count(graph) +3214 + +>>> res = [] +>>> tic = time.perf_counter() +>>> for x in range(1, 100): +... res.append(odgi_for_each_handle(graph, lambda h: odgi_get_sequence(graph,h) and True)) + +>>> toc = time.perf_counter() +>>> print(f"{toc - tic:0.4f} seconds") # doctest: +SKIP +0.7916 seconds + +``` + +This is twice as fast as the older `odgi` module. That would look like + +```python +from odgi import * +import time + +gr = graph() + +gr.load("DRB1-3123_sorted.og") + +gr.get_node_count() + +tic = time.perf_counter() +for x in range(1, 100): + gr.for_each_handle(lambda h: gr.get_sequence(h) and True) +toc = time.perf_counter() +print(f"{toc - tic:0.4f} seconds") + +``` + +Run that in the built odgi source repository with something like + +```sh +cd test +env LD_PRELOAD=libjemalloc.so.2 \ + LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib:../lib \ + PYTHONPATH=../lib python3 performance2.py +1.4674 seconds +``` + +For more, see the [odgi_ffi interface](odgi_ffi.md).