diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cf5d12ad4..4e6597871 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -613,6 +613,7 @@ test:macos:x64: BATCH: 0 MODEL_SPEED: hac MODEL_VERSION: v4.2.0 + timeout: 1h 30m # Test that you can run dorado in a clean cuda 20.04 environment test_archive:linux:x86:20.04_nvidia: diff --git a/dorado/demux/AdapterDetector.cpp b/dorado/demux/AdapterDetector.cpp index 6988e8b42..bf4bf60ad 100644 --- a/dorado/demux/AdapterDetector.cpp +++ b/dorado/demux/AdapterDetector.cpp @@ -1,7 +1,6 @@ #include "AdapterDetector.h" #include "parse_custom_sequences.h" -#include "utils/alignment_utils.h" #include "utils/parse_custom_kit.h" #include "utils/sequence_utils.h" #include "utils/types.h" @@ -27,7 +26,7 @@ const int PRIMER_TRIM_LENGTH = 150; EdlibAlignConfig init_edlib_config_for_adapters() { EdlibAlignConfig placement_config = edlibDefaultAlignConfig(); placement_config.mode = EDLIB_MODE_HW; - placement_config.task = EDLIB_TASK_PATH; + placement_config.task = EDLIB_TASK_LOC; // Currently none of our adapters or primers have Ns, but we should support them. static const EdlibEqualityPair additionalEqualities[4] = { {'N', 'A'}, {'N', 'T'}, {'N', 'C'}, {'N', 'G'}}; @@ -122,8 +121,13 @@ const std::vector& AdapterDetector::get_primer_sequences static SingleEndResult copy_results(const EdlibAlignResult& source, const std::string& name, size_t length) { - SingleEndResult dest; + SingleEndResult dest{}; dest.name = name; + + if (source.status != EDLIB_STATUS_OK || !source.startLocations || !source.endLocations) { + return dest; + } + dest.score = 1.0f - float(source.editDistance) / length; dest.position = {source.startLocations[0], source.endLocations[0]}; return dest; @@ -219,8 +223,13 @@ AdapterScoreResult AdapterDetector::detect(const std::string& seq, } } } - result.front = front_results[best_front]; - result.rear = rear_results[best_rear]; + + if (best_front != -1) { + result.front = front_results[best_front]; + } + if (best_rear != -1) { + result.rear = rear_results[best_rear]; + } return result; } diff --git a/dorado/demux/Trimmer.cpp b/dorado/demux/Trimmer.cpp index 872ded8a8..f206b2c1b 100644 --- a/dorado/demux/Trimmer.cpp +++ b/dorado/demux/Trimmer.cpp @@ -75,9 +75,9 @@ std::pair Trimmer::determine_trim_interval(const BarcodeScoreResult& r // window. if (trim_interval.second <= trim_interval.first) { if (res.use_top) { - return {res.top_barcode_pos.second, seqlen}; + trim_interval = {res.top_barcode_pos.second, seqlen}; } else { - return {0, res.bottom_barcode_pos.first}; + trim_interval = {0, res.bottom_barcode_pos.first}; } } } else { diff --git a/tests/data/pod5/degenerate/overtrim.pod5 b/tests/data/pod5/degenerate/overtrim.pod5 new file mode 100755 index 000000000..16f06b326 Binary files /dev/null and b/tests/data/pod5/degenerate/overtrim.pod5 differ diff --git a/tests/test_simple_basecaller_execution.sh b/tests/test_simple_basecaller_execution.sh index 4df3e20b8..253a2a8ed 100755 --- a/tests/test_simple_basecaller_execution.sh +++ b/tests/test_simple_basecaller_execution.sh @@ -78,6 +78,9 @@ if $dorado_bin basecaller $model_5k_v43 $data_dir/duplex/pod5 --modified-bases 5 fi set -e +# Check that dorado handles degenerate reads without crashing +$dorado_bin basecaller $model_5k_v43 $data_dir/pod5/degenerate/overtrim.pod5 -b ${batch} --skip-model-compatibility-check --kit-name EXP-NBD196 > $output_dir/error_condition.fq + echo dorado summary test stage $dorado_bin summary $output_dir/calls.bam