diff --git a/pyroomacoustics/libroom_src/libroom.cpp b/pyroomacoustics/libroom_src/libroom.cpp index 300a5395..444c6be2 100644 --- a/pyroomacoustics/libroom_src/libroom.cpp +++ b/pyroomacoustics/libroom_src/libroom.cpp @@ -282,10 +282,7 @@ PYBIND11_MODULE(libroom, m) { m.def("dist_line_point", &dist_line_point, "Computes the distance between a point and an infinite line"); - m.def("rir_builder", &rir_builder, "RIR builder", - py::call_guard()); - m.def("delay_sum", &delay_sum, "Delay and sum", - py::call_guard()); - m.def("fractional_delay", &fractional_delay, "Fractional delays", - py::call_guard()); + m.def("rir_builder", &rir_builder, "RIR builder"); + m.def("delay_sum", &delay_sum, "Delay and sum"); + m.def("fractional_delay", &fractional_delay, "Fractional delays"); } diff --git a/pyroomacoustics/libroom_src/rir_builder.cpp b/pyroomacoustics/libroom_src/rir_builder.cpp index 5b599aa3..36cf0821 100644 --- a/pyroomacoustics/libroom_src/rir_builder.cpp +++ b/pyroomacoustics/libroom_src/rir_builder.cpp @@ -44,11 +44,12 @@ constexpr T get_pi() { template void threaded_rir_builder_impl( py::array_t rir, - const py::array_t time, - const py::array_t alpha, + const py::array_t &time, + const py::array_t &alpha, const py::array_t - visibility, + &visibility, int fs, size_t fdl, size_t lut_gran, size_t num_threads) { + auto pi = get_pi(); // accessors for the arrays @@ -110,6 +111,9 @@ void threaded_rir_builder_impl( std::vector rir_out(num_threads * rir_len); size_t block_size = size_t(std::ceil(double(n_times) / double(num_threads))); + // release the GIL from here on + py::gil_scoped_release release; + // build the RIR ThreadPool pool(num_threads); std::vector> results; @@ -166,9 +170,10 @@ void threaded_rir_builder_impl( for (auto &&result : sum_results) result.get(); } -void rir_builder(py::buffer rir, const py::buffer time, const py::buffer alpha, - const py::buffer visibility, int fs, size_t fdl, +void rir_builder(py::buffer rir, const py::buffer &time, const 
py::buffer &alpha, + const py::buffer &visibility, int fs, size_t fdl, size_t lut_gran, size_t num_threads) { + // dispatch to correct implementation depending on input type auto buf = pybind11::array::ensure(rir); if (py::isinstance>(buf)) { @@ -214,6 +219,9 @@ void threaded_delay_sum_impl( std::vector out_buffers(num_threads * out_len, 0); size_t block_size = size_t(std::ceil(double(n_irs) / double(num_threads))); + // release the GIL from here on + py::gil_scoped_release release; + // build the RIR ThreadPool pool(num_threads); std::vector> results; @@ -315,6 +323,9 @@ void threaded_fractional_delay_impl( // divide into equal size blocks for thread processing size_t block_size = size_t(std::ceil(double(n_times) / double(num_threads))); + // release the GIL from here on + py::gil_scoped_release release; + // build the RIR ThreadPool pool(num_threads); std::vector> results; diff --git a/pyroomacoustics/libroom_src/rir_builder.hpp b/pyroomacoustics/libroom_src/rir_builder.hpp index f665123a..f4802a4c 100644 --- a/pyroomacoustics/libroom_src/rir_builder.hpp +++ b/pyroomacoustics/libroom_src/rir_builder.hpp @@ -7,8 +7,8 @@ namespace py = pybind11; -void rir_builder(py::buffer rir, const py::buffer time, const py::buffer alpha, - const py::buffer visibility, int fs, size_t fdl, +void rir_builder(py::buffer rir, const py::buffer &time, const py::buffer &alpha, + const py::buffer &visibility, int fs, size_t fdl, size_t lut_gran, size_t num_threads); void delay_sum(const py::buffer irs, const py::buffer delays, py::buffer output, diff --git a/pyroomacoustics/tests/test_build_rir.py b/pyroomacoustics/tests/test_build_rir.py index b1afc709..c28c7e14 100644 --- a/pyroomacoustics/tests/test_build_rir.py +++ b/pyroomacoustics/tests/test_build_rir.py @@ -285,7 +285,7 @@ def measure_runtime(dtype=np.float32, num_threads=4): td = np.round(tt).astype(np.int32) tf = (tt - td).astype(dtype) irs = np.zeros((tt.shape[0], fdl), dtype=dtype) - fractional_delay(irs, tf, 20, num_threads) 
+ libroom.fractional_delay(irs, tf, 20, num_threads) irs *= alpha[:, None] libroom.delay_sum(irs, td, rir, num_threads) tock_2steps = (time.perf_counter() - tick) / n_repeat