From c0338c966b97c69a58ef804c40f497b86ea91694 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 18 Mar 2024 15:50:17 -0400 Subject: [PATCH 1/4] Add a num_threads helper argument to pathfinder() --- cmdstanpy/model.py | 16 ++++++++++++++++ test/test_pathfinder.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py index 17be3b5e..275f637f 100644 --- a/cmdstanpy/model.py +++ b/cmdstanpy/model.py @@ -1635,6 +1635,7 @@ def pathfinder( refresh: Optional[int] = None, time_fmt: str = "%Y%m%d%H%M%S", timeout: Optional[float] = None, + num_threads: Optional[int] = None, ) -> CmdStanPathfinder: """ Run CmdStan's Pathfinder variational inference algorithm. @@ -1737,6 +1738,10 @@ def pathfinder( :param timeout: Duration at which Pathfinder times out in seconds. Defaults to None. + :param num_threads: Number of threads to request for parallel execution. + A number other than ``1`` requires the model to have been compiled + with STAN_THREADS=True. 
+ :return: A :class:`CmdStanPathfinder` object References @@ -1763,6 +1768,17 @@ def pathfinder( "available for CmdStan versions 2.34 and later" ) + if num_threads is not None: + if ( + num_threads != 1 + and exe_info.get('STAN_THREADS', '').lower() != 'true' + ): + raise ValueError( + "Model must be compiled with 'STAN_THREADS=true' to use" + " 'num_threads' argument" + ) + os.environ['STAN_NUM_THREADS'] = str(num_threads) + if num_paths == 1: if num_single_draws is None: num_single_draws = draws diff --git a/test/test_pathfinder.py b/test/test_pathfinder.py index b8f7c050..3289e485 100644 --- a/test/test_pathfinder.py +++ b/test/test_pathfinder.py @@ -152,3 +152,20 @@ def test_pathfinder_no_lp_calc(): n_lp_nan = np.sum(np.isnan(pathfinder.method_variables()['lp__'])) assert n_lp_nan < 4000 # some lp still calculated during pathfinder assert n_lp_nan > 3000 # but most are not + + +def test_pathfinder_threads(): + stan = DATAFILES_PATH / 'bernoulli.stan' + bern_model = cmdstanpy.CmdStanModel(stan_file=stan) + jdata = str(DATAFILES_PATH / 'bernoulli.data.json') + + bern_model.pathfinder(data=jdata, num_threads=1) + + with pytest.raises(ValueError, match="STAN_THREADS"): + bern_model.pathfinder(data=jdata, num_threads=4) + + bern_model = cmdstanpy.CmdStanModel( + stan_file=stan, cpp_options={'STAN_THREADS': True}, force_compile=True + ) + pathfinder = bern_model.pathfinder(data=jdata, num_threads=4) + assert pathfinder.draws().shape == (1000, 3) From 72e9c5c2a8d4de941a77c9862762f23b6a1ca2f6 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 18 Mar 2024 16:12:11 -0400 Subject: [PATCH 2/4] Fix sample test that didn't like threads being enabled --- test/test_sample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_sample.py b/test/test_sample.py index 62f88317..fd6d2f2c 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -55,7 +55,7 @@ ) def test_bernoulli_good(stanfile: str): stan = os.path.join(DATAFILES_PATH, stanfile) - 
bern_model = CmdStanModel(stan_file=stan) + bern_model = CmdStanModel(stan_file=stan, force_compile=True) jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') bern_fit = bern_model.sample( @@ -74,6 +74,7 @@ def test_bernoulli_good(stanfile: str): for i in range(bern_fit.runset.chains): csv_file = bern_fit.runset.csv_files[i] + # NB this assumes we're not using threads for chains stdout_file = bern_fit.runset.stdout_files[i] assert os.path.exists(csv_file) assert os.path.exists(stdout_file) From 3164e706c9cc96d25f3f4df91e562e05e34aac65 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 25 Mar 2024 14:55:22 -0400 Subject: [PATCH 3/4] Fix: properly allow multiple inits in Pathfinder --- cmdstanpy/cmdstan_args.py | 1 + test/test_pathfinder.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/cmdstanpy/cmdstan_args.py b/cmdstanpy/cmdstan_args.py index 0054fdce..07040d6d 100644 --- a/cmdstanpy/cmdstan_args.py +++ b/cmdstanpy/cmdstan_args.py @@ -930,6 +930,7 @@ def validate(self) -> None: if not ( isinstance(self.method_args, SamplerArgs) and self.method_args.num_chains > 1 + or isinstance(self.method_args, PathfinderArgs) ): if not os.path.exists(self.inits): raise ValueError('no such file {}'.format(self.inits)) diff --git a/test/test_pathfinder.py b/test/test_pathfinder.py index 3289e485..dfbab0b6 100644 --- a/test/test_pathfinder.py +++ b/test/test_pathfinder.py @@ -2,6 +2,8 @@ Tests for the Pathfinder method. """ +import contextlib +from io import StringIO from pathlib import Path import numpy as np @@ -129,6 +131,26 @@ def test_pathfinder_init_sampling(): assert fit.draws().shape == (1000, 4, 9) +def test_inits_for_pathfinder(): + stan = DATAFILES_PATH / 'bernoulli.stan' + bern_model = cmdstanpy.CmdStanModel(stan_file=stan) + jdata = str(DATAFILES_PATH / 'bernoulli.data.json') + bern_model.pathfinder( + jdata, inits=[{"theta": 0.1}, {"theta": 0.9}], num_paths=2 + ) + + # second path's init (theta=1.1) violates the variable's bounds
+ with contextlib.redirect_stdout(StringIO()) as captured: + bern_model.pathfinder( + jdata, + inits=[{"theta": 0.1}, {"theta": 1.1}], + num_paths=2, + show_console=True, + ) + + assert "Bounded variable is 1.1" in captured.getvalue() + + def test_pathfinder_no_psis(): stan = DATAFILES_PATH / 'bernoulli.stan' bern_model = cmdstanpy.CmdStanModel(stan_file=stan) From 71d22e03174b58b5e48be02ff0905d49703bf67a Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 25 Mar 2024 16:08:01 -0400 Subject: [PATCH 4/4] Clarify comment --- test/test_sample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_sample.py b/test/test_sample.py index fd6d2f2c..05b204a8 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -74,7 +74,8 @@ def test_bernoulli_good(stanfile: str): for i in range(bern_fit.runset.chains): csv_file = bern_fit.runset.csv_files[i] - # NB this assumes we're not using threads for chains + # NB: This will fail if STAN_THREADS is enabled + # due to sampling only producing 1 stdout file in that case stdout_file = bern_fit.runset.stdout_files[i] assert os.path.exists(csv_file) assert os.path.exists(stdout_file)