From c0338c966b97c69a58ef804c40f497b86ea91694 Mon Sep 17 00:00:00 2001
From: Brian Ward <bward@flatironinstitute.org>
Date: Mon, 18 Mar 2024 15:50:17 -0400
Subject: [PATCH 1/4] Add a num_threads helper argument to pathfinder()

---
 cmdstanpy/model.py      | 16 ++++++++++++++++
 test/test_pathfinder.py | 17 +++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py
index 17be3b5e..275f637f 100644
--- a/cmdstanpy/model.py
+++ b/cmdstanpy/model.py
@@ -1635,6 +1635,7 @@ def pathfinder(
         refresh: Optional[int] = None,
         time_fmt: str = "%Y%m%d%H%M%S",
         timeout: Optional[float] = None,
+        num_threads: Optional[int] = None,
     ) -> CmdStanPathfinder:
         """
         Run CmdStan's Pathfinder variational inference algorithm.
@@ -1737,6 +1738,10 @@ def pathfinder(
         :param timeout: Duration at which Pathfinder times
             out in seconds. Defaults to None.
 
+        :param num_threads: Number of threads to request for parallel execution,
+            exported via the ``STAN_NUM_THREADS`` environment variable. A value
+            other than ``1`` requires a model compiled with STAN_THREADS=True.
+
         :return: A :class:`CmdStanPathfinder` object
 
         References
@@ -1763,6 +1768,17 @@ def pathfinder(
                 "available for CmdStan versions 2.34 and later"
             )
 
+        if num_threads is not None:
+            if (
+                num_threads != 1
+                and exe_info.get('STAN_THREADS', '').lower() != 'true'
+            ):
+                raise ValueError(
+                    "Model must be compiled with 'STAN_THREADS=true' to use"
+                    " 'num_threads' argument"
+                )
+            os.environ['STAN_NUM_THREADS'] = str(num_threads)
+
         if num_paths == 1:
             if num_single_draws is None:
                 num_single_draws = draws
diff --git a/test/test_pathfinder.py b/test/test_pathfinder.py
index b8f7c050..3289e485 100644
--- a/test/test_pathfinder.py
+++ b/test/test_pathfinder.py
@@ -152,3 +152,20 @@ def test_pathfinder_no_lp_calc():
     n_lp_nan = np.sum(np.isnan(pathfinder.method_variables()['lp__']))
     assert n_lp_nan < 4000  # some lp still calculated during pathfinder
     assert n_lp_nan > 3000  # but most are not
+
+
+def test_pathfinder_threads():
+    stan = DATAFILES_PATH / 'bernoulli.stan'
+    jdata = str(DATAFILES_PATH / 'bernoulli.data.json')
+    # Rebuild: a cached exe may still have STAN_THREADS on from an earlier run
+    bern_model = cmdstanpy.CmdStanModel(stan_file=stan, force_compile=True)
+    bern_model.pathfinder(data=jdata, num_threads=1)
+
+    with pytest.raises(ValueError, match="STAN_THREADS"):
+        bern_model.pathfinder(data=jdata, num_threads=4)
+
+    bern_model = cmdstanpy.CmdStanModel(
+        stan_file=stan, cpp_options={'STAN_THREADS': True}, force_compile=True
+    )
+    pathfinder = bern_model.pathfinder(data=jdata, num_threads=4)
+    assert pathfinder.draws().shape == (1000, 3)

From 72e9c5c2a8d4de941a77c9862762f23b6a1ca2f6 Mon Sep 17 00:00:00 2001
From: Brian Ward <bward@flatironinstitute.org>
Date: Mon, 18 Mar 2024 16:12:11 -0400
Subject: [PATCH 2/4] Fix sample test that didn't like threads being enabled

---
 test/test_sample.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_sample.py b/test/test_sample.py
index 62f88317..fd6d2f2c 100644
--- a/test/test_sample.py
+++ b/test/test_sample.py
@@ -55,7 +55,7 @@
 )
 def test_bernoulli_good(stanfile: str):
     stan = os.path.join(DATAFILES_PATH, stanfile)
-    bern_model = CmdStanModel(stan_file=stan)
+    bern_model = CmdStanModel(stan_file=stan, force_compile=True)
 
     jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json')
     bern_fit = bern_model.sample(
@@ -74,6 +74,7 @@ def test_bernoulli_good(stanfile: str):
 
     for i in range(bern_fit.runset.chains):
         csv_file = bern_fit.runset.csv_files[i]
+        # NB this assumes we're not using threads for chains
         stdout_file = bern_fit.runset.stdout_files[i]
         assert os.path.exists(csv_file)
         assert os.path.exists(stdout_file)

From 3164e706c9cc96d25f3f4df91e562e05e34aac65 Mon Sep 17 00:00:00 2001
From: Brian Ward <bward@flatironinstitute.org>
Date: Mon, 25 Mar 2024 14:55:22 -0400
Subject: [PATCH 3/4] Fix: properly allow multiple inits in Pathfinder

---
 cmdstanpy/cmdstan_args.py |  1 +
 test/test_pathfinder.py   | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/cmdstanpy/cmdstan_args.py b/cmdstanpy/cmdstan_args.py
index 0054fdce..07040d6d 100644
--- a/cmdstanpy/cmdstan_args.py
+++ b/cmdstanpy/cmdstan_args.py
@@ -930,6 +930,7 @@ def validate(self) -> None:
                 if not (
                     isinstance(self.method_args, SamplerArgs)
                     and self.method_args.num_chains > 1
+                    or isinstance(self.method_args, PathfinderArgs)
                 ):
                     if not os.path.exists(self.inits):
                         raise ValueError('no such file {}'.format(self.inits))
diff --git a/test/test_pathfinder.py b/test/test_pathfinder.py
index 3289e485..dfbab0b6 100644
--- a/test/test_pathfinder.py
+++ b/test/test_pathfinder.py
@@ -2,6 +2,8 @@
     Tests for the Pathfinder method.
 """
 
+import contextlib
+from io import StringIO
 from pathlib import Path
 
 import numpy as np
@@ -129,6 +131,26 @@ def test_pathfinder_init_sampling():
     assert fit.draws().shape == (1000, 4, 9)
 
 
+def test_inits_for_pathfinder():
+    stan = DATAFILES_PATH / 'bernoulli.stan'
+    bern_model = cmdstanpy.CmdStanModel(stan_file=stan)
+    jdata = str(DATAFILES_PATH / 'bernoulli.data.json')
+    bern_model.pathfinder(
+        jdata, inits=[{"theta": 0.1}, {"theta": 0.9}], num_paths=2
+    )
+
+    # second init (theta=1.1) is out of bounds; the console output must say so
+    with contextlib.redirect_stdout(StringIO()) as captured:
+        bern_model.pathfinder(
+            jdata,
+            inits=[{"theta": 0.1}, {"theta": 1.1}],
+            num_paths=2,
+            show_console=True,
+        )
+
+    assert "Bounded variable is 1.1" in captured.getvalue()
+
+
 def test_pathfinder_no_psis():
     stan = DATAFILES_PATH / 'bernoulli.stan'
     bern_model = cmdstanpy.CmdStanModel(stan_file=stan)

From 71d22e03174b58b5e48be02ff0905d49703bf67a Mon Sep 17 00:00:00 2001
From: Brian Ward <bward@flatironinstitute.org>
Date: Mon, 25 Mar 2024 16:08:01 -0400
Subject: [PATCH 4/4] Clarify comment

---
 test/test_sample.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_sample.py b/test/test_sample.py
index fd6d2f2c..05b204a8 100644
--- a/test/test_sample.py
+++ b/test/test_sample.py
@@ -74,7 +74,8 @@ def test_bernoulli_good(stanfile: str):
 
     for i in range(bern_fit.runset.chains):
         csv_file = bern_fit.runset.csv_files[i]
-        # NB this assumes we're not using threads for chains
+        # NB: This will fail if STAN_THREADS is enabled
+        # due to sampling only producing 1 stdout file in that case
         stdout_file = bern_fit.runset.stdout_files[i]
         assert os.path.exists(csv_file)
         assert os.path.exists(stdout_file)