From 18343c20f7f7a34948ff39e7fdc07a50a9ecf32d Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Sat, 20 Apr 2024 20:31:29 -0600
Subject: [PATCH] Client gets threads_per_worker=1

This reduces the run time of TPI on a 64-physical-core AMD workstation
from ~12 hours to ~3 hours. It looks like Dask's default is to
oversubscribe threads to cores, which is not a good default for
computationally intensive workloads.
---
 examples/run_og_usa.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/run_og_usa.py b/examples/run_og_usa.py
index 97d21d7e..8606b1c6 100644
--- a/examples/run_og_usa.py
+++ b/examples/run_og_usa.py
@@ -15,7 +15,7 @@
 def main():
     # Define parameters to use for multiprocessing
     num_workers = min(multiprocessing.cpu_count(), 7)
-    client = Client(n_workers=num_workers)
+    client = Client(n_workers=num_workers, threads_per_worker=1)
     print("Number of workers = ", num_workers)
 
     # Directories to save data
@@ -50,7 +50,7 @@ def main():
     # close and delete client bc cache is too large
     client.close()
     del client
-    client = Client(n_workers=num_workers)
+    client = Client(n_workers=num_workers, threads_per_worker=1)
     d = c.get_dict()
     # # additional parameters to change
     updated_params = {
@@ -107,7 +107,7 @@ def main():
     # close and delete client bc cache is too large
     client.close()
     del client
-    client = Client(n_workers=num_workers)
+    client = Client(n_workers=num_workers, threads_per_worker=1)
     # update tax function parameters in Specifications Object
     d = c2.get_dict()
     # # additional parameters to change