Merge remote-tracking branch 'origin/dev'

geoschem · Nov 21, 2024 · 0dc30a6 · 0dc30a6
2 parents e64b99b + e4497de
commit 0dc30a6
Show file tree

Hide file tree

Showing 13 changed files with 185 additions and 110 deletions.
diff --git a/src/components/jacobian_component/jacobian.sh b/src/components/jacobian_component/jacobian.sh
@@ -28,13 +28,18 @@ setup_jacobian() {
     # Create directory that will contain all Jacobian run directories
     mkdir -p -v jacobian_runs
 
-    if [ $NumJacobianTracers -gt 1 ]; then
-        nRuns=$(calculate_num_jacobian_runs $NumJacobianTracers $nElements $OptimizeBCs $OptimizeOH $isRegional)
+    if ! "$PrecomputedJacobian"; then
+        if [ $NumJacobianTracers -gt 1 ]; then
+            nRuns=$(calculate_num_jacobian_runs $NumJacobianTracers $nElements $OptimizeBCs $OptimizeOH $isRegional)
 
-        # Determine approx. number of CH4 tracers per Jacobian run
-        printf "\nCombining Jacobian runs: Generating $nRuns run directories with approx. $NumJacobianTracers CH4 tracers (representing state vector elements) per run\n"
-    else
-        nRuns=$nElements
+            # Determine approx. number of CH4 tracers per Jacobian run
+            printf "\nCombining Jacobian runs: Generating $nRuns run directories with approx. $NumJacobianTracers CH4 tracers (representing state vector elements) per run\n"
+        else
+            nRuns=$nElements
+        fi
+    else 
+        # only need to set up the prior run directory
+        nRuns=0
     fi
 
     # Copy run scripts
@@ -459,6 +464,7 @@ run_jacobian() {
         ln -s $precomputedJacobianCache data_converted_reference
 
         # Run the prior simulation
+        JacobianRunsDir=${RunDirs}/jacobian_runs
         cd ${JacobianRunsDir}
 
         # Submit prior simulation to job scheduler

diff --git a/src/components/jacobian_component/make_perturbation_sf.py b/src/components/jacobian_component/make_perturbation_sf.py
@@ -17,7 +17,8 @@
 
 def update_jacobian_perturbation_files(jacobian_dir, state_vector_labels, flat_sf):
     """
-    Update the perturbation files in the jacobian run directories with the new scale factors.
+    Update the perturbation files in the jacobian run directories with the new
+    perturbation scale factors.
     """
     contents = os.listdir(jacobian_dir)
     jacobian_rundirs = [
@@ -27,9 +28,8 @@ def update_jacobian_perturbation_files(jacobian_dir, state_vector_labels, flat_s
     ]
 
     # loop through each jacobian run directory and update the perturbation file
-    # with the new scale factors
+    # with the new perturbation SFs
     for jacobian_rundir in jacobian_rundirs:
-        dir_suffix = jacobian_rundir.split("/")[-1].split("_")[-1]
         perturbation_files = glob.glob(f"{jacobian_rundir}/Perturbations_*.txt")
         for perturbation_file in perturbation_files:
 
@@ -43,7 +43,7 @@ def update_jacobian_perturbation_files(jacobian_dir, state_vector_labels, flat_s
                 sv_element = int(sv_label)
                 sv_idx = sv_element - 1
 
-                # make sure we only apply scale factors to emission elements
+                # make sure we only apply perturbation SFs to emission elements
                 if sv_element <= int(state_vector_labels.max().item()):
                     # add the right amount of padding
                     padding = "".ljust(4 - len(str(sv_element)))
@@ -65,44 +65,65 @@ def update_jacobian_perturbation_files(jacobian_dir, state_vector_labels, flat_s
                         file.writelines(lines)
 
 
-def calculate_sfs(state_vector, emis_prior, target_emission=1e-8, prior_sf=None):
+def calculate_perturbation_sfs(
+    state_vector, emis_prior, target_emission=1e-8, prior_sf=None
+):
     """
-    Calculate the scale factors to perturb each state vector
-    element by based on the target_emission. Return a dictionary containing
-    two flat numpy arrays of perturbation scale factors indexed by state vector
-    element. The first array contains the perturbation scale factors to apply
-    to the state vector elements in jacobian simulations (based on the
-    original prior emissions). The second array contains the effective scale
-    factors (based on the nudged prior emissions for kalman mode) used in the
-    inversion to calculate the sensitivity of observations to the perturbation.
-    For a standalone inversion, the effective scale factors == jacobian scale factors.
+    Calculate the perturbation scale factors to perturb each state vector element based on the
+    target_emission.
+
+    Args:
+        state_vector [xr.Dataset] : the state vector dataset
+        emis_prior   [xr.Dataset] : the prior emissions dataset
+        target_emission [float]   : the target emission value to perturb each state vector element by
+        prior_sf     [xr.Dataset] : the prior scale factor dataset (for kalman mode)
+
+    Returns:
+        dictionary containing two flat numpy arrays of perturbation scale factors
+        indexed by state vector element. The dictionary arrays are as follows:
+
+        jacobian_pert_sf [array]: contains the perturbation scale factors to apply
+                                  to the state vector elements in jacobian simulations
+                                  (based on the original prior emissions). To accomodate
+                                  how perturbations are applied in HEMCO these are relative
+                                  to 1, so a 50% perturbation is represented as 1.5.
+
+        effective_pert_sf [array]: contains the perturbation scalefactors (based on the
+                                   nudged prior emissions for kalman mode) used in the
+                                   inversion to calculate the sensitivity of observations
+                                   to the perturbation. These are relative to 0, so a 50%
+                                   perturbation is represented as 0.5. For a standalone
+                                   inversion, effective_pert_sf + 1 == jacobian_pert_sf.
+
+        target_emission   [float]: the target emission value used to calculate the perturbation
     """
-    # create a sf dataset with the same structure as the state vector
-    sf = state_vector.copy()
-    sf = sf.rename({"StateVector": "ScaleFactor"})
+    # create a dataset with the same structure as the state vector for perturbation SFs
+    pert_sf = state_vector.copy()
+    pert_sf = pert_sf.rename({"StateVector": "ScaleFactor"})
 
-    # Calculate scale factors such that applying them to the original emissions
-    # will result in a target_emission kg/m2/s2 emission.
-    sf["ScaleFactor"] = target_emission / emis_prior["EmisCH4_Total_ExclSoilAbs"]
+    # Calculate perturbation SFs such that applying them to the original
+    # emissions will result in a target_emission kg/m2/s2 emission.
+    pert_sf["ScaleFactor"] = target_emission / emis_prior["EmisCH4_Total_ExclSoilAbs"]
 
     # Extract state vector labels
     state_vector_labels = state_vector["StateVector"]
 
-    # Add state vector labels to sf Dataset
-    sf = sf.assign(StateVector=state_vector_labels)
+    # Add state vector labels to pert sf Dataset
+    pert_sf = pert_sf.assign(StateVector=state_vector_labels)
 
-    # Use groupby to find the median scale factor for each state vector label
-    max_sf_da = sf["ScaleFactor"].groupby(sf["StateVector"]).median()
+    # Use groupby to find the median perturbation scale factor for each state vector label
+    max_pert_sf_da = pert_sf["ScaleFactor"].groupby(pert_sf["StateVector"]).median()
 
     # get flat, numpy array by converting to dataframe and sorting based on
     # state vector element
-    max_sf_df = max_sf_da.to_dataframe().reset_index()
-    max_sf_df = max_sf_df[max_sf_df["StateVector"] > 0].sort_values(by="StateVector")
-    jacobian_pert_sf = max_sf_df["ScaleFactor"].values
+    max_pert_sf_df = max_pert_sf_da.to_dataframe().reset_index()
+    max_pert_sf_df = max_pert_sf_df[max_pert_sf_df["StateVector"] > 0].sort_values(
+        by="StateVector"
+    )
+    jacobian_pert_sf = max_pert_sf_df["ScaleFactor"].values
 
     # Replace any values greater than the threshold to avoid issues
-    # with reaching infinity
-    # Replace any NaN values to 1.0
+    # with reaching infinity. Replace any NaN values with 1.0
     max_sf_threshold = 15000000.0
     jacobian_pert_sf[jacobian_pert_sf > max_sf_threshold] = max_sf_threshold
     jacobian_pert_sf = np.nan_to_num(jacobian_pert_sf, nan=1.0)
@@ -122,10 +143,13 @@ def calculate_sfs(state_vector, emis_prior, target_emission=1e-8, prior_sf=None)
     else:
         flat_prior_sf = np.ones(len(jacobian_pert_sf))
 
-    # calculate the effective scale factors
+    # calculate the effective perturbation SFs
     effective_pert_sf = jacobian_pert_sf / flat_prior_sf
 
-    # return dictionary of scale factor arrays
+    # make the effective perturbations relative to 0, as expected in the inversion
+    effective_pert_sf = effective_pert_sf - 1.0
+
+    # return dictionary of perturbation scale factor arrays
     perturbation_dict = {
         "effective_pert_sf": effective_pert_sf,
         "jacobian_pert_sf": jacobian_pert_sf,
@@ -138,7 +162,7 @@ def calculate_sfs(state_vector, emis_prior, target_emission=1e-8, prior_sf=None)
 def make_perturbation_sf(config, period_number, perturb_value=1e-8):
     """
     Calculate the perturbations for each state vector element and update the perturbation files.
-    Write out an archive of the flat scale factors for later use in sensitivity calculations.
+    Write out an archive of the flat perturbation scale factors for later use in sensitivity calculations.
     Args:
         config            [dict] : dictionary of configuration settings
         period_number      [int] : the period number for which to calculate the perturbations
@@ -174,16 +198,18 @@ def make_perturbation_sf(config, period_number, perturb_value=1e-8):
     # load the state vector dataset
     state_vector = xr.load_dataset(os.path.join(base_directory, "StateVector.nc"))
 
-    # calculate the scale factors to perturb each state vector element by
-    perturbation_dict = calculate_sfs(state_vector, hemco_emis, perturb_value, prior_sf)
+    # calculate the perturbation scale factors we perturb each state vector element by
+    perturbation_dict = calculate_perturbation_sfs(
+        state_vector, hemco_emis, perturb_value, prior_sf
+    )
 
-    # update jacobian perturbation files with new scale factors
+    # update jacobian perturbation files with new perturbation scale factors
     # before we run the jacobian simulations
     update_jacobian_perturbation_files(
         jacobian_dir, state_vector["StateVector"], perturbation_dict["jacobian_pert_sf"]
     )
 
-    # archive npz file of scale factor dictionary for later calculation of sensitivity
+    # archive npz file of perturbation scale factor dictionary for later calculation of sensitivity
     archive_dir = os.path.join(base_directory, "archive_perturbation_sfs")
     os.makedirs(archive_dir, exist_ok=True)
     np.savez(

diff --git a/src/components/posterior_component/posterior.sh b/src/components/posterior_component/posterior.sh
@@ -154,6 +154,16 @@ run_posterior() {
             OHPertPrevLine='DEFAULT    0     1.0'
             OHPertNewLine="N_HEMIS    1     ${oh_sfs[0]}\nS_HEMIS    2     ${oh_sfs[1]}"
             sed -i "/$OHPertPrevLine/a $OHPertNewLine" PerturbationsOH.txt
+
+            # Modify OH scale factor in HEMCO config
+            sed -i -e "s|AnalyticalInversion    :       false|AnalyticalInversion    :       true|g" HEMCO_Config.rc
+            sed -i -e "s| OH_pert_factor  1.0 - - - xy 1 1| OH_pert_factor PerturbationsOH.txt - - - xy 1 1|g" HEMCO_Config.rc
+
+            HcoPrevLineMask='CH4_STATE_VECTOR'
+            HcoNextLineMask='* HEMIS_MASK $ROOT\/MASKS\/v2024-08\/hemisphere_mask.01x01.nc Hemisphere 2000\/1\/1\/0 C xy 1 * - 1 1 
+'
+            sed -i "/${HcoPrevLineMask}/a ${HcoNextLineMask}" HEMCO_Config.rc
+
             printf "OH optimized perturbation values set to:\n"
             printf " ${oh_sfs[0]} for Northern Hemisphere\n"
             printf " ${oh_sfs[1]} for Southern Hemisphere\n"

diff --git a/src/components/setup_component/setup.sh b/src/components/setup_component/setup.sh
@@ -89,7 +89,7 @@ setup_imi() {
         exit 1
     fi
     # Use cropped met for regional simulations instead of using global met
-    if "$isRegional"; then
+    if [ "$RegionID" != "" ]; then
         gridDir="${gridDir}_${RegionID}"
     fi