-
Notifications
You must be signed in to change notification settings - Fork 0
/
eof_module.py
144 lines (109 loc) · 4.83 KB
/
eof_module.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def compute_eof(data):
import numpy as np
import scipy.linalg as la
"""
Computes Empirical Orthogonal Functions (EOFs) from the given data.
Parameters:
- data (3D array or xarray DataArray): Input data with dimensions
[time, latitude, longitude]
Returns:
- val_prop (1D array): Eigenvalues
- vec_prop (2D array): Eigenvectors
- eof (3D array): Empirical Orthogonal Functions reshaped back to
the original dimensions
- var_exp (1D array): Variance explained by each mode
"""
# Verify if input data is numpy array or xarray object
if not isinstance(data, np.ndarray):
try:
# Try to extract numpy array fromxarray DataArray
data = data.values
except AttributeError:
raise ValueError("Input data should be either a numpy array or '\
'an xarray DataArray.")
# Extract the shape dimensions
ntime, nlat, nlon = data.shape
# Reshape the 3D data into 2D
data_reshape = data.reshape(ntime, nlat*nlon)
# Find positions of NaN values in the reshaped data
idx_nan_array = np.where(np.isnan(data_reshape[0, :]))
# Remove NaN values to create a new matrix without NaN
data_reshape_No_NaN = np.delete(data_reshape, idx_nan_array[0], 1)
# Check for NaN or Inf values in the data
if np.isnan(data_reshape_No_NaN).any() or \
np.isinf(data_reshape_No_NaN).any():
pass
#raise ValueError("Input data contains NaN or Inf values.")
# Calculate the covariance matrix
matriz_cov = np.dot(data_reshape_No_NaN, data_reshape_No_NaN.T)
# Compute eigenvalues and eigenvectors of the covariance matrix
val_prop, vec_prop = la.eig(matriz_cov)
# Calculate the total variance
sum_evals = np.sum(val_prop)
# Calculate the percentage of variance explained by each mode
var_exp = (val_prop / sum_evals) * 100
# Project the eigenvectors onto the data to obtain the EOFs
eof = np.dot(vec_prop.T, data_reshape_No_NaN)
# Initialize a space filled with NaNs to store the EOF information
eof_con_NaN = np.copy(data_reshape)*np.nan
# Identify positions without NaN values
dim_espacio = np.arange(data_reshape.shape[1])
Not_Nan = np.setdiff1d(dim_espacio, idx_nan_array)
# Store the non-NaN EOF information
eof_con_NaN[:, Not_Nan] = eof
# Update the EOF variable
eof = eof_con_NaN
# Reshape the EOFs back to their original 3D shape
eof = eof.reshape(ntime, nlat, nlon)
return val_prop, vec_prop, eof, var_exp
def project_data(data, vec_prop):
"""
Project 3D data onto a subspace defined by a set of eigenvectors.
Parameters:
- data (numpy.ndarray or xarray.DataArray): Input 3D data array with
dimensions (time, latitude, longitude).
- vec_prop (numpy.ndarray): Eigenvectors defining the subspace.
Should have the same number of time steps as the input data.
Returns:
- numpy.ndarray: Projected data in the subspace defined
by the eigenvectors, with the same dimensions as
the input data.
Raises:
- ValueError: If the input data is not a numpy array or an
xarray DataArray.
- ValueError: If the time dimensions of the input data and
eigenvectors do not match.
"""
import numpy as np
# Verify if input data is numpy array or xarray object
if not isinstance(data, np.ndarray):
try:
# Try to extract numpy array fromxarray DataArray
data = data.values
except AttributeError:
raise ValueError("Input data should be either a numpy array or '\
'an xarray DataArray.")
# Extract the shape dimensions
ntime, nlat, nlon = data.shape
if (ntime, ntime) != vec_prop.shape:
raise ValueError(f"Time longitude does not coincide:\n"
f'\tdata_time:{ntime}, {vec_prop.shape}')
# Reshape the 3D data into 2D
data_reshape = data.reshape(ntime, nlat*nlon)
# Find positions of NaN values in the reshaped data
idx_nan_array = np.where(np.isnan(data_reshape[0, :]))
# Remove NaN values to create a new matrix without NaN
data_reshape_No_NaN = np.delete(data_reshape, idx_nan_array[0], 1)
# Calculate the covariance matrix
eof = np.dot(vec_prop.T, data_reshape_No_NaN)
eof_con_NaN = np.copy(data_reshape)*np.nan
# Identify positions without NaN values
dim_espacio = np.arange(data_reshape.shape[1])
Not_Nan = np.setdiff1d(dim_espacio, idx_nan_array)
# Store the non-NaN EOF information
eof_con_NaN[:, Not_Nan] = eof
# Update the EOF variable
eof = eof_con_NaN
# Reshape the EOFs back to their original 3D shape
eof = eof.reshape(ntime, nlat, nlon)
return eof