Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

70 support for rsm plots #78

Merged
merged 22 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies = [
[project.optional-dependencies]
dev = [
"pytest",
"ruff==0.1.8",
"structlog==22.3.0",
]

Expand All @@ -40,6 +41,48 @@ file = "LICENSE"
"Homepage" = "https://github.com/FAIRmat-NFDI/nomad-measurements"
"Bug Tracker" = "https://github.com/FAIRmat-NFDI/nomad-measurements/issues"

[tool.ruff]
include = ["src/*.py", "tests/*.py"]
select = [
"E", # pycodestyle
"W", # pycodestyle
"PL", # pylint
]
ignore = [
"E501", # Line too long ({width} > {limit} characters)
"E701", # Multiple statements on one line (colon)
"E731", # Do not assign a lambda expression, use a def
"E402", # Module level import not at top of file
"PLR0911", # Too many return statements
"PLR0912", # Too many branches
"PLR0913", # Too many arguments in function definition
"PLR0915", # Too many statements
"PLR2004", # Magic value used instead of constant
"PLW0603", # Using the global statement
"PLW2901", # redefined-loop-name
"PLR1714", # consider-using-in
"PLR5501", # else-if-used
]
fixable = ["ALL"]
exclude = ["dependencies"]

# Same as Black.
line-length = 88
indent-width = 4

[tool.ruff.format]
# use single quotes for strings.
quote-style = "single"

# indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

[tool.setuptools.packages.find]
where = [
"src",
Expand Down
162 changes: 162 additions & 0 deletions src/nomad_measurements/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# limitations under the License.
#
import os.path
import numpy as np
import collections
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -134,3 +136,163 @@ def to_pint_quantity(value: Any=None, unit: str=None) -> Any:
return value
return value.to(unit)
return value * ureg(unit)

def are_all_identical(arr_list):
    '''
    Check if all the arrays in the list are identical. Also works if the arrays are
    pint.Quantity.

    For pint.Quantity entries only the magnitudes are compared; the units are not
    taken into account. An empty list is considered identical (returns True).

    Args:
        arr_list (list): A list of numpy arrays (or pint.Quantity arrays).

    Returns:
        bool: True if all the arrays are identical, False otherwise.
    '''
    # Nothing to compare: avoid an IndexError on `arr_list[0]` below.
    if len(arr_list) == 0:
        return True

    def _magnitude(arr):
        # Strip the unit so that `np.array_equal` compares plain arrays.
        return arr.magnitude if isinstance(arr, ureg.Quantity) else arr

    reference = _magnitude(arr_list[0])
    return all(np.array_equal(reference, _magnitude(arr)) for arr in arr_list[1:])

def detect_scan_type(scan_data):
    '''
    Based on the shape of data vectors, decide whether the scan_type is `line` (single
    line scan), `multiline` (multiple line scans), or `rsm` (reciprocal space mapping).
    For a 2D scan, if the conditions for `rsm` are not met, it is considered a
    `multiline` scan.

    Args:
        scan_data (dict): The X-ray diffraction data in a Python dictionary. Each key is
            a list of scan data as pint.Quantity arrays. The keys `intensity` and
            `2Theta` are read unconditionally; `Omega`, `Chi`, `Phi` and `Theta` are
            inspected only when present and not None.

    Returns:
        str: The type of scan: `line`, `multiline` or `rsm`.

    Raises:
        AssertionError: If the stacked intensity data has more than 2 dimensions.
    '''
    intensities = scan_data['intensity']
    if len(intensities) == 1:
        return 'line'

    # if intensity data is not a regular 2D array, it is not `rsm`
    if any(scan.shape != intensities[0].shape for scan in intensities[1:]):
        return 'multiline'

    intensity_data = np.array(intensities).squeeze()
    if intensity_data.ndim > 2:
        # Adjacent literals keep the message free of stray indentation that a
        # backslash continuation inside the string would embed.
        raise AssertionError(
            'Scan type not detected. `intensity.ndim` must be 1 or 2. '
            f'Found: {intensity_data.ndim}'
        )

    if not are_all_identical(scan_data['2Theta']):
        return 'multiline'

    # find the axes that update from one scan to the other
    var_axis = []
    for key in ('Omega', 'Chi', 'Phi', 'Theta'):
        data = scan_data.get(key)
        # absent axes (missing key or None value) cannot be the varying axis
        if data is None:
            continue
        if not are_all_identical(data):
            var_axis.append(key)

    # if there is only one var_axis and the dimensions of 2theta, var_axis, and
    # intensity are consistent, it is a rsm
    if len(var_axis) == 1:
        two_theta = np.array(scan_data['2Theta'])
        var_axis_data = np.array(scan_data[var_axis[0]])
        if (
            intensity_data.shape == two_theta.shape
            and intensity_data.shape[0] == np.unique(var_axis_data).shape[0]
        ):
            return 'rsm'
    return 'multiline'

def modify_scan_data(scan_data: dict, scan_type: str):
    '''
    Reduce the raw scan data to its most compact form for the given scan type.

    `line`: for every key, the magnitude of the first scan is used. If the
    differences between consecutive entries (along axis 0) are all zero, it is
    collapsed to an array holding only the first entry.

    `rsm`: for every key, the list of scans is stacked into an array. If its second
    dimension has size 1, it is flattened to 1D. If the differences between
    consecutive rows (or consecutive elements, for a 1D array) are all zero, only
    the first row (or element) is kept, as a 1D array.

    `multiline`: currently not implemented.

    In both supported cases the units of the first scan are re-attached to the
    result, and keys whose value is None are left out of the output.

    Args:
        scan_data (dict): The X-ray diffraction data in a Python dictionary. Each key is
            a list of scan data as pint.Quantity arrays.
        scan_type (str): The type of scan: `line`, `rsm` or `multiline`.

    Returns:
        dict: A `collections.defaultdict` with the processed keys of scan_data and
            modified values; looking up any other key gives None.

    Raises:
        ValueError: If scan_type is not one of `line`, `rsm`, `multiline`.
        NotImplementedError: If scan_type is `multiline`.
    '''
    if scan_type not in ('line', 'rsm', 'multiline'):
        raise ValueError(f'Invalid scan type: {scan_type}')
    if scan_type == 'multiline':
        raise NotImplementedError(f'Scan type {scan_type} is not supported.')

    reduced = collections.defaultdict(lambda: None)
    for name, scans in scan_data.items():
        if scans is None:
            continue

        if scan_type == 'line':
            values = scans[0].magnitude
            # identical elements: a single representative is enough
            if np.all(np.diff(values, axis=0) == 0):
                values = np.array([values[0]])
        else:  # scan_type == 'rsm'
            values = np.array(scans)
            # a column vector becomes a flat vector
            if values.shape[1] == 1:
                values = values.reshape(-1)
            # identical rows (or identical elements of a vector): keep the first
            if np.all(np.diff(values, axis=0) == 0):
                values = values[0].reshape(-1)

        reduced[name] = values * scans[0].units
    return reduced

def get_bounding_range_2d(ax1, ax2):
    '''
    Compute equal-length ranges for two axes that enclose arbitrarily distributed
    2D data.

    The axis with the larger extent keeps its [min, max] range. The range of the
    other axis is widened symmetrically about its midpoint until it has the same
    length. When both extents are equal, both ranges are simply [min, max].

    Args:
        ax1 (np.ndarray): array of first axis values
        ax2 (np.ndarray): array of second axis values

    Returns:
        (list, list): ax1_range, ax2_range
    '''
    hi1, lo1 = np.max(ax1), np.min(ax1)
    span1 = hi1 - lo1
    hi2, lo2 = np.max(ax2), np.min(ax2)
    span2 = hi2 - lo2

    if span1 > span2:
        # first axis dominates: stretch the second axis around its centre
        centre2 = lo2 + span2 / 2
        return [lo1, hi1], [centre2 - span1 / 2, centre2 + span1 / 2]

    # second axis dominates (or ties): stretch the first axis around its centre
    centre1 = lo1 + span1 / 2
    return [centre1 - span2 / 2, centre1 + span2 / 2], [lo2, hi2]
Loading
Loading