diff --git a/README.md b/README.md index ea5f872..725d2f9 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,86 @@ -# ArcaNN XYZ Frame Extractor - -The ArcaNN XYZ Frame Extractor is a command-line tool that processes trajectory files in the XYZ format. It allows you to extract frames from a trajectory file based on specified options and write them to a new trajectory file. - -## Features - -- Extract frames from an XYZ trajectory file -- Specify the frame extraction interval using the `--stride` option -- Skip a certain number of frames from the beginning using the `--skip` option -- Choose the comment line using the `--comment` option: `frame`, `cp2k`, or `cell` -- Provide a CP2K cell file using the `--cell_file` option -- Process large trajectory files efficiently - -## Requirements - -- Python >= 3.6 -- Additional Python dependencies can be installed using the `requirements.txt` file - -## Installation - -1. Download the ArcaNN XYZ Frame Extractor - - Option a. Clone the ArcaNN XYZ Frame Extractor repository: - - ```bash - git clone https://github.com/arcann-chem/xyz_frame_extractor.git - ``` - - Option b. Download the archive by clicking on the green Code button and then download zip - - ```bash - unzip xyz_frame_extractor-main.zip -d xyz_frame_extractor - ``` - -2. Navigate to the project directory: - - ```bash - cd xyz_frame_extractor - ``` - -3. Install the dependencies and the module (please do not forget the dot at the end): - - ```bash - pip install -r requirements.txt . 
- ``` - -## Usage - -Go to the directory where the trajectory is located or otherwise specify the absolute path of the file of the trajectory, then - -```bash -python -m xyz_frame_extractor input.xyz output.xyz --stride 2 --skip 10 --comment frame --cell_file input.cell -``` - -- `input.xyz` is the name of the input XYZ trajectory file (if not in the directory specify the absolute path) -- `output.xyz` is the name of the output XYZ trajectory file (if needed specify the absolute path where you want to locate your file) -- `--stride` (optional) specifies the frame extraction interval (default: 1). -- `--skip` (optional) specifies the number of frames to skip from the beginning of the trajectory (default: 0). -- `--comment` (optional) sepecifies the comment line (default: frame): frame, cp2k or cell. - - `frame`: the comment in is the format Frame: $i - - `cp2k`: the comment line is in the CP2K format: the input.xyz has to be in the cp2k format too. - - `cell`: used with `--cell_file` (the name of a CP2K cell file) provide the comment as format `ABX xx xy xz yx yy yz zx zy zz`. - -**Note:** The input and output file paths are required parameters, while `--stride`, `--skip`, `--comment` and `--cell_file` are optional. - -## Examples - -1. Extract frames from `input.xyz` with a stride of 2, skipping the first 10 frames: - - ```bash - python -m xyz_frame_extractor input.xyz output.xyz --stride 2 --skip 10 - ``` - -2. Extract frames from `input.xyz` with a stride of 50 without skipping any frames: - - ```bash - python -m xyz_frame_extractor $HOME/inputs/input.xyz $HOME/outputs/output.xyz --stride 50 - ``` - -## License - -Distributed under the GNU Affero General Public License v3.0. See `LICENSE` for more information. - -## Contact - -For any questions or inquiries, please contact the ArcaNN developers group at [https://github.com/arcann-chem](https://github.com/arcann-chem). 
+# ArcaNN XYZ Frame Extractor + +The ArcaNN XYZ Frame Extractor is a command-line tool that processes trajectory files in the XYZ format. It allows you to extract frames from a trajectory file based on specified options and write them to a new trajectory file. + +## Features + +- Extract frames from an XYZ trajectory file +- Specify the frame extraction interval using the `--stride` option +- Skip a certain number of frames from the beginning using the `--skip` option +- Choose the comment line using the `--comment` option: `frame`, `cp2k`, or `cell` +- Provide a CP2K cell file using the `--cell_file` option +- Process large trajectory files efficiently + +## Requirements + +- Python >= 3.6 +- Additional Python dependencies can be installed using the `requirements.txt` file + +## Installation + +1. Download the ArcaNN XYZ Frame Extractor + + Option a. Clone the ArcaNN XYZ Frame Extractor repository: + + ```bash + git clone https://github.com/arcann-chem/xyz_frame_extractor.git + ``` + + Option b. Download the archive by clicking on the green Code button and then download zip + + ```bash + unzip xyz_frame_extractor-main.zip -d xyz_frame_extractor + ``` + +2. Navigate to the project directory: + + ```bash + cd xyz_frame_extractor + ``` + +3. Install the dependencies and the module (please do not forget the dot at the end): + + ```bash + pip install -r requirements.txt . + ``` + +## Usage + +Go to the directory where the trajectory is located or otherwise specify the absolute path of the file of the trajectory, then + +```bash +python -m xyz_frame_extractor input.xyz output.xyz --stride 2 --skip 10 --comment frame --cell_file input.cell +``` + +- `input.xyz` is the name of the input XYZ trajectory file (if not in the directory specify the absolute path) +- `output.xyz` is the name of the output XYZ trajectory file (if needed, specify the absolute path where you want to locate your file) +- `--stride` (optional) specifies the frame extraction interval (default: 1). 
+- `--skip` (optional) specifies the number of frames to skip from the beginning of the trajectory (default: 0). +- `--comment` (optional) specifies the comment line (default: frame): frame, cp2k or cell. + - `frame`: the comment line is in the format `Frame: $i` + - `cp2k`: the comment line is in the CP2K format; `input.xyz` has to be in the CP2K format too. + - `cell`: used with `--cell_file` (the name of a CP2K cell file); the comment line is written in the format `ABX xx xy xz yx yy yz zx zy zz`. + +**Note:** The input and output file paths are required parameters, while `--stride`, `--skip`, `--comment` and `--cell_file` are optional. + +## Examples + +1. Extract frames from `input.xyz` with a stride of 2, skipping the first 10 frames: + + ```bash + python -m xyz_frame_extractor input.xyz output.xyz --stride 2 --skip 10 + ``` + +2. Extract frames from `input.xyz` with a stride of 50 without skipping any frames: + + ```bash + python -m xyz_frame_extractor $HOME/inputs/input.xyz $HOME/outputs/output.xyz --stride 50 + ``` + +## License + +Distributed under the GNU Affero General Public License v3.0. See `LICENSE` for more information. + +## Contact + +For any questions or inquiries, please contact the ArcaNN developers group at [https://github.com/arcann-chem](https://github.com/arcann-chem). 
diff --git a/xyz_frame_extractor/__main__.py b/xyz_frame_extractor/__main__.py index 9ccde1b..2bced6a 100644 --- a/xyz_frame_extractor/__main__.py +++ b/xyz_frame_extractor/__main__.py @@ -46,12 +46,8 @@ choices=["frame", "cell", "cp2k"], help="path to the cell information file (required for 'cell' comment type)", ) -parser.add_argument( - "--cell_file", - type=str, - default="", - help="TODO" -) +parser.add_argument("--cell_file", type=str, default="", help="TODO") + def main(input_file, output_file, frame_stride, skip_frames, comment, cell_file): logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") @@ -61,7 +57,9 @@ def main(input_file, output_file, frame_stride, skip_frames, comment, cell_file) # Check comment type known_comment_types = ["frame", "cell", "cp2k"] if comment not in known_comment_types: - logging.error(f"'{comment}' is not a known comment type. Choose from {', '.join(known_comment_types)}.") + logging.error( + f"'{comment}' is not a known comment type. Choose from {', '.join(known_comment_types)}." + ) return 1 # Process cell file for 'cell' comment type @@ -83,7 +81,13 @@ def main(input_file, output_file, frame_stride, skip_frames, comment, cell_file) # Prompt before overwriting output file if output_xyz.is_file(): while True: - user_input = input(f"File '{output_xyz}' already exists. Delete it (Y) or abort (N)? ").strip().upper() + user_input = ( + input( + f"File '{output_xyz}' already exists. Delete it (Y) or abort (N)? " + ) + .strip() + .upper() + ) if user_input == "Y": output_xyz.unlink() logging.info(f"Deleted '{output_xyz}'.") @@ -94,16 +98,19 @@ def main(input_file, output_file, frame_stride, skip_frames, comment, cell_file) else: logging.warning("Invalid input. 
Please enter 'Y' or 'N'.") - # Validate stride and skip_frames values if frame_stride <= 0 or skip_frames < 0: - logging.error("Stride should be a positive integer, and skip count should be non-negative.") + logging.error( + "Stride should be a positive integer, and skip count should be non-negative." + ) return 1 num_atoms, atom_symbols, atom_coords, step_infos = read_xyz_trajectory(input_xyz) if frame_stride > num_atoms.size: - logging.error("Stride value cannot be greater than the total number of frames in the trajectory.") + logging.error( + "Stride value cannot be greater than the total number of frames in the trajectory." + ) return 1 if comment_line is None and comment == "cp2k": @@ -113,7 +120,15 @@ def main(input_file, output_file, frame_stride, skip_frames, comment, cell_file) for frame_idx in range(skip_frames, num_atoms.size, frame_stride): if frame_idx >= num_atoms.size: continue - write_xyz_frame(output_xyz, frame_idx, num_atoms, atom_coords, atom_symbols, comment_line, comment=comment) + write_xyz_frame( + output_xyz, + frame_idx, + num_atoms, + atom_coords, + atom_symbols, + comment_line, + comment=comment, + ) num_saved_frames += 1 logging.info("Processing complete without errors.") diff --git a/xyz_frame_extractor/tests/test_xyz.py b/xyz_frame_extractor/tests/test_xyz.py index 80f1275..dc8809f 100644 --- a/xyz_frame_extractor/tests/test_xyz.py +++ b/xyz_frame_extractor/tests/test_xyz.py @@ -137,7 +137,9 @@ def test_read_xyz_trajectory_oneframe(self): ) def test_read_xyz_trajectory(self): - num_atoms, atom_symbols, atom_coords, step_infos = read_xyz_trajectory(self.file_path) + num_atoms, atom_symbols, atom_coords, step_infos = read_xyz_trajectory( + self.file_path + ) self.assertIsInstance(num_atoms, np.ndarray) self.assertIsInstance(atom_symbols, np.ndarray) @@ -277,7 +279,9 @@ def tearDown(self): self.tmp_dir.cleanup() def test_read_write_xyz_trajectory(self): - num_atoms, atom_symbols, atom_coords, step_infos = 
read_xyz_trajectory(self.file_path) + num_atoms, atom_symbols, atom_coords, step_infos = read_xyz_trajectory( + self.file_path + ) self.assertIsInstance(num_atoms, np.ndarray) self.assertIsInstance(atom_symbols, np.ndarray) @@ -306,7 +310,9 @@ def test_read_write_xyz_trajectory(self): self.file_new_path = Path(self.tmp_dir.name) / "new.xyz" write_xyz_frame(self.file_new_path, 0, num_atoms, atom_coords, atom_symbols) - num_atoms, atom_symbols, atom_coords, step_infos = read_xyz_trajectory(self.file_new_path) + num_atoms, atom_symbols, atom_coords, step_infos = read_xyz_trajectory( + self.file_new_path + ) self.assertIsInstance(num_atoms, np.ndarray) self.assertIsInstance(atom_symbols, np.ndarray) diff --git a/xyz_frame_extractor/xyz.py b/xyz_frame_extractor/xyz.py index c33b6e7..a6ddff8 100644 --- a/xyz_frame_extractor/xyz.py +++ b/xyz_frame_extractor/xyz.py @@ -23,7 +23,9 @@ import numpy as np -def read_xyz_trajectory(file_path: Path) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +def read_xyz_trajectory( + file_path: Path, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ Read an XYZ format trajectory file and return the number of atoms, atomic symbols, and atomic coordinates. @@ -80,10 +82,15 @@ def read_xyz_trajectory(file_path: Path) -> Tuple[np.ndarray, np.ndarray, np.nda # Second line contains the molecule name or comment (optional) comment_line = lines[i + 1].strip() - match = re.search(r"i\s*=\s*(\d+),\s*time\s*=\s*(\d+\.\d+),\s*E\s*=\s*(-?\d+\.\d+)", comment_line) + match = re.search( + r"i\s*=\s*(\d+),\s*time\s*=\s*(\d+\.\d+),\s*E\s*=\s*(-?\d+\.\d+)", + comment_line, + ) if match: - step_info_list.append([int(match.group(1)),float(match.group(2)),float(match.group(3))]) + step_info_list.append( + [int(match.group(1)), float(match.group(2)), float(match.group(3))] + ) # Initialize arrays to store the symbols and coordinates for the current timestep step_atom_symbols = np.zeros((num_atoms,), dtype="