Skip to content

Commit

Permalink
Merge pull request #15 from sandipgiri576/master
Browse files Browse the repository at this point in the history
new descriptor
  • Loading branch information
sandipgiri576 authored Aug 19, 2024
2 parents 3f58f9a + 1e1739c commit 239564b
Show file tree
Hide file tree
Showing 48 changed files with 273 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pyar.egg-info/
test/
22 changes: 22 additions & 0 deletions pyar.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Metadata-Version: 2.1
Name: pyar
Version: 1.0
Summary: A Python Code for Aggregation and Reaction
Home-page: https://github.com/anooplab/pyar
Author: Anoop et al
Author-email: [email protected]
License: GPL v3
Keywords: computational chemistry global minima aggregation automated reaction
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Topic :: Scientific/Engineering :: Chemistry
Requires-Python: >=3.6
License-File: LICENSE

UNKNOWN

109 changes: 109 additions & 0 deletions pyar.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
LICENSE
MANIFEST.in
README.md
setup.py
pyar/Molecule.py
pyar/__init__.py
pyar/aggregator.py
pyar/checkpt.py
pyar/checkvalidity.py
pyar/crawler.py
pyar/file_manager.py
pyar/optimiser.py
pyar/property.py
pyar/reactor.py
pyar/representations.py
pyar/scan.py
pyar/similarity.py
pyar/tabu.py
pyar.egg-info/PKG-INFO
pyar.egg-info/SOURCES.txt
pyar.egg-info/dependency_links.txt
pyar.egg-info/requires.txt
pyar.egg-info/top_level.txt
pyar/AIMNet2/__init__.py
pyar/AIMNet2/calculators/__init__.py
pyar/AIMNet2/calculators/afir_aimnet.py
pyar/AIMNet2/calculators/aimnet2_ase_opt.py
pyar/AIMNet2/calculators/aimnet2ase.py
pyar/AIMNet2/models/__init__.py
pyar/AIMNet2/models/aimnet2_wb97m-d3_0.jpt
pyar/afir/__init__.py
pyar/afir/restraints.py
pyar/data/__init__.py
pyar/data/atomic_data.py
pyar/data/defualt_parameters.py
pyar/data/new_atomic_data.py
pyar/data/units.py
pyar/data_analysis/__init__.py
pyar/data_analysis/clustering.py
pyar/interface/ASE.py
pyar/interface/__init__.py
pyar/interface/aimnet_2.py
pyar/interface/aiqm1_mlatom.py
pyar/interface/ani.py
pyar/interface/babel.py
pyar/interface/gaussian.py
pyar/interface/mlatom_aiqm1.py
pyar/interface/mlopt.py
pyar/interface/mopac.py
pyar/interface/orca.py
pyar/interface/psi4.py
pyar/interface/turbomole.py
pyar/interface/xtb.py
pyar/interface/xtb_aimnet2.py
pyar/interface/xtb_aiqm1.py
pyar/interface/xtbturbo.py
pyar/mlatom/MLTPA.py
pyar/mlatom/ML_NEA.py
pyar/mlatom/MLatom.py
pyar/mlatom/MLtasks.py
pyar/mlatom/__init__.py
pyar/mlatom/aiqm1.py
pyar/mlatom/args_class.py
pyar/mlatom/ccsdtstarcbs.py
pyar/mlatom/composite_methods.py
pyar/mlatom/constants.py
pyar/mlatom/conversions.py
pyar/mlatom/data.py
pyar/mlatom/doc.py
pyar/mlatom/environment_variables.py
pyar/mlatom/header.py
pyar/mlatom/initial_conditions.py
pyar/mlatom/interface_MLatomF.py
pyar/mlatom/kreg_api.py
pyar/mlatom/mlatom_gui.py
pyar/mlatom/models-old.py
pyar/mlatom/models.py
pyar/mlatom/plot.py
pyar/mlatom/shell_cmd.py
pyar/mlatom/simulations.py
pyar/mlatom/sliceData.py
pyar/mlatom/stats.py
pyar/mlatom/stopper.py
pyar/mlatom/thermostat.py
pyar/mlatom/utils.py
pyar/mlatom/xyz.py
pyar/mlatom/aiqm1_model/__init__.py
pyar/mlatom/interfaces/__init__.py
pyar/mlatom/interfaces/ase_interface.py
pyar/mlatom/interfaces/dftd4_interface.py
pyar/mlatom/interfaces/dpmd_interface.py
pyar/mlatom/interfaces/gap_interface.py
pyar/mlatom/interfaces/gaussian_interface.py
pyar/mlatom/interfaces/mndo_interface.py
pyar/mlatom/interfaces/orca_interface.py
pyar/mlatom/interfaces/physnet_interface.py
pyar/mlatom/interfaces/pyscf_interface.py
pyar/mlatom/interfaces/sgdml_interface.py
pyar/mlatom/interfaces/sparrow_interface.py
pyar/mlatom/interfaces/torchani_interface.py
pyar/mlatom/interfaces/xtb_interface.py
pyar/scripts/__init__.py
pyar/scripts/pyar-cli
pyar/scripts/pyar-clustering
pyar/scripts/pyar-descriptor
pyar/scripts/pyar-optimiser
pyar/scripts/pyar-similarity
pyar/scripts/pyar-tabu
test/test-descriptor.py
1 change: 1 addition & 0 deletions pyar.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

10 changes: 10 additions & 0 deletions pyar.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
numpy
scikit-learn
scipy
pandas
matplotlib
pyh5md
h5py
networkx
DBCV@ git+https://github.com/christopherjenness/DBCV.git
dscribe
1 change: 1 addition & 0 deletions pyar.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pyar
Binary file added pyar/AIMNet2/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added pyar/__pycache__/Molecule.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/aggregator.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/file_manager.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/optimiser.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/property.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/representations.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/scan.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/similarity.cpython-37.pyc
Binary file not shown.
Binary file added pyar/__pycache__/tabu.cpython-37.pyc
Binary file not shown.
Binary file added pyar/data/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 changes: 3 additions & 5 deletions pyar/data_analysis/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def choose_geometries(list_of_molecules, features='mbtr', maximum_number_of_seed
dt_array = np.array(dt)
elif features == 'mbtr':
dt = [pyar.representations.mbtr_descriptor(i.atoms_list, i.coordinates) for i in list_of_molecules]
# dt = [d.get_k2()['element_Z'] for d in dt]
elif features == 'soap':
dt = [pyar.representations.soap_descriptor(i.atoms_list, i.coordinates) for i in list_of_molecules]
dt = [d.todense() for d in dt] # Convert sparse arrays to dense arrays
Expand All @@ -145,9 +146,7 @@ def choose_geometries(list_of_molecules, features='mbtr', maximum_number_of_seed

dt = np.around(dt, decimals=5)

# df = pd.DataFrame(dt)
# df.to_csv("features.csv")
# Assuming 'dt' is a list of LMBTR descriptors for multiple molecules

dt_array = np.array(dt)

# Reshape the array into a 2-dimensional array
Expand All @@ -157,8 +156,7 @@ def choose_geometries(list_of_molecules, features='mbtr', maximum_number_of_seed
df = pd.DataFrame(dt_reshaped)
df.to_csv("features.csv")

# scale_it = RobustScaler()
# dt = scale_it.fit_transform(dt)

scale_it = RobustScaler()
dt = scale_it.fit_transform(dt_reshaped)

Expand Down
Binary file not shown.
Binary file not shown.
Binary file added pyar/interface/__pycache__/xtb.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/constants.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/conversions.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/data.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/environment_variables.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/initial_conditions.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/plot.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/simulations.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/stats.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/stopper.cpython-37.pyc
Binary file not shown.
Binary file modified pyar/mlatom/__pycache__/xyz.cpython-37.pyc
Binary file not shown.
26 changes: 26 additions & 0 deletions pyar/representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,33 @@ def mbtr_descriptor(atoms_list, coordinates):

return mbtr_output






# def mbtr_descriptor(atoms_list, coordinates):
# # Create an Atoms object from the atoms_list and coordinates
# molecule = Atoms(atoms_list, positions=coordinates)

# # Get unique species from atoms_list
# unique_species = list(set(atoms_list))

# k2min, k2max, k2n = 0.7, 2.0, 100

# mbtr = MBTR(
# species=unique_species,
# geometry={"function": "distance"},
# grid={"min": k2min, "max": k2max, "n": k2n, "sigma": 0.000000001},
# weighting={"function": "exp", "scale": 0.5, "threshold": 3e-3},
# periodic=False,
# normalization="l2_each",
# )

# # Create MBTR output for the molecule
# mbtr_output = mbtr.create(molecule)

# return mbtr_output


def soap_descriptor(atoms_list, coordinates):
Expand Down
Empty file modified pyar/scripts/__init__.py
100644 → 100755
Empty file.
Empty file modified pyar/scripts/pyar-clustering
100644 → 100755
Empty file.
98 changes: 98 additions & 0 deletions pyar/scripts/pyar-descriptor
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python3

import os
import sys
import numpy as np
import warnings
from ase.io import read, write
from scipy.spatial import ConvexHull
import MDAnalysis as mda
import pandas as pd
import argparse
import glob

# Silence UserWarning messages attributed to modules whose name starts with
# "MDAnalysis.analysis.base". The `module` argument is a regular expression
# matched against the beginning of the module name that triggers the warning.
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis.analysis.base")

def calculate_properties(atoms):
    """Return simple geometric size measures of a cluster.

    Args:
        atoms: Object supporting ``len()`` and ``get_positions()``. The
            positions are indexed as a 2-D array of points; presumably this
            is an ASE ``Atoms`` object with (n_atoms, 3) Cartesian
            coordinates -- confirm against the caller.

    Returns:
        A 5-tuple ``(cluster_size, volume, surface_area, max_length, rgyr)``:
        the number of atoms, the volume and surface area of the convex hull
        of the positions, the largest pairwise distance between positions,
        and the root-mean-square distance of the positions from their
        (unweighted) centroid.

    Note:
        ``scipy.spatial.ConvexHull`` raises an error for degenerate input
        (for example fewer than four points, or points that all lie in one
        plane); that error is not handled here and propagates to the caller.
    """
    n_atoms = len(atoms)
    positions = atoms.get_positions()

    # The convex hull stands in for the cluster's shape: for 3-D input its
    # `volume` and `area` attributes are the enclosed volume and surface area.
    hull = ConvexHull(positions)

    # Build every pairwise displacement vector by broadcasting, then take the
    # longest one as the maximum extent of the cluster.
    displacements = positions[:, None, :] - positions[None, :, :]
    longest = np.linalg.norm(displacements, axis=-1).max()

    # Radius of gyration with every atom weighted equally (no masses).
    offsets = positions - positions.mean(axis=0)
    squared_radii = np.sum(offsets**2, axis=1)
    radius_of_gyration = np.sqrt(np.mean(squared_radii))

    return n_atoms, hull.volume, hull.area, longest, radius_of_gyration

def create_combined_descriptor(properties):
    """Collapse a sequence of numeric properties into one scalar.

    Each property is divided by the sum of all properties, and the resulting
    fractions are multiplied together.

    Args:
        properties: Sequence of numbers (e.g. the tuple returned by
            ``calculate_properties``).

    Returns:
        The product of the sum-normalised property values.
    """
    values = np.array(properties)
    fractions = values / values.sum()
    return fractions.prod()

def _collect_input_files(patterns):
    """Expand glob patterns into an ordered list of distinct file paths.

    Matches of each pattern are sorted so the processing order does not
    depend on directory listing order, and a file matched by more than one
    pattern (or listed twice) is kept only once so that it is never compared
    against itself and reported as a duplicate structure.
    """
    seen = set()
    files = []
    for pattern in patterns:
        for path in sorted(glob.glob(pattern)):
            key = os.path.realpath(path)
            if key not in seen:
                seen.add(key)
                files.append(path)
    return files


def main(args):
    """Compute size descriptors for XYZ files and separate unique structures.

    For every input file the geometric properties and the combined
    descriptor are computed, and the descriptor is written to
    ``<basename>.mb`` in the current working directory. A structure is
    "unique" when its combined descriptor has not been seen before (exact
    floating-point equality); otherwise it is a duplicate.

    Outputs written to the current working directory:
        * ``unique_cluster_properties.csv`` -- one row per unique structure.
        * ``unique_files.xyz`` / ``duplicate_files.xyz`` -- the unique and
          duplicate structures respectively.

    Args:
        args: Namespace with an ``input_files`` attribute holding a list of
            file names or glob patterns.

    Raises:
        SystemExit: With status 1 when no pattern matches any file.
    """
    xyz_files = _collect_input_files(args.input_files)

    if not xyz_files:
        print("No XYZ files found.")
        sys.exit(1)

    data = []
    unique_descriptors = {}
    unique_atoms = []
    duplicate_atoms = []

    for filename in xyz_files:
        atoms = read(filename)
        properties = calculate_properties(atoms)
        combined_descriptor = create_combined_descriptor(properties)

        # Create .mb file (named after the input file, without directory)
        basename = os.path.splitext(os.path.basename(filename))[0]
        mb_filename = f"{basename}.mb"
        with open(mb_filename, 'w') as f:
            f.write(f"{combined_descriptor}\n")

        # The first file producing a given descriptor is the representative;
        # later files with the same descriptor are recorded as duplicates.
        if combined_descriptor not in unique_descriptors:
            unique_descriptors[combined_descriptor] = filename
            data.append([filename] + list(properties) + [combined_descriptor])
            unique_atoms.append(atoms)
        else:
            duplicate_atoms.append(atoms)

    # Create a DataFrame and save it as CSV. The unit labels use the
    # angstrom sign with a superscript (Å³, Å²); the earlier text had these
    # mis-encoded as "ų" and "Ų".
    columns = [
        "Filename",
        "Cluster Size",
        "Volume (Å³)",
        "Surface Area (Å²)",
        "Maximum Length (Å)",
        "Radius of Gyration (Å)",
        "Combined Descriptor",
    ]
    df = pd.DataFrame(data, columns=columns)
    df.to_csv("unique_cluster_properties.csv", index=False)

    # Write trajectory files
    write("unique_files.xyz", unique_atoms)
    write("duplicate_files.xyz", duplicate_atoms)

    print(f"Processed {len(xyz_files)} XYZ files.")
    print(f"Found {len(unique_atoms)} unique structures and {len(duplicate_atoms)} duplicates.")
    print("Results saved in unique_cluster_properties.csv, unique_files.xyz, and duplicate_files.xyz.")

if __name__ == "__main__":
    # Command-line entry point: collect one or more file names / patterns
    # and hand the parsed namespace to main().
    cli = argparse.ArgumentParser(
        description="Analyze molecular cluster XYZ files.",
    )
    cli.add_argument(
        "input_files",
        metavar='files',
        type=str,
        nargs='+',
        help='input coordinate files (supports wildcards)',
    )
    main(cli.parse_args())
Empty file modified pyar/scripts/pyar-optimiser
100644 → 100755
Empty file.
Empty file modified pyar/scripts/pyar-tabu
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
'pyar/scripts/pyar-tabu',
'pyar/scripts/pyar-clustering',
'pyar/scripts/pyar-similarity',
'pyar/scripts/pyar-descriptor',
'pyar/interface/mlopt.py',
'pyar/AIMNet2/calculators/aimnet2_ase_opt.py'
],
Expand Down

0 comments on commit 239564b

Please sign in to comment.