From 911ce561b3bfdddc62e04d0d64397878caab7d88 Mon Sep 17 00:00:00 2001 From: "T.Tian" Date: Wed, 16 Oct 2024 23:12:45 +0800 Subject: [PATCH] update joss paper and 1 figure --- paper/fig/fig_socket_hetero.svg | 1493 +++++++++++++++++++++++++++++++ paper/paper.bib | 307 +++++++ paper/paper.md | 477 +++++++--- 3 files changed, 2136 insertions(+), 141 deletions(-) create mode 100644 paper/fig/fig_socket_hetero.svg diff --git a/paper/fig/fig_socket_hetero.svg b/paper/fig/fig_socket_hetero.svg new file mode 100644 index 00000000..031001f9 --- /dev/null +++ b/paper/fig/fig_socket_hetero.svg @@ -0,0 +1,1493 @@ + + + + + + + + + + + + + + + ++ + + + + + + + + + + + + + + + + + + + + + + + +.inpt.out.ion.staticHardwareSoftwareINET socketserver:31415SPARC protocol(extending i-PI )(No file access)Server Node (AWS instance, GPU node, etc)Native i-PI Protocol/tmp/ipi_sparc_*Local file I/OComputational Nodes (CPU-intensive)SPARC C-binarympirun -np 256 sparc \ -n 8 -c 32 \ -name $fname \ -socket $sname:unixSPARC-X-API (Socket Client)Pickle dataExtra-info parsingE, F, SSPARCCalculatorSPARC-X-API (Socket Server)• ML models• Sampling algorithmsPickle dataSPARCCalculatorTraining Inferencehead nodenode groupsUNIX socketClient 1Client N...... diff --git a/paper/paper.bib b/paper/paper.bib index 7a59ed1f..c498a005 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -317,3 +317,310 @@ @article{mortensen_gpaw_2_2024 url = {http://dx.doi.org/10.1063/5.0182685}, publisher = {AIP Publishing} } +@article{tancogne_dejean_octopus_2020, + author = {Tancogne-Dejean, Nicolas and Oliveira, Micael + J. T. and Andrade, Xavier and Appel, Heiko and + Borca, Carlos H. and Le Breton, Guillaume and + Buchholz, Florian and Castro, Alberto and Corni, + Stefano and Correa, Alfredo A. and De Giovannini, + Umberto and Delgado, Alain and Eich, Florian G. 
and + Flick, Johannes and Gil, Gabriel and Gomez, Adrián + and Helbig, Nicole and Hübener, Hannes and Jestädt, + René and Jornet-Somoza, Joaquim and Larsen, Ask + H. and Lebedeva, Irina V. and Lüders, Martin and + Marques, Miguel A. L. and Ohlmann, Sebastian T. and + Pipolo, Silvio and Rampp, Markus and Rozzi, Carlo + A. and Strubbe, David A. and Sato, Shunsuke A. and + Schäfer, Christian and Theophilou, Iris and Welden, + Alicia and Rubio, Angel}, + title = {Octopus, a computational framework for exploring + light-driven phenomena and quantum dynamics in + extended and finite systems}, + journal = {The Journal of Chemical Physics}, + year = 2020, + volume = 152, + number = 12, + month = mar, + issn = {1089-7690}, + doi = {10.1063/1.5142502}, + url = {http://dx.doi.org/10.1063/1.5142502}, + publisher = {AIP Publishing} +} +@article{kresse_vasp_1996, + author = {Kresse, G. and Furthmüller, J.}, + title = {Efficiency of ab-initio total energy calculations + for metals and semiconductors using a plane-wave + basis set}, + journal = {Computational Materials Science}, + year = 1996, + volume = 6, + number = 1, + month = jul, + pages = {15–50}, + issn = {0927-0256}, + doi = {10.1016/0927-0256(96)00008-0}, + url = {http://dx.doi.org/10.1016/0927-0256(96)00008-0}, + publisher = {Elsevier BV} +} +@article{giannozzi_qe_2017, + author = {Giannozzi, P and Andreussi, O and Brumme, T and + Bunau, O and Buongiorno Nardelli, M and Calandra, M + and Car, R and Cavazzoni, C and Ceresoli, D and + Cococcioni, M and Colonna, N and Carnimeo, I and Dal + Corso, A and de Gironcoli, S and Delugas, P and + DiStasio, R A and Ferretti, A and Floris, A and + Fratesi, G and Fugallo, G and Gebauer, R and + Gerstmann, U and Giustino, F and Gorni, T and Jia, J + and Kawamura, M and Ko, H-Y and Kokalj, A and + Küçükbenli, E and Lazzeri, M and Marsili, M and + Marzari, N and Mauri, F and Nguyen, N L and Nguyen, + H-V and Otero-de-la-Roza, A and Paulatto, L and + Poncé, S and Rocca, D and Sabatini, R 
and Santra, B + and Schlipf, M and Seitsonen, A P and Smogunov, A + and Timrov, I and Thonhauser, T and Umari, P and + Vast, N and Wu, X and Baroni, S}, + title = {Advanced capabilities for materials modelling with + Quantum ESPRESSO}, + journal = {Journal of Physics: Condensed Matter}, + year = 2017, + volume = 29, + number = 46, + month = oct, + pages = 465901, + issn = {1361-648X}, + doi = {10.1088/1361-648x/aa8f79}, + url = {http://dx.doi.org/10.1088/1361-648X/aa8f79}, + publisher = {IOP Publishing} +} +@article{gonze_abinit_2020, + author = {Gonze, Xavier and Amadon, Bernard and Antonius, + Gabriel and Arnardi, Frédéric and Baguet, Lucas and + Beuken, Jean-Michel and Bieder, Jordan and Bottin, + François and Bouchet, Johann and Bousquet, Eric and + Brouwer, Nils and Bruneval, Fabien and Brunin, + Guillaume and Cavignac, Théo and Charraud, + Jean-Baptiste and Chen, Wei and Côté, Michel and + Cottenier, Stefaan and Denier, Jules and Geneste, + Grégory and Ghosez, Philippe and Giantomassi, Matteo + and Gillet, Yannick and Gingras, Olivier and Hamann, + Donald R. and Hautier, Geoffroy and He, Xu and + Helbig, Nicole and Holzwarth, Natalie and Jia, + Yongchao and Jollet, François and + Lafargue-Dit-Hauret, William and Lejaeghere, Kurt + and Marques, Miguel A.L. and Martin, Alexandre and + Martins, Cyril and Miranda, Henrique P.C. and + Naccarato, Francesco and Persson, Kristin and + Petretto, Guido and Planes, Valentin and Pouillon, + Yann and Prokhorenko, Sergei and Ricci, Fabio and + Rignanese, Gian-Marco and Romero, Aldo H. and + Schmitt, Michael Marcus and Torrent, Marc and van + Setten, Michiel J. and Van Troeye, Benoit and + Verstraete, Matthieu J. 
and Zérah, Gilles and + Zwanziger, Josef W.}, + title = {The Abinit project: Impact, environment and recent + developments}, + journal = {Computer Physics Communications}, + year = 2020, + volume = 248, + month = mar, + pages = 107042, + issn = {0010-4655}, + doi = {10.1016/j.cpc.2019.107042}, + url = {http://dx.doi.org/10.1016/j.cpc.2019.107042}, + publisher = {Elsevier BV} +} + +@article{suryanarayana_sparc_sq_2018, + author = {Suryanarayana, Phanish and Pratapa, Phanisri P. and + Sharma, Abhiraj and Pask, John E.}, + title = {SQDFT: Spectral Quadrature method for large-scale + parallel O(N) Kohn–Sham calculations at high + temperature}, + journal = {Computer Physics Communications}, + year = 2018, + volume = 224, + month = mar, + pages = {288–298}, + issn = {0010-4655}, + doi = {10.1016/j.cpc.2017.12.003}, + url = {http://dx.doi.org/10.1016/j.cpc.2017.12.003}, + publisher = {Elsevier BV} +} + +@article{sharma_sparc_cyclix_2021, + author = {Sharma, Abhiraj and Suryanarayana, Phanish}, + title = {Real-space density functional theory adapted to + cyclic and helical symmetry: Application to + torsional deformation of carbon nanotubes}, + journal = {Physical Review B}, + year = 2021, + volume = 103, + number = 3, + month = jan, + issn = {2469-9969}, + doi = {10.1103/physrevb.103.035101}, + url = {http://dx.doi.org/10.1103/PhysRevB.103.035101}, + publisher = {American Physical Society (APS)} +} + +@article{sharma_sparc_dfpt_2023, + author = {Sharma, Abhiraj and Suryanarayana, Phanish}, + title = {Calculation of phonons in real-space density + functional theory}, + journal = {Physical Review E}, + year = 2023, + volume = 108, + number = 4, + month = oct, + issn = {2470-0053}, + doi = {10.1103/physreve.108.045302}, + url = {http://dx.doi.org/10.1103/PhysRevE.108.045302}, + publisher = {American Physical Society (APS)} +} + +@article{ghosh_sparc_ofdft_2016, + author = {Ghosh, Swarnava and Suryanarayana, Phanish}, + title = {Higher-order finite-difference formulation of + 
periodic Orbital-free Density Functional Theory}, + journal = {Journal of Computational Physics}, + year = 2016, + volume = 307, + month = feb, + pages = {634–652}, + issn = {0021-9991}, + doi = {10.1016/j.jcp.2015.12.027}, + url = {http://dx.doi.org/10.1016/j.jcp.2015.12.027}, + publisher = {Elsevier BV} +} + +@article{kumar_ofdft_delta_ml_2023, + author = {Kumar, Shashikant and Jing, Xin and Pask, John + E. and Medford, Andrew J. and Suryanarayana, + Phanish}, + title = {Kohn–Sham accuracy from orbital-free density + functional theory via Δ-machine learning}, + journal = {The Journal of Chemical Physics}, + year = 2023, + volume = 159, + number = 24, + month = dec, + issn = {1089-7690}, + doi = {10.1063/5.0180541}, + url = {http://dx.doi.org/10.1063/5.0180541}, + publisher = {AIP Publishing} +} + +@article{timmerman_sparc_mlff_2024, + author = {Timmerman, Lucas R. and Kumar, Shashikant and + Suryanarayana, Phanish and Medford, Andrew J.}, + title = {Overcoming the Chemical Complexity Bottleneck in + on-the-Fly Machine Learned Molecular Dynamics + Simulations}, + journal = {Journal of Chemical Theory and Computation}, + year = 2024, + volume = 20, + number = 14, + month = jul, + pages = {5788–5795}, + issn = {1549-9626}, + doi = {10.1021/acs.jctc.4c00474}, + url = {http://dx.doi.org/10.1021/acs.jctc.4c00474}, + publisher = {American Chemical Society (ACS)} +} + +@article{kumar_sparc_mlff_2024, + author = {Kumar, Shashikant and Pask, John E. 
and + Suryanarayana, Phanish}, + title = {Shock Hugoniot calculations using on-the-fly machine + learned force fields with ab initio accuracy}, + journal = {Physics of Plasmas}, + year = 2024, + volume = 31, + number = 10, + month = oct, + issn = {1089-7674}, + doi = {10.1063/5.0230060}, + url = {http://dx.doi.org/10.1063/5.0230060}, + publisher = {AIP Publishing} +} + +@article{goedecker_order_n_dft_1999, + author = {Goedecker, Stefan}, + title = {Linear scaling electronic structure methods}, + journal = {Reviews of Modern Physics}, + year = 1999, + volume = 71, + number = 4, + month = jul, + pages = {1085–1123}, + issn = {1539-0756}, + doi = {10.1103/revmodphys.71.1085}, + url = {http://dx.doi.org/10.1103/revmodphys.71.1085}, + publisher = {American Physical Society (APS)} +} + + + +@article{bowler_order_n_dft_2012, + author = {Bowler, D R and Miyazaki, T}, + title = {O(N) methods in electronic structure + calculations}, + journal = {Reports on Progress in Physics}, + year = 2012, + volume = 75, + number = 3, + month = feb, + pages = 036503, + issn = {1361-6633}, + doi = {10.1088/0034-4885/75/3/036503}, + url = {http://dx.doi.org/10.1088/0034-4885/75/3/036503}, + publisher = {IOP Publishing} +} + + +@article{shojaei_sparc_pseudopot_2023, + author = {Shojaei, Mostafa Faghih and Pask, John E. and + Medford, Andrew J. and Suryanarayana, Phanish}, + title = {Soft and transferable pseudopotentials from + multi-objective optimization}, + journal = {Computer Physics Communications}, + year = 2023, + volume = 283, + month = feb, + pages = 108594, + issn = {0010-4655}, + doi = {10.1016/j.cpc.2022.108594}, + url = {http://dx.doi.org/10.1016/j.cpc.2022.108594}, + publisher = {Elsevier BV} +} + +@article{smith_psi4_2020, + author = {Smith, Daniel G. A. and Burns, Lori A. and + Simmonett, Andrew C. and Parrish, Robert M. and + Schieber, Matthew C. and Galvelis, Raimondas and + Kraus, Peter and Kruse, Holger and Di Remigio, + Roberto and Alenaizan, Asem and James, Andrew M. 
and + Lehtola, Susi and Misiewicz, Jonathon P. and + Scheurer, Maximilian and Shaw, Robert A. and + Schriber, Jeffrey B. and Xie, Yi and Glick, Zachary + L. and Sirianni, Dominic A. and O’Brien, Joseph + Senan and Waldrop, Jonathan M. and Kumar, Ashutosh + and Hohenstein, Edward G. and Pritchard, Benjamin + P. and Brooks, Bernard R. and Schaefer, Henry F. and + Sokolov, Alexander Yu. and Patkowski, Konrad and + DePrince, A. Eugene and Bozkaya, Uğur and King, + Rollin A. and Evangelista, Francesco A. and Turney, + Justin M. and Crawford, T. Daniel and Sherrill, + C. David}, + title = {PSI4 1.4: Open-source software for + high-throughput quantum chemistry}, + journal = {The Journal of Chemical Physics}, + year = 2020, + volume = 152, + number = 18, + month = may, + issn = {1089-7690}, + doi = {10.1063/5.0006002}, + url = {http://dx.doi.org/10.1063/5.0006002}, + publisher = {AIP Publishing} +} diff --git a/paper/paper.md b/paper/paper.md index 6483f263..0eb21d18 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -8,13 +8,14 @@ tags: - Socket Interface authors: - name: Tian Tian - orcid: - affiliation: [1,2] + orcid: 0000-0003-0634-0455 + affiliation: 1 - name: Lucas Timmerman orcid: affiliation: 1 - name: Shashikant Kumar - affiliation: 2 + orcid: 0009-0001-5134-1580 + affiliation: 1 - name: Ben Comer orcid: 0000-0002-7528-0049 affiliation: 1 @@ -25,14 +26,12 @@ authors: - name: Phanish Suryanarayana orcid: 0000-0001-5172-0049 corresponding: true - affiliation: [2, 3] + affiliation: [1, 2] affiliations: - - name: School of Civil and Environmental Engineering, Georgia Institute of Technology + - name: College of Engineering, Georgia Institute of Technology, Atlanta, GA 30332, USA index: 1 - - name: School of Chemical and Biomolecular Engineering, Georgia Institute of Technology + - name: College of Computing, Georgia Institute of Technology, Atlanta, GA 30332, USA index: 2 - - name: School of Computational Science and Engineering, Georgia Institute of Technology - 
index: 3 date: 24 September 2024 bibliography: paper.bib @@ -41,152 +40,348 @@ bibliography: paper.bib # Summary Density Functional Theory (DFT) is the de facto gold standard for -electronic structure calculations in chemistry and materials -science. While plane-wave DFT remains the most widely used, real-space -DFT provides advantages in handling complex boundary conditions and -scaling to very large systems. The SPARC-X project -(https://github.com/SPARC-X) has pioneered highly efficient real-space -DFT codes available in both Matlab [@xu_m-sparc-1.0_2020; -@zhang_m-sparc-2.0_2023] and C [@xu_sparc-1.0_2021; -@zhang_sparc-2.0_2024]. However, the specific input formats for SPARC -have often made it challenging for users accustomed to plane-wave DFT -to transition to real-space methods. To address this, we introduce -SPARC-X-API, a Python interface designed to bridge the SPARC-X project -with broader computational frameworks. Built on the atomic simulation -environment (ASE [@larsen_ase_2017]) standard, SPARC-X-API allows -users to handle SPARC file formats, and run SPARC calculations using -the same interface as with other computational packages. SPARC-X-API -provides additional features beyond the standard ASE package, -including 1) support of complex boundary conditions, 2) a JSON schema -for validating and converting calculation parameters, and 3) a -comprehensive calculator interface with advanced socket-communication -support. SPARC-X-API provides a smooth transition for users from -plane-wave DFT, making the access to real-space DFT calculations more -available and flexible for a wider range of users and computational -workflows. +electronic structure calculations in chemistry and materials science. 
+While plane-wave DFT implementations remain the most widely used, +real-space DFT provides advantages in handling complex boundary +conditions and scaling to very large systems by allowing for the +efficient use of large-scale supercomputers and linear-scaling methods +that circumvent the cubic scaling bottleneck. The SPARC-X project +([https://github.com/SPARC-X](https://github.com/SPARC-X)) provides +highly efficient and straightforward-to-install real-space DFT codes +for a wide range of first-principles applications, available in both +Matlab (M-SPARC [@xu_m-sparc-1.0_2020; @zhang_m-sparc-2.0_2023]) and C +(SPARC [@xu_sparc-1.0_2021; @zhang_sparc-2.0_2024]). The rapid growth +of SPARC’s feature set has created the need for a fully functional +interface to drive SPARC in high-throughput calculations. Here we +introduce SPARC-X-API, a Python package designed to bridge the SPARC-X +project with broader computational frameworks. Built on the atomic +simulation environment (ASE [@larsen_ase_2017]) standard, SPARC-X-API +allows users to handle SPARC file formats and run SPARC calculations +through the same interface as with other ASE-compatible DFT packages. +Beyond standard ASE capabilities, SPARC-X-API provides additional +features including 1) support of SPARC-specific setups, including +complex boundary conditions and unit conversion, 2) a JSON schema +parsed from SPARC's documentation for parameter validation and +compatibility checks, and 3) a comprehensive socket communication layer +derived from the i-PI protocol [@ceriotti_i-pi-1.0_2014; +@kapil_i-pi-2.0_2019] facilitating message passing between low-level C +code and the Python interface. The goal of SPARC-X-API is to provide an +easy-to-use interface for users with diverse needs and levels of +expertise, allowing for minimal effort in adapting SPARC to existing +computational workflows, while also supporting developers of advanced +real-space methods. 
# Statement of Need -Kohn-Sham Density Functional Theory (DFT) has unargubaly become the -cornerstone of electronic simulations in chemical and materials -sciences due to its simplicity and applications across a wide range of -systems. The popularity of DFT over other first-principle methods in -materials simulation largely stems from the simplicity of the -plane-wave pseudopotential implementation, where convergence is -controlled by simply the plane-wave cutoff energy, and solving the -Kohn-Sham equations can be benefited from highly-optimized Fast -Fourier Transform (FFT) packages. While many non-theoretical -researchers may associate DFT exclusively with plane-wave -implementations, this approach has notable limitations. The periodic -nature of the Fourier basis enforces the use of periodic boundary -conditions, making the simulation setup of isolated and semi-finite -systems non-straightforward. Additionally, the global nature of the -Fourier basis causes plane-wave codes to scale poorly with increasing -numbers of parallel processes. A compelling alternative to overcome -these limitations is to solve the Kohn-Sham equations using a -finite-difference approach on real-space grids. Real-space DFT -naturally supports both periodic and Dirichlet boundary conditions, -allowing for the flexible treatment of systems in any -dimensionality. Furthermore, the locality of the finite-difference -grids makes real-space DFT methods inherently scalable, paving the way -for the development of linearly-scaling solutions to the Kohn-Sham -equations. - - Despite the advantages of -real-space DFT, plane-wave implementations remain dominant in the -field of computational chemistry and materials science, largely due to -the greater accessibility of plane-wave DFT codes and their more -established programmable interfaces. While real-space DFT offers -significant benefits, there are currently few widely used packages -that provide comprehensive real-space DFT capabilities. 
- -The only notable exception has been GPAW -[@mortensen_gpaw_original_2005; @enkovaara_gpaw_1_2011; -@mortensen_gpaw_2_2024], which originally focused on real-space -finite-difference methods. However, in recent years, the development -of GPAW has shifted its focus toward plane-wave implementations -[@@mortensen_gpaw_2_2024], leaving its finite-difference capabilities -underdeveloped and missing key functionality. In contrast, the SPARC-X -project (https://github.com/SPARC-X) has pioneered efforts to develop -an open-source, real-space DFT code that is both user-friendly and -competitive with state-of-the-art plane-wave codes. - -SPARC-X offers real-space DFT algorithms through two implementations: -M-SPARC [@xu_m-sparc-1.0_2020; @zhang_m-sparc-2.0_2023] for -prototyping and small-system simulations, and SPARC +Density Functional Theory (DFT) has unarguably become the cornerstone +of electronic simulations in chemical and materials sciences due to +its simplicity and applications across a wide range of systems. While +many researchers primarily associate DFT with the plane-wave +pseudopotential method, due to the maturity and wide availability of +such codes, these approaches do have limitations. One long-standing +challenge in DFT is to develop methods that overcome the huge +computational cost for solving the Kohn-Sham equations, which scales +cubically with respect to the system size. In plane-wave methods, +the global nature of the Fourier basis used limits the ability to +achieve linear scaling [@bowler_order_n_dft_2012]. This becomes +especially problematic in massively parallel computing environments, +where the extensive global communication required during Fourier +transformations limits the scalability, making it challenging to +efficiently simulate very large systems in plane-wave DFT. 
Moreover, +the periodic nature of the Fourier basis enforces the use of periodic +boundary conditions, making the simulation setup of isolated and +semi-finite systems non-straightforward. A compelling +alternative to overcome these limitations is to solve the Kohn-Sham +equations using a finite-difference (FD) approach on real-space +grids. The locality of the FD grids makes real-space DFT methods +inherently scalable, paving the way for the development of +linearly-scaling solutions to the Kohn-Sham equations. +Real-space DFT also naturally supports both periodic and Dirichlet +boundary conditions, allowing for the flexible treatment of systems in +any dimensionality. + +In the past few years, the SPARC-X project +([https://github.com/SPARC-X](https://github.com/SPARC-X)) has +pioneered efforts to develop an open-source, real-space DFT code that +is both user-friendly and competitive with state-of-the-art plane-wave +codes. The philosophy of the SPARC-X project is to provide codes that +are easy to implement new algorithms, highly portable, and +straightforward to install and use across various computational +environments. In line with this, SPARC-X offers real-space DFT +algorithms through two implementations: 1) Matlab-based M-SPARC +[@xu_m-sparc-1.0_2020; @zhang_m-sparc-2.0_2023] for algorithm +prototyping and small-system simulations, with no external +dependencies other than Matlab itself, and 2) C-based SPARC [@xu_sparc-1.0_2021; @zhang_sparc-2.0_2024] for large-scale production -calculations that can accommodate a wide range of system -sizes. Although SPARC has demonstrated its computational efficiency -and features a rich set of algorithms, its adoption has been limited -by the lack of a user-friendly interface that can connect the code to -a broader audience of users and computational tools. +calculations that can accommodate a wide range of system sizes and +requires only MPI and MKL/BLAS for compilation. 
New development of +SPARC has covered topics including spin-orbit coupling, dispersion +interactions, and advanced exchange-correlation (xc) functionals +[@zhang_sparc-2.0_2024], linear-scaling Spectral Quadrature (SQ) +method [@suryanarayana_sparc_sq_2018], cyclic/helical symmetry +[@sharma_sparc_cyclix_2021], real-space density functional +perturbation theory (DFPT) [@sharma_sparc_dfpt_2023], orbital-free DFT +(OFDFT) [@ghosh_sparc_ofdft_2016; @kumar_ofdft_delta_ml_2023], and +on-the-fly machine-learning force fields (OTF-MLFF) +[@timmerman_sparc_mlff_2024; @kumar_sparc_mlff_2024]. The rapid +development of SPARC has led to the need for a fully functional and +user-friendly interface that fully automates SPARC calculations in +high-throughput scenarios. To address this, we introduce SPARC-X-API, +a Python interface designed to bridge the SPARC code with a +broader range of scientific workflows. SPARC-X-API builds upon the +Python wrapper originally shipped with SPARC version 1.0 +[@xu_sparc-1.0_2021], offering an API compatible with the widely-used +Atomic Simulation Environment (ASE [@larsen_ase_2017]) standard and +updated with the latest versions of SPARC. With ASE's support for +various popular DFT methods, including both plane-wave (e.g. VASP +[@kresse_vasp_1996], Quantum ESPRESSO [@giannozzi_qe_2017], and Abinit +[@gonze_abinit_2020]), and real-space (e.g. GPAW +[@enkovaara_gpaw_1_2011; @mortensen_gpaw_2_2024] and Octopus +[@tancogne_dejean_octopus_2020]) implementations, SPARC-X-API enables +seamless integration of SPARC into existing workflows, allowing users +to incorporate real-space DFT calculations with minimal adjustments. A summary of the role of +SPARC-X-API in the SPARC-X project is shown in +\autoref{fig:sparc-overview}. 
+In addition to the capabilities inherited from ASE, SPARC-X-API seeks +to enhance the user experience in a few key aspects, including 1) +supporting SPARC-specific features in an ASE-comatible API, 2) a +parameter validation mechanism based on SPARC's `LaTeX` documentation, +and 3) a versatile socket communication layer for efficient +high-throughput calculations. Details will be discussed in the Features and Functionalities section. -To address this, we introduce SPARC-X-API, a Python-based interface -designed to bridge the SPARC-X code with a broader range of scientific -workflows. Built on the Atomic Simulation Environment (ASE -[@larsen_ase_2017]) standard, SPARC-X-API provides seamless file -read/write support for SPARC files and a feature-complete calculator -interface to the SPARC code. With SPARC-X-API, researchers can easily -incorporate real-space DFT into their workflows using familiar tools -and interfaces, making real-space DFT more accessible to a wider range -of users. + + + + + + + + + + + + + + + + + + + + + + + + + + + + - +![**NEED REVISION** Overview of SPARC-X-API in the SPARC-X project system +\label{fig:sparc-overview} +](){ width=100% } - # Features and Functionalities -SPARC-X-API offers two key functionalities: - -- File I/O: Through the sparc.io submodule, SPARC-X-API implements - file read/write support for SPARC file formats, including .inpt and - .ion files. SPARC-X-API operates on the directory level, treating - each calculation directory as a "SPARC bundle." From version 1.0 - onwards, SPARC-X-API is fully integrated with ASE (version 3.23), - automatically registering SPARC as an external I/O format. -- Calculator Interface: The sparc.calculator submodule provides a full - ASE-compatible calculator interface for running SPARC calculations, - enabling integration with ASE workflows. 
- -Unique Features of SPARC-X-API: - -1) Support for Bundled File Formats: Unlike typical single-file DFT -implementations, SPARC requires both .inpt and .ion -files. SPARC-X-API's design simplifies this by reading and writing at -the directory level, streamlining the handling of SPARC bundles. - -2) JSON Schema for Parameter Validation: SPARC-X-API ensures parameter -consistency through a JSON schema derived from SPARC's LaTeX -documentation. This guarantees compatibility with SPARC's source code, -offering a robust mechanism for validating and converting parameters. -3) Unit Conversions: SPARC-X-API manages the conversion between atomic -units (Hartree, Bohr) used in SPARC and the eV/Å units in ASE. - -## Socket-Communication Calculator Interface -SPARC-X-API’s socket-communication layer allows for efficient and -flexible workflows by reducing the overhead of file I/O. This feature -is particularly useful for iterative calculations, such as structural -optimizations and saddle point searches, where traditional file-based -communication can become a bottleneck. - -Key advantages: - -Efficiency: Eliminates intermediate file I/O by streaming data -directly between processes. Speed: Enhances performance in iterative -calculations, critical for large-scale simulations. Flexibility: -Enables real-time modification of calculation parameters without -restarting processes. SPARC-X-API implements a backward-compatible -i-PI protocol, allowing both low-level and high-level interfacing with -SPARC's DFT code. +SPARC-X-API is structured as a Python package `sparc`. A summary of +its key functionalities is provided below, and for detailed +documentation, please refer to the [official +documentation](https://github.com/SPARC-X/SPARC-X-API/blob/master/README.md). 
+ +## `sparc.io`: File I/O Manipulation + +In SPARC and M-SPARC calculations, the input information is provided +by two files: a `.inpt` file (cell dimensions, boundary conditions, +calculation flags), and a `.ion` file (atomic configurations and +locations of pseudopotential files). Depending on the type of calculation, +various output files may be written, such as `.static`, `.geopt` or +`.aimd`. The separation of information across multiple files means +converting ASE `Atoms` objects to SPARC input files or retrieving +energy and force information from SPARC calculations requires +handling more than just a single file, as is common in most ASE I/O +formats. To manage this, SPARC-X-API operates on the directory level, +treating each calculation directory as a "SPARC bundle". The +`sparc.io.SparcBundle` class facilitates reading from and writing to +this bundle, ensuring that all necessary input and output files are +properly handled. By default, SPARC-X-API also copies relevant +pseudopotential files into the calculation directory, making the SPARC +bundle portable across different machines. From version 2.0 onwards, +SPARC-X-API leverages the new features introduced in ASE version 3.23 +to register as an external I/O format, allowing reading and writing +SPARC files directly using the `ase.io` submodule: + +```py +from ase.io import read, write +# 1. Read a SPARC bundle by specifying the `sparc` format +atoms = read("sparc_output_dir", format="sparc") +# 2. Write to a SPARC bundle from an `Atoms` object +write("sparc_input_dir", atoms, format="sparc") +``` + +SPARC-X-API supports parsing complex boundary conditions from the +`.inpt` file. The periodic (P) and Dirichlet (D) boundary conditions +are translated into `True` and `False` values, respectively, in the +corresponding `pbc` direction of an `Atoms` object. 
While standard ASE +objects do not natively support cyclic (C) or helical (H) boundary +conditions, SPARC-X-API treats them similarly to Dirichlet boundaries +and stores the original boundary condition information in the `info` +attribute of the atomic object, to preserve the correct boundary +combinations when re-writing to SPARC input files. + + +## `sparc.api`: Parameter Validation + +In the ASE ecosystem, the default calculator interfaces such as +`FileIOCalculator` do not implement parameter validation, which can +lead to issues such as incorrect parameter settings or incompatibility +when running calculations. To address this, SPARC-X-API +introduces a robust parameter validation system using a JSON schema +generated from SPARC’s [LaTeX +documentation](https://github.com/SPARC-X/SPARC/tree/master/doc/.LaTeX). A +JSON schema contains the version of the SPARC software, a list of +input parameters used in `.inpt` and `.ion` files, as well as +supported data types and parameter categories. Validation is handled by the `sparc.api.SparcAPI` class, which can: +- Verify that the schema is compatible with the version of the SPARC binary +- Convert `.inpt` fields into Python data types +- Validate input parameters in both string and numerical formats +- Output help information about specific parameter(s) + +Each release of SPARC-X-API contains a copy of a JSON schema linked +with the latest SPARC release as the default validator, although the +user is free to choose a different combination of SPARC versions and +schemas. The separation between SPARC-X-API and SPARC not only +prevents the need for hard-coding parameter lists into the API, but +also facilitates easier maintenance: the "central truth" of parameters +remains in the SPARC documentation, maintained by the SPARC core +developers, while SPARC-X-API can focus on providing a user-friendly +interface without being tied to constant updates, maximizing +flexibility. 
+ +## `sparc.calculator`: Socket-Communication Calculator Interface + +The submodule `sparc.calculator` provides a class `SPARC` as the main +entry point for driving SPARC calculations, which provides two modes +of operation: 1) a file I/O-based calculator extending the +`ase.calculators.calculator.FileIOCalculator` class, and 2) a comprehensive +socket communication layer that allows direct communication between +the Python API and low-level C-code. + +In file I/O mode, the SPARC calculator object utilizes the +`sparc.io.SparcBundle` for generating input files and +`sparc.api.SparcAPI` for parameter validation, while the mode of +calculation (single-point, relaxation or molecular dynamics) is +controlled by the input flags. For users transitioning from other DFT +packages and their ASE calculators, SPARC-X-API is designed to +minimize adaptation effort. The `SPARC` calculator class supports two sets +of input parameters: 1) lower-case special parameters that follow +conventions from other ASE DFT calculators (e.g. real-space grid +spacing `h` from GPAW, and exchange-correlation keyword `xc` from +VASP) that use the Angstrom-eV system, and 2) case-insensitive raw SPARC +input parameters in Bohr-Hartree units for fine-grained control. This +dual approach is designed so that users familiar with other DFT codes +can adopt SPARC with minimal changes to their existing +workflows. Basic DFT calculations can be covered by using special +parameter sets alone in SPARC-X-API, as shown by the side-by-side +constructor with VASP and GPAW, using the same +exchange-correlation functional and compatible convergence settings: + + +```py +#1. Using VASP +from ase.calculators.vasp import Vasp +calc = Vasp(xc="pbe", kpts=(9, 9, 9), encut=450, ediff=1.e-4) + +#2. Using GPAW +from gpaw import GPAW +calc = GPAW(xc="pbe", kpts=(9, 9, 9), h=0.25, convergence={"energy": 1.e-4}) + +#3. 
Using SPARC +from sparc.calculator import SPARC +calc = SPARC(xc="pbe", kpts=(9, 9, 9), h=0.25, convergence={"energy": 1.e-4}) +``` + +In high-throughput frameworks requiring thousands of single-point DFT +evaluations, relying on file I/O mode can be inefficient, as +calculations are restarted at each DFT call and the total number of +files easily exceeds SPARC's file count limit. The socket layer in +SPARC-X-API avoids this overhead by directly communicating with a +long-running SPARC process for updating atomic positions, while +keeping density and orbitals in memory and reducing self-consistent +field (SCF) cycles. While alternative communication methods exist, +such as C-binding approaches seen in GPAW [@mortensen_gpaw_2_2024] and +Psi4 [@smith_psi4_2020], these typically involve complex compilation +and integration steps when installing the Python package. We chose a +socket-based communication layer for its simplicity, which allows for +a clear separation between the Python and SPARC codebases, minimal +modifications to the existing C code, and ease of installation without +requiring recompilation. + +The communication protocol used in SPARC-X-API, referred to as the +SPARC protocol, is based on the i-PI protocol +[@ceriotti_i-pi-1.0_2014; @kapil_i-pi-2.0_2019], which is also adopted +by a wide range of ASE calculators. The SPARC protocol introduces +additional header types and supports binary data transfers via +Python's pickle format. While SPARC’s C-code maintains compatibility +with the original i-PI standard, SPARC-X-API leverages this extended +version with pickle decoding. The two-tier design offers flexibility +for socket calculations. At its core, the SPARC binary can communicate +directly with any i-PI-compatible server, such as +`ase.calculators.socketio.SocketIOCalculator` in ASE, using the basic +protocol, though this requires careful setup by the user. SPARC-X-API +further simplifies running socket-mode calculations. 
Leveraging the +SPARC protocol, the API internally relays socket data to the SPARC +binary, handling object decoding and socket resets automatically. When +running socket calculations on a single machine, users can activate +socket mode by simply adding `use_socket=True` to the `SPARC` +calculator constructor, enabling UNIX socket communication without +additional setup. More importantly, the design of the SPARC protocol +allows easy and seamless integration in distributed computational +systems, offering the following features: 1) flexible client +initialization / restart, 2) efficient data transfer, and 3) heterogeneous +computational setup. \autoref{fig:socket-hetero} summarizes the +server-client setup across hybrid computing platforms. + +![**NEED REVISION** Example of socket communication across hybrid computing platforms using SPARC-X-API +\label{fig:socket-hetero} +](fig/fig_socket_hetero.svg){ width=100% } + + +The design of the SPARC protocol allows insertion of additional +bidirectional routines between two DFT calls, allowing further control +over the low-level C-code. + + + +## Miscellaneous Helper Functionalities + +SPARC-X-API provides several helper functions to facilitate user +installation and testing, including: + +- `sparc.quicktest`: a utility to verify the installation and + environment setups for `SPARC-X-API` and `SPARC`. +- `sparc.docparser`: a submodule to convert existing `LaTeX` + documentation included in the SPARC source code into a JSON schema. +- `sparc.download_data`: a tool to download the latest ONCV + pseudopotentials released by SPARC. +- `sparc-ase`: an extension to the command-line `ase` tool, adding + compatibility with SPARC file formats. # Code Release and Maintenance -SPARC-X-API maximizes accessibility for users by providing streamlined -installation via the conda-forge channel, where the sparc-x-api -package can be installed with default ONCV pseudopotentials. 
It also -integrates continuous integration (CI) and continuous deployment (CD) -workflows for: + +SPARC-X-API is released as source code in the GitHub repository +[https://github.com/SPARC-X/SPARC-X-API](https://github.com/SPARC-X/SPARC-X-API), +and as a `conda-forge` package +[`sparc-x-api`](https://anaconda.org/conda-forge/sparc-x-api). When +installing using `conda-forge`, the package is bundled with the +optimized pseudopotentials [@shojaei_sparc_pseudopot_2023], and +is compatible with the +[`sparc-x`](https://anaconda.org/conda-forge/sparc-x) package that +contains the compiled SPARC binary. + +It also uses continuous integration (CI) workflows for: - Unit testing and code coverage - Fetching the latest SPARC documentation for updating the JSON schema