diff --git a/README.md b/README.md
index a0371b8..fed93d0 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,7 @@ The package has been tested for Python 3.7. Required packages are available in `
 `litstudy` supports several data sources. Some of these sources (such as semantic Scholar, CrossRef, and arXiv) are openly available. However to access the Scopus API, you (or your institute) requires a Scopus subscription and you need to request an Elsevier Developer API key (see [Elsevier Developers](https://dev.elsevier.com/index.jsp)).
+For more information, see the [guide](https://pybliometrics.readthedocs.io/en/stable/access.html) by `pybliometrics`.
 
 ## License
diff --git a/docs/faq.rst b/docs/faq.rst
index 85644f2..c12c84e 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -13,6 +13,7 @@ To use the Scopus API, you will need two things:
 * Be connected to the network of your University or Research Institute for which you obtained the API key.
 
 LitStudy will ask for the API key on the first time that it launches.
+For more information, see the `guide <https://pybliometrics.readthedocs.io/en/stable/access.html>`_ by pybliometrics.
diff --git a/litstudy/sources/ieee.py b/litstudy/sources/ieee.py
index 0c24e3a..df06584 100644
--- a/litstudy/sources/ieee.py
+++ b/litstudy/sources/ieee.py
@@ -1,6 +1,7 @@
 from ..types import Document, Author, DocumentSet, DocumentIdentifier, Affiliation
 from ..common import robust_open
 import csv
+import logging
 
 
 class IEEEDocument(Document):
@@ -19,9 +20,28 @@ def title(self) -> str:
     def authors(self):
         authors = self.entry.get("Authors", "").split("; ")
         affs = self.entry.get("Author Affiliations", "").split("; ")
-        assert len(authors) == len(affs)
+
+        # Bug fix #55:
+        # In some cases, the number of affiliations does not match the number of authors
+        # given by the CSV file. Since there is no way of knowing which affiliations belong
+        # to which authors, we just ignore all affiliations in this case.
+        if len(authors) != len(affs):
+            logging.warning(
+                (
+                    f"affiliations for entry '{self.title}' are invalid: the number of authors "
+                    f"({len(authors)}) does not match the number of author affiliations ({len(affs)})"
+                )
+            )
+
+            affs = [None] * len(authors)
+
         return [IEEEAuthor(a, b) for a, b in zip(authors, affs)]
 
+    @property
+    def affiliations(self):
+        affs = self.entry.get("Author Affiliations", "").split("; ")
+        return [IEEEAffiliation(a) for a in affs]
+
     @property
     def publisher(self):
         return self.entry.get("Publisher")
@@ -85,8 +105,8 @@ def name(self):
 
     @property
     def affiliations(self):
-        # Special case where affiliation is NA (not applicable)
-        if self._affiliation == "NA":
+        # Handle special case where affiliation is NA (not applicable)
+        if not self._affiliation or self._affiliation == "NA":
             return None
 
         return [IEEEAffiliation(self._affiliation)]
diff --git a/litstudy/types.py b/litstudy/types.py
index b052e5b..81f1b25 100644
--- a/litstudy/types.py
+++ b/litstudy/types.py
@@ -460,16 +460,19 @@ def affiliations(self) -> Optional[List["Affiliation"]]:
         """The affiliations associated with the authors of this document."""
         authors = self.authors
 
-        if authors is not None:
-            items = dict()
+        if authors is None:
+            return None
 
-            for author in authors:
-                for aff in author.affiliations:
-                    items[aff.name] = aff
+        items = dict()
+        for author in authors:
+            affiliations = author.affiliations
 
-            return list(items.values())
+            if affiliations:
+                for aff in affiliations:
+                    if aff.name:
+                        items[aff.name] = aff
 
-        return None
+        return list(items.values())
 
     @property
     def publisher(self) -> Optional[str]:
diff --git a/tests/resources/ieee.csv b/tests/resources/ieee.csv
index 78b5e7b..dcd78df 100644
--- a/tests/resources/ieee.csv
+++ b/tests/resources/ieee.csv
@@ -1,6 +1,6 @@
 "Document Title",Authors,"Author Affiliations","Publication Title",Date Added To Xplore,"Publication Year","Volume","Issue","Start Page","End Page","Abstract","ISSN",ISBNs,"DOI",Funding Information,PDF Link,"Author Keywords","IEEE Terms","INSPEC Controlled Terms","INSPEC Non-Controlled Terms","Mesh_Terms",Article Citation Count,Patent Citation Count,"Reference Count","License",Online Date,Issue Date,"Meeting Date","Publisher",Document Identifier
 "Exascale Computing Trends: Adjusting to the ""New Normal""' for Computer Architecture","P. Kogge; J. Shalf",University of Notre Dame; Lawrence Berkeley National Laboratory,"Computing in Science & Engineering","4 Feb 2014","2013","15","6","16","26","We now have 20 years of data under our belt about the performance of supercomputers against at least a single floating-point benchmark from dense linear algebra. Until about 2004, a single model of parallel programming, bulk synchronous using the MPI model, was sufficient to permit translation into reasonable parallel programs for more complex applications. Starting in 2004, however, a confluence of events changed forever the architectural landscape that underpinned MPI. The first half of this article goes into the underlying reasons for these changes, and what they mean for system architectures.
The second half then addresses the view going forward in terms of our standard scaling models and their profound implications for future programming and algorithm design.","1558-366X","","10.1109/MCSE.2013.95","","https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6634083","Computer architecture;Market research;Transistors;Programming;Computational modeling;Memory management;Systems engineering and theory;scientific computing;exascale;HPC;computer architecture;programming models","Computer architecture;Market research;Transistors;Programming;Computational modeling;Memory management;Systems engineering and theory","application program interfaces;computer architecture;floating point arithmetic;linear algebra;mainframes;message passing;parallel machines;parallel programming","standard scaling models;system architectures;MPI model;bulk synchronous;parallel programming;linear algebra;single floating-point benchmark;supercomputer;computer architecture;exascale computing","","51","","13","","16 Oct 2013","","","IEEE","IEEE Magazines" -"European HPC Landscape","F. Berberich; J. Liebmann; J. -P. Nominé; O. Pineda; P. Segers; V. Teodor","PRACE aisbl and Jülich Supercomputing Center, Forschungszetrum Juelich GmbH; Jülich Supercomputing Center, Forschungszetrum Juelich GmbH; ETP4HPC and Commissariat à l'énergie atomique et aux énergies alternatives; PRACE aisbl and Barcelona Supercomputing Center; Grand équipement national de calcul intensif; Jülich Supercomputing Center, Forschungszetrum Juelich GmbH","2019 15th International Conference on eScience (eScience)","19 Mar 2020","2019","","","471","478","This paper provides an overview on the European HPC landscape supported by a survey, designed by the PRACE-5IP project, accessing more than 80 of the most influential stakeholders of HPC in Europe. It focuses on Tier-0 systems on a European level providing high-end computing and data analysis resources. The different actors are presented and their provided services are analyzed in order to identify overlaps and gaps, complementarity and opportunities for collaborations. A new pan-European HPC portal is proposed in order to get all information on one place and access the different services.","","978-1-7281-2451-3","10.1109/eScience.2019.00062","","https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9041695","European;High Performance Computing;HPC;Ecosystem;Exascale;services;platform","Europe;Ecosystems;Industries;Investment;Artificial intelligence;Supercomputers;Technological innovation","data analysis;energy conservation;mainframes;parallel processing;portals;power aware computing","provided services;pan-European HPC portal;European HPC landscape;PRACE-5IP project;European level;high-end computing;data analysis resources;tier-0 systems","","","","27","","19 Mar 2020","","","IEEE","IEEE Conferences" +"European HPC Landscape","F. Berberich; J. Liebmann; J. -P. Nominé; O. Pineda; P. Segers; V. 
Teodor","PRACE aisbl and Jülich Supercomputing Center, Forschungszetrum Juelich GmbH; Jülich Supercomputing Center, Forschungszetrum Juelich GmbH; ETP4HPC and Commissariat à l'énergie atomique et aux énergies alternatives; PRACE aisbl and Barcelona Supercomputing Center; Grand équipement national de calcul intensif; Jülich Supercomputing Center, Forschungszetrum Juelich GmbH; another affiliation","2019 15th International Conference on eScience (eScience)","19 Mar 2020","2019","","","471","478","This paper provides an overview on the European HPC landscape supported by a survey, designed by the PRACE-5IP project, accessing more than 80 of the most influential stakeholders of HPC in Europe. It focuses on Tier-0 systems on a European level providing high-end computing and data analysis resources. The different actors are presented and their provided services are analyzed in order to identify overlaps and gaps, complementarity and opportunities for collaborations. A new pan-European HPC portal is proposed in order to get all information on one place and access the different services.","","978-1-7281-2451-3","10.1109/eScience.2019.00062","","https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9041695","European;High Performance Computing;HPC;Ecosystem;Exascale;services;platform","Europe;Ecosystems;Industries;Investment;Artificial intelligence;Supercomputers;Technological innovation","data analysis;energy conservation;mainframes;parallel processing;portals;power aware computing","provided services;pan-European HPC portal;European HPC landscape;PRACE-5IP project;European level;high-end computing;data analysis resources;tier-0 systems","","","","27","","19 Mar 2020","","","IEEE","IEEE Conferences" "Tracking Performance Portability on the Yellow Brick Road to Exascale","T. Deakin; A. Poenaru; T. Lin; S. McIntosh-Smith","University of Bristol,Department of Computer Science,UK; University of Bristol,Department of Computer Science,UK; University of Bristol,Department of Computer Science,UK; University of Bristol,Department of Computer Science,UK","2020 IEEE/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC)","1 Jan 2021","2020","","","1","13","With Exascale machines on our immediate horizon, there is a pressing need for applications to be made ready to best exploit these systems. However, there will be multiple paths to Exascale, with each system relying on processor and accelerator technologies from different vendors. As such, applications will be required to be portable between these different architectures, but it is also critical that they are efficient too. These double requirements for portability and efficiency begets the need for performance portability. In this study we survey the performance portability of different programming models, including the open standards OpenMP and SYCL, across the diverse landscape of Exascale and pre-Exascale processors from Intel, AMD, NVIDIA, Fujitsu, Marvell, and Amazon, together encompassing GPUs and CPUs based on both x86 and Arm architectures. 
We also take a historical view and analyse how performance portability has changed over the last year.","","978-1-6654-2287-1","10.1109/P3HPC51967.2020.00006","","https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9309052","performance portability;programming models","Graphics processing units;Kernel;Biological system modeling;Computer architecture;Bandwidth;Parallel programming;Performance evaluation","application program interfaces;coprocessors;graphics processing units;library automation;microprocessor chips;multiprocessing systems;parallel architectures;parallel processing;parallel programming;power aware computing","performance portability;different programming models;pre-Exascale processors;yellow brick road;Exascale machines;processor;accelerator technologies","","5","","18","","1 Jan 2021","","","IEEE","IEEE Conferences" "Predicting the Energy Consumption of CUDA Kernels using SimGrid","D. Boughzala; L. Lefèvre; A. -C. Orgerie","Univ Lyon, EnsL, UCBL, CNRS, Inria, LIP; Univ Lyon, EnsL, UCBL, CNRS, Inria, LIP; CNRS, IRISA","2020 IEEE 32nd International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","22 Oct 2020","2020","","","191","198","Building a sustainable Exascale machine is a very promising target in High Performance Computing (HPC). To tackle the energy consumption challenge while continuing to provide tremendous performance, the HPC community have rapidly adopted GPU-based systems. Today, GPUs have became the most prevailing components in the massively parallel HPC landscape thanks to their high computational power and energy efficiency. Modeling the energy consumption of applications running on GPUs has gained a lot of attention for the last years. Alas, the HPC community lacks simple yet accurate simulators to predict the energy consumption of general purpose GPU applications. In this work, we address the prediction of the energy consumption of CUDA kernels via simulation. We propose in this paper a simple and lightweight energy model that we implemented using the open-source framework SimGrid. Our proposed model is validated across a diverse set of CUDA kernels and on two different NVIDIA GPUs (Tesla M2075 and Kepler K20Xm). As our modeling approach is not based on performance counters or detailed-architecture parameters, we believe that our model can be easily approved by users who take care of the energy consumption of their GPGPU applications.","2643-3001","978-1-7281-9924-5","10.1109/SBAC-PAD49847.2020.00035","","https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9235065","GPGPU computing, CUDA kernels, Energy modeling, Simulation","Graphics processing units;Kernel;Energy consumption;Computational modeling;Instruction sets;Computer architecture;Scheduling","application program interfaces;coprocessors;graphics processing units;grid computing;parallel architectures;parallel processing;power aware computing;public domain software","high performance computing;energy consumption challenge;CUDA kernels;lightweight energy model;massively parallel HPC;GPU-based system;SimGrid;NVIDIA GPU;Tesla M2075 GPU;Kepler K20Xm GPU;GPGPU applications;exascale machine","","","","33","","22 Oct 2020","","","IEEE","IEEE Conferences" "Towards HPC and Big Data Analytics Convergence: Design and Experimental Evaluation of a HPDA Framework for eScience at Scale","D. Elia; S. Fiore; G. 
Aloisio","Euro-Mediterranean Centre on Climate Change (CMCC) Foundation, Lecce, Italy; Department of Information Engineering and Computer Science, University of Trento, Trento, Italy; Euro-Mediterranean Centre on Climate Change (CMCC) Foundation, Lecce, Italy","IEEE Access","21 May 2021","2021","9","","73307","73326","Over the last two decades, scientific discovery has increasingly been driven by the large availability of data from a multitude of sources, including high-resolution simulations, observations and instruments, as well as an enormous network of sensors and edge components. In such a dynamic and growing landscape where data continue to expand, advances in Science have become intertwined with the capacity of analysis tools to effectively handle and extract valuable information from this ocean of data. In view of the exascale era of supercomputers that is rapidly approaching, it is of the utmost importance to design novel solutions that can take full advantage of the upcoming computing infrastructures. The convergence of High Performance Computing (HPC) and data-intensive analytics is key to delivering scalable High Performance Data Analytics (HPDA) solutions for scientific and engineering applications. The aim of this paper is threefold: reviewing some of the most relevant challenges towards HPDA at scale, presenting a HPDA-enabled version of the Ophidia framework and validating the scalability of the proposed framework through an experimental performance evaluation carried out in the context of the Centre of Excellence in Simulation of Weather and Climate in Europe (ESiWACE). The experimental results show that the proposed solution is capable of scaling over several thousand cores and hundreds of cluster nodes. The proposed work is a contribution in support of scientific large-scale applications along the wider convergence path of HPC and Big Data followed by the scientific research community.","2169-3536","","10.1109/ACCESS.2021.3079139","European Union’s Horizon 2020 Research and Innovation Programme through the Project ESiWACE2(grant numbers:823988); European Union’s Horizon 2020 Research and Innovation Programme through the Project EXDCI-2(grant numbers:800957); ","https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9428012","Extreme-scale data challenges;HPC and big data convergence;high performance data analytics (HPDA);performance evaluation;scientific data analysis","Big Data;Data analysis;Convergence;Data models;Europe;Ecosystems;Software","Big Data;data analysis;natural sciences computing;parallel processing;Web services","scientific research community;wider convergence path;large-scale applications;experimental performance evaluation;Ophidia framework;HPDA-enabled version;scientific engineering applications;scalable High Performance Data Analytics solutions;data-intensive analytics;HPC;High Performance Computing;upcoming computing infrastructures;dynamic growing landscape;edge components;high-resolution simulations;scientific discovery;HPDA framework;experimental evaluation","","","","105","CCBY","11 May 2021","","","IEEE","IEEE Journals" diff --git a/tests/test_sources_ieee.py b/tests/test_sources_ieee.py index 0f09e92..a88ea42 100644 --- a/tests/test_sources_ieee.py +++ b/tests/test_sources_ieee.py @@ -22,3 +22,12 @@ def test_load_ieee_csv(): assert author.name == "P. 
Kogge" assert len(author.affiliations) == 1 assert author.affiliations[0].name == "University of Notre Dame" + + # For the second document, the number of authors does not match the number of + # affiliations. This means we can get the affiliations via `doc.affiliations`, + # but the authors do not have affilations themselves + doc = docs[1] + assert doc.title == "European HPC Landscape" + assert len(doc.affiliations) == 7 + assert len(doc.authors) == 6 + assert doc.authors[0].affiliations is None