Skip to content

Commit

Permalink
add OpenCitationsCitedCountDatabase
Browse files Browse the repository at this point in the history
  • Loading branch information
miku committed Feb 9, 2022
1 parent eb99ef9 commit cb4cbbf
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions python/labe/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,33 @@ def on_success(self):
self.create_symlink(name="current")


class OpenCitationsCitedCountDatabase(Task):
    """
    Generate a database mapping DOI to inbound link count. Could be used to
    augment metadata with citation count, which in turn could be used to sort
    by this data.

    The database is built by streaming the decompressed output of
    OpenCitationsCitedCountTable into the external ``makta`` tool; both
    ``zstdcat`` and ``makta`` need to be available on the PATH.
    """

    def requires(self):
        """Depend on the table of cited counts this database is built from."""
        return OpenCitationsCitedCountTable()

    def run(self):
        """
        Decompress the input table and pipe it into ``makta``, then move the
        resulting file to the final output location.
        """
        # shellout fills in {output} itself and returns that path; the file is
        # only moved into place after the pipeline has finished.
        output = shellout(
            r"""
            zstdcat -T0 {input} |
            makta -T INTEGER -init -o {output}
            """,
            input=self.input().path,
        )
        luigi.LocalTarget(output).move(self.output().path)

    def output(self):
        """
        Return the target for the database file, named after the hash
        returned by ``open_citations_url_hash`` with a ``.db`` extension.
        """
        fingerprint = self.open_citations_url_hash()
        filename = "{}.db".format(fingerprint)
        # The file written by makta is a plain database, not a zstd stream
        # (the input is decompressed by zstdcat in run), so the target must not
        # declare the Zstd format; otherwise opening it through luigi would
        # attempt to (de)compress the database file.
        return luigi.LocalTarget(path=self.path(filename=filename))

    def on_success(self):
        """After a successful run, create the symlink named "current"."""
        # NOTE(review): create_symlink is defined outside this block;
        # presumably it links "current" to this task's output -- confirm.
        self.create_symlink(name="current")


class SolrFetchDocs(Task):
"""
Fetch JSON data from SOLR; uses solrdump (https://github.com/ubleipzig/solrdump).
Expand Down Expand Up @@ -411,5 +438,7 @@ def requires(self):
yield SolrDatabase(date=self.date, name="slub-production", short=False)
yield IdMappingDatabase(date=self.date)
yield OpenCitationsDatabase()
# This is generated, but not used yet.
yield OpenCitationsCitedCountDatabase()
# We want OpenCitationsRanked for cache warmup.
yield OpenCitationsRanked()

0 comments on commit cb4cbbf

Please sign in to comment.