From 325efda28ee2238a89a4edf22031662515a732e0 Mon Sep 17 00:00:00 2001 From: jhpoelen Date: Mon, 20 Jun 2022 16:45:57 -0500 Subject: [PATCH] add schema.jq file --- README.md | 6 ++---- schema.jq | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 schema.jq diff --git a/README.md b/README.md index a2a51c5..53f9601 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,8 @@ Machine readable version of Handbook of the Mammals of the World Physical copies of Handbook of the Mammals of the World are a valuable taxonomic resource for mammals. -This repository uses Plazi digitized versions of these physical copies to generate [hmw.json](hmw.json), and [hmw.csv](hmw.csv). For your convenience, the first 10 records can be found [hmw-sample.json](hmw-sample.json), [hmw-sample-pretty.json](hmw-sample-pretty.json) and [hmw-sample.csv](hmw-sample.csv). The json and csv files are derived from a Preston tracked version of Plazi's treatment bank available via https://github.com/plazi/treatments-xml. You can find a copy of a tracked copy of this in the [Releases](../releases) section of this repository. - - -[Preston](https://preston.guoda.bio), [jq](https://stedolan.github.io/jq/), Miller (https://miller.readthedocs.io/en/latest/), and other commandline tools (e.g., ```time```, ```grep```, ```gzip```, ```tee```). +This repository uses Plazi digitized versions of these physical copies to generate [hmw.json](hmw.json), and [hmw.csv](hmw.csv). For your convenience, the first 10 records can be found [hmw-sample.json](hmw-sample.json), [hmw-sample-pretty.json](hmw-sample-pretty.json) and [hmw-sample.csv](hmw-sample.csv). The json files are derived from a Preston tracked version of Plazi's treatment bank available via https://github.com/plazi/treatments-xml. You can find a copy of a tracked copy of this in the [Releases](../releases) section of this repository. 
+The csv files are generated using [jq](https://stedolan.github.io/jq/) using [schema.jq](schema.jq) and Miller (https://miller.readthedocs.io/en/latest/). ```bash
diff --git a/schema.jq b/schema.jq
new file mode 100644
index 0000000..6367f07
--- /dev/null
+++ b/schema.jq
@@ -0,0 +1,27 @@
+{ # Project one input record onto a fixed, ordered set of keys; `{name}` is jq shorthand for `{"name": .name}`.
+  docId,
+  docOrigin,
+  docISBN,
+  docName,
+  docMasterId,
+  docPageNumber,
+  derivedFrom: .["http://www.w3.org/ns/prov#wasDerivedFrom"], # the only key renamed: the source key is a full IRI
+  name,
+  interpretedGenus,
+  interpretedSpecies,
+  interpretedAuthorityName,
+  interpretedAuthorityYear,
+  commonNames,
+  taxonomy,
+  subspeciesAndDistribution,
+  descriptiveNotes,
+  habitat,
+  foodAndFeeding,
+  breeding,
+  activityPatterns,
+  movementsHomeRangeAndSocialOrganization,
+  statusAndConservation,
+  bibliography,
+  distributionImageURL,
+  verbatimText
+}