From 7ed4ff427013e1acde70d72aa886f852127bb687 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Fri, 21 Oct 2016 20:01:27 -0400 Subject: [PATCH 01/73] Junk, mostly. Some salvagable --- gratipay/chomp/__init__.py | 61 ++++++++++++++++++++++++++++++++++++++ gratipay/models/package.py | 12 ++++++++ gratipay/wireup.py | 4 ++- setup.py | 1 + sql/branch.sql | 26 ++++++++++++++++ 5 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 gratipay/chomp/__init__.py create mode 100644 gratipay/models/package.py create mode 100644 sql/branch.sql diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py new file mode 100644 index 0000000000..de9ad36528 --- /dev/null +++ b/gratipay/chomp/__init__.py @@ -0,0 +1,61 @@ +"""Process npm artifacts. + +The main function is exposed as a console script named `chomp` via setup.py. + +""" +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import sys + +import requests +from gratipay.utils import markdown + + +def from_npm(package): + """Given an npm package dict, return a dict of info and a list of emails. + """ + out= {} + out['name'] = package['name'] + out['description'] = package['description'] + out['long_description'] = markdown.marky(package['readme']) + out['long_description_raw'] = package['readme'] + out['long_description_type'] = 'x-text/marky-markdown' + + emails = [] + for key in ('authors', 'maintainers'): + for person in package.get(key, []): + if type(person) is dict: + email = person.get('email') + if email: + emails.append(email) + + return out, emails + + +def process_catalog(catalog): + SQL = '' + return SQL + + +def fetch_catalog(): + r = requests.get('https://registry.npmjs.com/-/all') + r.raise_for_status() + return r.json() + + +def update_database(SQL): + pass + + +def parse_args(argv): + p = argparse.ArgumentParser() + p.add_argument( 'if_modified_since' + , help='a number of minutes in the past, past which we need new updates' + ) + return p.parse_args(argv) + + +def main(argv=sys.argv): + ims = parse_args(argv[1:]).if_modified_since + process_catalog(fetch_catalog(), ims) diff --git a/gratipay/models/package.py b/gratipay/models/package.py new file mode 100644 index 0000000000..af5b0be893 --- /dev/null +++ b/gratipay/models/package.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +from postgres import orm + + +class Package(orm.Model): + typ_name = 'packages'; + + @classmethod + def from_platform_and_name(cls, platform, name): + return cls.db.one("SELECT packages.*::packages from packages where " + "platform=%s and name=%s;", (platform, name,)) diff --git a/gratipay/wireup.py b/gratipay/wireup.py index 8aa1dd220e..a64f9d2a8b 100644 --- a/gratipay/wireup.py +++ b/gratipay/wireup.py @@ -29,6 +29,7 @@ from gratipay.elsewhere.openstreetmap import OpenStreetMap from gratipay.elsewhere.twitter import Twitter from gratipay.elsewhere.venmo import Venmo +from gratipay.models.npm_package import Package from gratipay.models.account_elsewhere import AccountElsewhere from gratipay.models.community import Community from gratipay.models.country import Country @@ -56,7 +57,8 @@ def db(env): maxconn = env.database_maxconn db = GratipayDB(dburl, maxconn=maxconn) - for model in (AccountElsewhere, Community, Country, ExchangeRoute, Participant, Team): + models = (AccountElsewhere, Community, Country, ExchangeRoute, Participant, Team, Package) + for model in models: db.register_model(model) gratipay.billing.payday.Payday.db = db diff --git a/setup.py b/setup.py index 888240d06d..e4615bd19f 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ , entry_points = { 'console_scripts' : [ 'payday=gratipay.cli:payday' , 'fake_data=gratipay.utils.fake_data:main' + , 'chomp=gratipay.chomp:main' ] } ) diff --git a/sql/branch.sql b/sql/branch.sql new file mode 100644 index 0000000000..f1d643ab82 --- /dev/null +++ b/sql/branch.sql @@ -0,0 +1,26 @@ +BEGIN; + + CREATE TABLE package_managers + ( id bigserial PRIMARY KEY + , name text NOT NULL UNIQUE + ); + + CREATE TABLE packages + ( id bigserial PRIMARY KEY + , package_manager_id bigint NOT NULL + , name text NOT NULL + , description text NOT NULL DEFAULT '' + , long_description text NOT NULL DEFAULT '' + , long_description_raw text NOT NULL DEFAULT '' + , long_description_type text NOT NULL DEFAULT '' + , mtime timestamp with time zone NOT NULL + , UNIQUE (package_manager_id, name) + ); + + CREATE TABLE package_emails + ( package_id bigint NOT NULL + , email text NOT NULL + , UNIQUE (package_id, email) + ) + +END; From 9113adb16da58af696927d3b36f0b0b932f8ee30 Mon Sep 17 00:00:00 2001 From: aandis Date: Sat, 22 Oct 2016 23:13:48 +0530 Subject: [PATCH 02/73] Stub registry request. --- gratipay/chomp/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index de9ad36528..4154837894 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -39,9 +39,11 @@ def process_catalog(catalog): def fetch_catalog(): - r = requests.get('https://registry.npmjs.com/-/all') - r.raise_for_status() - return r.json() + # r = requests.get('https://registry.npmjs.com/-/all', verify=False) + # r.raise_for_status() + # return r.json() + import json + return json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"robdel12","email":"Robertdeluca19@gmail.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Robert DeLuca"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') def update_database(SQL): From 82f0c8ac0ae18f889950b620bf4d3cdf6b31516d Mon Sep 17 00:00:00 2001 From: aandis Date: Sat, 22 Oct 2016 23:30:16 +0530 Subject: [PATCH 03/73] Don't read readme since it's not there. --- gratipay/chomp/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index 4154837894..dbc6de5eb4 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -18,9 +18,6 @@ def from_npm(package): out= {} out['name'] = package['name'] out['description'] = package['description'] - out['long_description'] = markdown.marky(package['readme']) - out['long_description_raw'] = package['readme'] - out['long_description_type'] = 'x-text/marky-markdown' emails = [] for key in ('authors', 'maintainers'): From 423cea8f4eb35f75b292f724aff1f6bbd9bd67e6 Mon Sep 17 00:00:00 2001 From: aandis Date: Sat, 22 Oct 2016 23:58:12 +0530 Subject: [PATCH 04/73] Write insertion data to io for copy. --- gratipay/chomp/__init__.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index dbc6de5eb4..cbf35f7aaf 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -8,16 +8,24 @@ import argparse import sys +import psycopg2 + import requests -from gratipay.utils import markdown +from gratipay.utils import markdown +from io import StringIO -def from_npm(package): +def from_npm(package, package_id): """Given an npm package dict, return a dict of info and a list of emails. """ - out= {} + out = {} + out['id'] = str(package_id) out['name'] = package['name'] out['description'] = package['description'] + out['package_manager_id'] = "1" # Todo - generalize + out['long_description'] = 'null' + out['long_description_raw'] = 'null' + out['long_description_type'] = 'null' emails = [] for key in ('authors', 'maintainers'): @@ -25,14 +33,23 @@ def from_npm(package): if type(person) is dict: email = person.get('email') if email: - emails.append(email) + emails.append({ 'package_id': str(package_id), 'email': email }) return out, emails +def stringify(obj): + return '\t'.join([v for k, v in obj.iteritems()]) -def process_catalog(catalog): - SQL = '' - return SQL +def insert_catalog(catalog): + package_id = 0 + package_data = StringIO() + email_data = StringIO() + for k, v in catalog.iteritems(): + package_id += 1 + package, emails = from_npm(v, package_id) + package_data.write(stringify(package) + '\n') + for email in emails: + email_data.write(stringify(email) + '\n') def fetch_catalog(): @@ -57,4 +74,4 @@ def parse_args(argv): def main(argv=sys.argv): ims = parse_args(argv[1:]).if_modified_since - process_catalog(fetch_catalog(), ims) + insert_catalog(fetch_catalog()) From 2b06b0328c1dba20c3e314263ad7f77c563935e8 Mon Sep 17 00:00:00 2001 From: aandis Date: Sun, 23 Oct 2016 19:25:26 +0530 Subject: [PATCH 05/73] Add foreign key constraint in packages. --- sql/branch.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/branch.sql b/sql/branch.sql index f1d643ab82..30be668b59 100644 --- a/sql/branch.sql +++ b/sql/branch.sql @@ -7,7 +7,7 @@ BEGIN; CREATE TABLE packages ( id bigserial PRIMARY KEY - , package_manager_id bigint NOT NULL + , package_manager_id bigint NOT NULL REFERENCES package_managers ON UPDATE CASCADE ON DELETE RESTRICT , name text NOT NULL , description text NOT NULL DEFAULT '' , long_description text NOT NULL DEFAULT '' From 7774bc0b0d39e7085662edf3a3ccb1e09d38f4a1 Mon Sep 17 00:00:00 2001 From: aandis Date: Sun, 23 Oct 2016 19:27:05 +0530 Subject: [PATCH 06/73] Add id column in emails and foreign key constraint to pacakges. --- sql/branch.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/branch.sql b/sql/branch.sql index 30be668b59..bb173f1e6b 100644 --- a/sql/branch.sql +++ b/sql/branch.sql @@ -18,8 +18,9 @@ BEGIN; ); CREATE TABLE package_emails - ( package_id bigint NOT NULL - , email text NOT NULL + ( id bigserial PRIMARY KEY + , package_id bigint NOT NULL REFERENCES packages ON UPDATE CASCADE ON DELETE RESTRICT + , email text NOT NULL , UNIQUE (package_id, email) ) From 43c5831cb8a33002f98549621fae8261c76cb34c Mon Sep 17 00:00:00 2001 From: aandis Date: Sun, 23 Oct 2016 20:05:29 +0530 Subject: [PATCH 07/73] Create npm class and modify insert method. --- gratipay/chomp/__init__.py | 106 +++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 41 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index cbf35f7aaf..69405dd56f 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -4,6 +4,7 @@ """ from __future__ import absolute_import, division, print_function, unicode_literals +from collections import OrderedDict import argparse import sys @@ -15,50 +16,68 @@ from gratipay.utils import markdown from io import StringIO -def from_npm(package, package_id): - """Given an npm package dict, return a dict of info and a list of emails. +class NPM(object): + """Represent npm. """ - out = {} - out['id'] = str(package_id) - out['name'] = package['name'] - out['description'] = package['description'] - out['package_manager_id'] = "1" # Todo - generalize - out['long_description'] = 'null' - out['long_description_raw'] = 'null' - out['long_description_type'] = 'null' - - emails = [] - for key in ('authors', 'maintainers'): - for person in package.get(key, []): - if type(person) is dict: - email = person.get('email') - if email: - emails.append({ 'package_id': str(package_id), 'email': email }) - - return out, emails + def __init__(self): + self.name = 'npm' + + def parse(self, package, package_manager_id, package_id): + """Given an npm package dict, return a dict of info and a list of emails to be imported. + """ + out = OrderedDict() + out['id'] = str(package_id) + out['package_manager_id'] = str(package_manager_id) + out['name'] = package['name'] + out['description'] = package['description'] if 'description' in package else '' + + emails = [] + for key in ('authors', 'maintainers'): + for person in package.get(key, []): + if type(person) is dict: + email = person.get('email') + if email: + emails.append(OrderedDict([ + ('package_id', str(package_id)), + ('email', email) + ])) + + return out, emails + + def fetch_catalog(self): + r = requests.get('https://registry.npmjs.com/-/all', verify=False) + r.raise_for_status() + return r.json() + def stringify(obj): return '\t'.join([v for k, v in obj.iteritems()]) -def insert_catalog(catalog): - package_id = 0 - package_data = StringIO() - email_data = StringIO() - for k, v in catalog.iteritems(): - package_id += 1 - package, emails = from_npm(v, package_id) - package_data.write(stringify(package) + '\n') - for email in emails: - email_data.write(stringify(email) + '\n') - - -def fetch_catalog(): - # r = requests.get('https://registry.npmjs.com/-/all', verify=False) - # r.raise_for_status() - # return r.json() - import json - return json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"robdel12","email":"Robertdeluca19@gmail.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Robert DeLuca"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') - +def insert_catalog_for(pm): + catalog = pm.fetch_catalog() + conn = psycopg2.connect(host='localhost', database='gratipay') + if catalog: + cursor = conn.cursor() + cursor.execute(""" + INSERT INTO package_managers (name) + VALUES (%s) RETURNING id + """, (pm.name,)) + package_manager_id = cursor.fetchone()[0] + package_id = 0 # Mass assign ids so they are usable during email insertion + package_data = StringIO() + email_data = StringIO() + for k, v in catalog.iteritems(): + package_id += 1 + package, emails = pm.parse(v, package_manager_id, package_id) + package_data.write(stringify(package) + '\n') + for email in emails: + email_data.write(stringify(email) + '\n') + package_data.seek(0) + email_data.seek(0) + + cursor.copy_from(package_data, 'packages', columns=["id", "package_manager_id", "name", "description"]) + cursor.copy_from(email_data, 'package_emails', columns=["package_id", "email"]) + conn.commit() def update_database(SQL): pass @@ -69,9 +88,14 @@ def parse_args(argv): p.add_argument( 'if_modified_since' , help='a number of minutes in the past, past which we need new updates' ) + p.add_argument('package_manager' + , help='the name of the package manager to import from' + ) return p.parse_args(argv) def main(argv=sys.argv): - ims = parse_args(argv[1:]).if_modified_since - insert_catalog(fetch_catalog()) + pm_arg = parse_args(argv[1:]).package_manager + if pm_arg == 'npm': + pm = NPM() + insert_catalog_for(pm) From 7c1425739464d8b4e644223b376aa4f50b728268 Mon Sep 17 00:00:00 2001 From: aandis Date: Sun, 23 Oct 2016 22:42:25 +0530 Subject: [PATCH 08/73] Insert mtime if it exists else default to now. --- gratipay/chomp/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index 69405dd56f..8a51c02668 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -30,6 +30,11 @@ def parse(self, package, package_manager_id, package_id): out['package_manager_id'] = str(package_manager_id) out['name'] = package['name'] out['description'] = package['description'] if 'description' in package else '' + if 'time' in package: + if type(package['time']) is dict and 'modified' in package['time']: + out['mtime'] = package['time']['modified'] + else: + out['mtime'] = 'now()' emails = [] for key in ('authors', 'maintainers'): From 1a927f0fc484d4bf0978caf60af6d26e7d7dd795 Mon Sep 17 00:00:00 2001 From: aandis Date: Sun, 23 Oct 2016 22:43:56 +0530 Subject: [PATCH 09/73] Check if package exists before inserting. --- gratipay/chomp/__init__.py | 77 ++++++++++++++++++++++++-------------- sql/branch.sql | 2 +- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index 8a51c02668..39d3c507d1 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -1,20 +1,18 @@ -"""Process npm artifacts. +"""Process package artifacts. The main function is exposed as a console script named `chomp` via setup.py. """ from __future__ import absolute_import, division, print_function, unicode_literals from collections import OrderedDict +from io import StringIO +from gratipay.utils import markdown import argparse import sys - import psycopg2 - import requests -from gratipay.utils import markdown -from io import StringIO class NPM(object): """Represent npm. @@ -22,12 +20,12 @@ class NPM(object): def __init__(self): self.name = 'npm' - def parse(self, package, package_manager_id, package_id): + def parse(self, package, package_id): """Given an npm package dict, return a dict of info and a list of emails to be imported. """ out = OrderedDict() out['id'] = str(package_id) - out['package_manager_id'] = str(package_manager_id) + out['package_manager_id'] = str(self.id) out['name'] = package['name'] out['description'] = package['description'] if 'description' in package else '' if 'time' in package: @@ -50,44 +48,55 @@ def parse(self, package, package_manager_id, package_id): return out, emails def fetch_catalog(self): + # import json + # return json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"robdel12","email":"Robertdeluca19@gmail.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Robert DeLuca"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') r = requests.get('https://registry.npmjs.com/-/all', verify=False) r.raise_for_status() return r.json() def stringify(obj): - return '\t'.join([v for k, v in obj.iteritems()]) + return '\t'.join([v for k, v in obj.iteritems()]) + '\n' -def insert_catalog_for(pm): +def insert_package_manager(pm, cursor): + """Insert a new package manager returning it's id. + """ + cursor.execute(""" + INSERT INTO package_managers (name) + VALUES (%s) RETURNING id + """, (pm.name,)) + return cursor.fetchone()[0] + +def package_manager_exists(pm, cursor): + cursor.execute(""" + SELECT package_managers.id + FROM package_managers + WHERE package_managers.name = (%s) + """, (pm.name,)) + return cursor.fetchone() + +def insert_catalog_for(pm, cursor): + """Insert all packages for a given package manager. + """ catalog = pm.fetch_catalog() - conn = psycopg2.connect(host='localhost', database='gratipay') if catalog: - cursor = conn.cursor() - cursor.execute(""" - INSERT INTO package_managers (name) - VALUES (%s) RETURNING id - """, (pm.name,)) - package_manager_id = cursor.fetchone()[0] package_id = 0 # Mass assign ids so they are usable during email insertion - package_data = StringIO() - email_data = StringIO() + package_stream, email_stream = StringIO(), StringIO() for k, v in catalog.iteritems(): package_id += 1 - package, emails = pm.parse(v, package_manager_id, package_id) - package_data.write(stringify(package) + '\n') + package, emails = pm.parse(v, package_id) + package_stream.write(stringify(package)) for email in emails: - email_data.write(stringify(email) + '\n') - package_data.seek(0) - email_data.seek(0) + email_stream.write(stringify(email)) + package_stream.seek(0) + email_stream.seek(0) - cursor.copy_from(package_data, 'packages', columns=["id", "package_manager_id", "name", "description"]) - cursor.copy_from(email_data, 'package_emails', columns=["package_id", "email"]) - conn.commit() + cursor.copy_from(package_stream, 'packages', columns=["id", "package_manager_id", "name", "description", "mtime"]) + cursor.copy_from(email_stream, 'package_emails', columns=["package_id", "email"]) def update_database(SQL): pass - def parse_args(argv): p = argparse.ArgumentParser() p.add_argument( 'if_modified_since' @@ -98,9 +107,19 @@ def parse_args(argv): ) return p.parse_args(argv) - def main(argv=sys.argv): pm_arg = parse_args(argv[1:]).package_manager if pm_arg == 'npm': pm = NPM() - insert_catalog_for(pm) + + conn = psycopg2.connect(host='localhost', database='gratipay') + cursor = conn.cursor() + + if package_manager_exists(pm, cursor): + print("This package manager already exists!") + else: + package_manager_id = insert_package_manager(pm, cursor) + setattr(pm, 'id', package_manager_id) + insert_catalog_for(pm, cursor) + + conn.commit() diff --git a/sql/branch.sql b/sql/branch.sql index bb173f1e6b..b37ca54c54 100644 --- a/sql/branch.sql +++ b/sql/branch.sql @@ -22,6 +22,6 @@ BEGIN; , package_id bigint NOT NULL REFERENCES packages ON UPDATE CASCADE ON DELETE RESTRICT , email text NOT NULL , UNIQUE (package_id, email) - ) + ); END; From e5270076ef43cd54680f80c6c1fa01c3a3c29163 Mon Sep 17 00:00:00 2001 From: aandis Date: Mon, 24 Oct 2016 00:04:02 +0530 Subject: [PATCH 10/73] Make sure it's a package before inserting. --- gratipay/chomp/__init__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index 39d3c507d1..f98544419f 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -83,11 +83,12 @@ def insert_catalog_for(pm, cursor): package_id = 0 # Mass assign ids so they are usable during email insertion package_stream, email_stream = StringIO(), StringIO() for k, v in catalog.iteritems(): - package_id += 1 - package, emails = pm.parse(v, package_id) - package_stream.write(stringify(package)) - for email in emails: - email_stream.write(stringify(email)) + if type(v) is dict: + package_id += 1 + package, emails = pm.parse(v, package_id) + package_stream.write(stringify(package)) + for email in emails: + email_stream.write(stringify(email)) package_stream.seek(0) email_stream.seek(0) From 076458b609b07aa185423c25854b89cddd2e813e Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 09:23:50 -0400 Subject: [PATCH 11/73] Prune dead function --- gratipay/chomp/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index f98544419f..32f84b07bb 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -95,9 +95,6 @@ def insert_catalog_for(pm, cursor): cursor.copy_from(package_stream, 'packages', columns=["id", "package_manager_id", "name", "description", "mtime"]) cursor.copy_from(email_stream, 'package_emails', columns=["package_id", "email"]) -def update_database(SQL): - pass - def parse_args(argv): p = argparse.ArgumentParser() p.add_argument( 'if_modified_since' From ca5f81fc4d3fdf55295e0df106a9351539716af5 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 09:26:58 -0400 Subject: [PATCH 12/73] Clean up a couple obvious bugs --- gratipay/models/package.py | 2 +- gratipay/wireup.py | 2 +- tests/py/test_chomp.py | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 tests/py/test_chomp.py diff --git a/gratipay/models/package.py b/gratipay/models/package.py index af5b0be893..2113de3ab5 100644 --- a/gratipay/models/package.py +++ b/gratipay/models/package.py @@ -4,7 +4,7 @@ class Package(orm.Model): - typ_name = 'packages'; + typname = 'packages'; @classmethod def from_platform_and_name(cls, platform, name): diff --git a/gratipay/wireup.py b/gratipay/wireup.py index a64f9d2a8b..28f0b9db3b 100644 --- a/gratipay/wireup.py +++ b/gratipay/wireup.py @@ -29,7 +29,7 @@ from gratipay.elsewhere.openstreetmap import OpenStreetMap from gratipay.elsewhere.twitter import Twitter from gratipay.elsewhere.venmo import Venmo -from gratipay.models.npm_package import Package +from gratipay.models.package import Package from gratipay.models.account_elsewhere import AccountElsewhere from gratipay.models.community import Community from gratipay.models.country import Country diff --git a/tests/py/test_chomp.py b/tests/py/test_chomp.py new file mode 100644 index 0000000000..a626ee2d3f --- /dev/null +++ b/tests/py/test_chomp.py @@ -0,0 +1,7 @@ +from gratipay.testing import Harness + + +class Tests(Harness): + + def test_insert_catalog_for(self): + pass From 9d3ee685e5a326f33d6937aa79130aa626512e1d Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 09:58:27 -0400 Subject: [PATCH 13/73] Write a test for insert_catalog_for --- gratipay/chomp/__init__.py | 3 --- gratipay/models/package.py | 12 +++++++++--- tests/py/test_chomp.py | 25 +++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index 32f84b07bb..8337f55efd 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -6,7 +6,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals from collections import OrderedDict from io import StringIO -from gratipay.utils import markdown import argparse import sys @@ -48,8 +47,6 @@ def parse(self, package, package_id): return out, emails def fetch_catalog(self): - # import json - # return json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"robdel12","email":"Robertdeluca19@gmail.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Robert DeLuca"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') r = requests.get('https://registry.npmjs.com/-/all', verify=False) r.raise_for_status() return r.json() diff --git a/gratipay/models/package.py b/gratipay/models/package.py index 2113de3ab5..b28c08eafa 100644 --- a/gratipay/models/package.py +++ b/gratipay/models/package.py @@ -7,6 +7,12 @@ class Package(orm.Model): typname = 'packages'; @classmethod - def from_platform_and_name(cls, platform, name): - return cls.db.one("SELECT packages.*::packages from packages where " - "platform=%s and name=%s;", (platform, name,)) + def from_names(cls, package_manager_name, package_name): + return cls.db.one(""" + + SELECT p.*::packages + FROM packages p + JOIN package_managers pm ON p.package_manager_id = pm.id + WHERE pm.name=%s AND p.name=%s + + """, (package_manager_name, package_name)) diff --git a/tests/py/test_chomp.py b/tests/py/test_chomp.py index a626ee2d3f..39f7305ac9 100644 --- a/tests/py/test_chomp.py +++ b/tests/py/test_chomp.py @@ -1,7 +1,28 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import json + +import mock + +from gratipay import chomp from gratipay.testing import Harness +from gratipay.models.package import Package + + +A11Y_ANNOUNCER = json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"robdel12","email":"Robertdeluca19@gmail.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Robert DeLuca"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') class Tests(Harness): - def test_insert_catalog_for(self): - pass + # icf - insert_catalog_for + + @mock.patch('gratipay.chomp.NPM.fetch_catalog') + def test_icf_inserts_catalog_for(self, fetch_catalog): + fetch_catalog.return_value = A11Y_ANNOUNCER + with self.db.get_cursor() as cursor: + pm = chomp.NPM() + package_manager_id = chomp.insert_package_manager(pm, cursor) + setattr(pm, 'id', package_manager_id) + chomp.insert_catalog_for(pm, cursor) + p = Package.from_names('npm', 'a11y-announcer') + assert p.name == 'a11y-announcer' From 872c1495c0cbdbf8528b6a6a31e0f2adecfe122b Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 11:42:18 -0400 Subject: [PATCH 14/73] Separate cursor usage from catalog reading --- gratipay/chomp/__init__.py | 13 +++++++++---- tests/py/test_chomp.py | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index 8337f55efd..abbcdfd427 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -53,6 +53,7 @@ def fetch_catalog(self): def stringify(obj): + # XXX What if there is a `\t` in `v`? return '\t'.join([v for k, v in obj.iteritems()]) + '\n' def insert_package_manager(pm, cursor): @@ -72,7 +73,7 @@ def package_manager_exists(pm, cursor): """, (pm.name,)) return cursor.fetchone() -def insert_catalog_for(pm, cursor): +def read_catalog_for(pm, cursor): """Insert all packages for a given package manager. """ catalog = pm.fetch_catalog() @@ -88,9 +89,12 @@ def insert_catalog_for(pm, cursor): email_stream.write(stringify(email)) package_stream.seek(0) email_stream.seek(0) + return package_stream, email_stream - cursor.copy_from(package_stream, 'packages', columns=["id", "package_manager_id", "name", "description", "mtime"]) - cursor.copy_from(email_stream, 'package_emails', columns=["package_id", "email"]) +def copy_into_db(cursor, package_stream, email_stream): + cursor.copy_from(package_stream, 'packages', + columns=["id", "package_manager_id", "name", "description", "mtime"]) + cursor.copy_from(email_stream, 'package_emails', columns=["package_id", "email"]) def parse_args(argv): p = argparse.ArgumentParser() @@ -115,6 +119,7 @@ def main(argv=sys.argv): else: package_manager_id = insert_package_manager(pm, cursor) setattr(pm, 'id', package_manager_id) - insert_catalog_for(pm, cursor) + packages, emails = read_catalog_for(pm) + copy_into_db(cursor, packages, emails) conn.commit() diff --git a/tests/py/test_chomp.py b/tests/py/test_chomp.py index 39f7305ac9..de6071b3ce 100644 --- a/tests/py/test_chomp.py +++ b/tests/py/test_chomp.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json +from cStringIO import StringIO import mock @@ -9,20 +10,44 @@ from gratipay.models.package import Package -A11Y_ANNOUNCER = json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"robdel12","email":"Robertdeluca19@gmail.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Robert DeLuca"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') +A11Y_ANNOUNCER = json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"alice","email":"alice@example.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Alice Hacker"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') class Tests(Harness): - # icf - insert_catalog_for + # rcf - read_catalog_for @mock.patch('gratipay.chomp.NPM.fetch_catalog') - def test_icf_inserts_catalog_for(self, fetch_catalog): + def test_rcf_reads_catalog_for(self, fetch_catalog): fetch_catalog.return_value = A11Y_ANNOUNCER with self.db.get_cursor() as cursor: pm = chomp.NPM() package_manager_id = chomp.insert_package_manager(pm, cursor) setattr(pm, 'id', package_manager_id) - chomp.insert_catalog_for(pm, cursor) - p = Package.from_names('npm', 'a11y-announcer') - assert p.name == 'a11y-announcer' + packages, emails = chomp.read_catalog_for(pm, cursor) + assert packages.read() == '\t'.join([ '1' + , str(package_manager_id) + , 'a11y-announcer' + , 'An accessible ember route change announcer' + , '2016-08-13T23:03:37.135Z' + ]) + '\n' + assert emails.read() == '\t'.join([ '1', 'alice@example.com']) + '\n' + + + # cid - copy_into_db + + @mock.patch('gratipay.chomp.NPM.fetch_catalog') + def test_cid_copies_to_database(self, fetch_catalog): + with self.db.get_cursor() as cursor: + pm = chomp.NPM() + package_manager_id = chomp.insert_package_manager(pm, cursor) + packages = StringIO('\t'.join([ '0' + , str(package_manager_id) + , 'foobar' + , 'Foos barringly' + , '2016-08-13T23:03:37.135Z' + ])) + emails = StringIO('\t'.join(['0', 'alice@example.com'])) + chomp.copy_into_db(cursor, packages, emails) + p = Package.from_names('npm', 'foobar') + assert p.name == 'foobar' From dbca36dd136e0a63c5f3921d052a599325b66114 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 12:06:24 -0400 Subject: [PATCH 15/73] Remove need to mock in tests --- gratipay/chomp/__init__.py | 29 ++++++++++++++--------------- tests/py/test_chomp.py | 15 +++++---------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index abbcdfd427..c0e82d1ac1 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -73,22 +73,20 @@ def package_manager_exists(pm, cursor): """, (pm.name,)) return cursor.fetchone() -def read_catalog_for(pm, cursor): +def extract_info_from_catalog(pm, catalog): """Insert all packages for a given package manager. """ - catalog = pm.fetch_catalog() - if catalog: - package_id = 0 # Mass assign ids so they are usable during email insertion - package_stream, email_stream = StringIO(), StringIO() - for k, v in catalog.iteritems(): - if type(v) is dict: - package_id += 1 - package, emails = pm.parse(v, package_id) - package_stream.write(stringify(package)) - for email in emails: - email_stream.write(stringify(email)) - package_stream.seek(0) - email_stream.seek(0) + package_id = 0 # Mass assign ids so they are usable during email insertion + package_stream, email_stream = StringIO(), StringIO() + for k, v in catalog.iteritems(): + if type(v) is dict: + package_id += 1 + package, emails = pm.parse(v, package_id) + package_stream.write(stringify(package)) + for email in emails: + email_stream.write(stringify(email)) + package_stream.seek(0) + email_stream.seek(0) return package_stream, email_stream def copy_into_db(cursor, package_stream, email_stream): @@ -119,7 +117,8 @@ def main(argv=sys.argv): else: package_manager_id = insert_package_manager(pm, cursor) setattr(pm, 'id', package_manager_id) - packages, emails = read_catalog_for(pm) + catalog = pm.fetch_catalog() + packages, emails = extract_info_from_catalog(catalog) copy_into_db(cursor, packages, emails) conn.commit() diff --git a/tests/py/test_chomp.py b/tests/py/test_chomp.py index de6071b3ce..d914abff8a 100644 --- a/tests/py/test_chomp.py +++ b/tests/py/test_chomp.py @@ -3,28 +3,24 @@ import json from cStringIO import StringIO -import mock - from gratipay import chomp from gratipay.testing import Harness from gratipay.models.package import Package -A11Y_ANNOUNCER = json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"alice","email":"alice@example.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Alice Hacker"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') +CATALOG = json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"alice","email":"alice@example.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Alice Hacker"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') class Tests(Harness): - # rcf - read_catalog_for + # eifc - extract_info_from_catalog - @mock.patch('gratipay.chomp.NPM.fetch_catalog') - def test_rcf_reads_catalog_for(self, fetch_catalog): - fetch_catalog.return_value = A11Y_ANNOUNCER + def test_eifc_extracts_info_from_catalog(self): with self.db.get_cursor() as cursor: pm = chomp.NPM() package_manager_id = chomp.insert_package_manager(pm, cursor) setattr(pm, 'id', package_manager_id) - packages, emails = chomp.read_catalog_for(pm, cursor) + packages, emails = chomp.extract_info_from_catalog(pm, CATALOG) assert packages.read() == '\t'.join([ '1' , str(package_manager_id) , 'a11y-announcer' @@ -36,8 +32,7 @@ def test_rcf_reads_catalog_for(self, fetch_catalog): # cid - copy_into_db - @mock.patch('gratipay.chomp.NPM.fetch_catalog') - def test_cid_copies_to_database(self, fetch_catalog): + def test_cid_copies_into_database(self): with self.db.get_cursor() as cursor: pm = chomp.NPM() package_manager_id = chomp.insert_package_manager(pm, cursor) From f2ab400a13dda6c5e4817bd0b059178501b2152c Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 13:08:09 -0400 Subject: [PATCH 16/73] Denormalize package_manager This is simpler and is also parallel to how we currently do accounts elsewhere. I want to make this system as similar as possible to that system so that we can reuse patterns and not have to think as much. --- gratipay/chomp/__init__.py | 17 +++-------------- gratipay/models/package.py | 7 +++---- sql/branch.sql | 9 ++------- tests/py/test_chomp.py | 15 +++++---------- 4 files changed, 13 insertions(+), 35 deletions(-) diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py index c0e82d1ac1..5bd944d803 100644 --- a/gratipay/chomp/__init__.py +++ b/gratipay/chomp/__init__.py @@ -24,7 +24,7 @@ def parse(self, package, package_id): """ out = OrderedDict() out['id'] = str(package_id) - out['package_manager_id'] = str(self.id) + out['package_manager'] = self.name out['name'] = package['name'] out['description'] = package['description'] if 'description' in package else '' if 'time' in package: @@ -56,15 +56,6 @@ def stringify(obj): # XXX What if there is a `\t` in `v`? return '\t'.join([v for k, v in obj.iteritems()]) + '\n' -def insert_package_manager(pm, cursor): - """Insert a new package manager returning it's id. - """ - cursor.execute(""" - INSERT INTO package_managers (name) - VALUES (%s) RETURNING id - """, (pm.name,)) - return cursor.fetchone()[0] - def package_manager_exists(pm, cursor): cursor.execute(""" SELECT package_managers.id @@ -74,7 +65,7 @@ def package_manager_exists(pm, cursor): return cursor.fetchone() def extract_info_from_catalog(pm, catalog): - """Insert all packages for a given package manager. + """Extract info from the catalog for the given package manager. """ package_id = 0 # Mass assign ids so they are usable during email insertion package_stream, email_stream = StringIO(), StringIO() @@ -91,7 +82,7 @@ def extract_info_from_catalog(pm, catalog): def copy_into_db(cursor, package_stream, email_stream): cursor.copy_from(package_stream, 'packages', - columns=["id", "package_manager_id", "name", "description", "mtime"]) + columns=["id", "package_manager", "name", "description", "mtime"]) cursor.copy_from(email_stream, 'package_emails', columns=["package_id", "email"]) def parse_args(argv): @@ -115,8 +106,6 @@ def main(argv=sys.argv): if package_manager_exists(pm, cursor): print("This package manager already exists!") else: - package_manager_id = insert_package_manager(pm, cursor) - setattr(pm, 'id', package_manager_id) catalog = pm.fetch_catalog() packages, emails = extract_info_from_catalog(catalog) copy_into_db(cursor, packages, emails) diff --git a/gratipay/models/package.py b/gratipay/models/package.py index b28c08eafa..6dd40164f5 100644 --- a/gratipay/models/package.py +++ b/gratipay/models/package.py @@ -7,12 +7,11 @@ class Package(orm.Model): typname = 'packages'; @classmethod - def from_names(cls, package_manager_name, package_name): + def from_names(cls, package_manager, name): return cls.db.one(""" SELECT p.*::packages FROM packages p - JOIN package_managers pm ON p.package_manager_id = pm.id - WHERE pm.name=%s AND p.name=%s + WHERE p.package_manager=%s AND p.name=%s - """, (package_manager_name, package_name)) + """, (package_manager, name)) diff --git a/sql/branch.sql b/sql/branch.sql index b37ca54c54..5bc4e53063 100644 --- a/sql/branch.sql +++ b/sql/branch.sql @@ -1,20 +1,15 @@ BEGIN; - CREATE TABLE package_managers - ( id bigserial PRIMARY KEY - , name text NOT NULL UNIQUE - ); - CREATE TABLE packages ( id bigserial PRIMARY KEY - , package_manager_id bigint NOT NULL REFERENCES package_managers ON UPDATE CASCADE ON DELETE RESTRICT + , package_manager text NOT NULL , name text NOT NULL , description text NOT NULL DEFAULT '' , long_description text NOT NULL DEFAULT '' , long_description_raw text NOT NULL DEFAULT '' , long_description_type text NOT NULL DEFAULT '' , mtime timestamp with time zone NOT NULL - , UNIQUE (package_manager_id, name) + , UNIQUE (package_manager, name) ); CREATE TABLE package_emails diff --git a/tests/py/test_chomp.py b/tests/py/test_chomp.py index d914abff8a..23027a71fc 100644 --- a/tests/py/test_chomp.py +++ b/tests/py/test_chomp.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json -from cStringIO import StringIO +from io import StringIO from gratipay import chomp from gratipay.testing import Harness @@ -16,13 +16,10 @@ class Tests(Harness): # eifc - extract_info_from_catalog def test_eifc_extracts_info_from_catalog(self): - with self.db.get_cursor() as cursor: - pm = chomp.NPM() - package_manager_id = chomp.insert_package_manager(pm, cursor) - setattr(pm, 'id', package_manager_id) - packages, emails = chomp.extract_info_from_catalog(pm, CATALOG) + pm = chomp.NPM() + packages, emails = chomp.extract_info_from_catalog(pm, CATALOG) assert packages.read() == '\t'.join([ '1' - , str(package_manager_id) + , 'npm' , 'a11y-announcer' , 'An accessible ember route change announcer' , '2016-08-13T23:03:37.135Z' @@ -34,10 +31,8 @@ def test_eifc_extracts_info_from_catalog(self): def test_cid_copies_into_database(self): with self.db.get_cursor() as cursor: - pm = chomp.NPM() - package_manager_id = chomp.insert_package_manager(pm, cursor) packages = StringIO('\t'.join([ '0' - , str(package_manager_id) + , 'npm' , 'foobar' , 'Foos barringly' , '2016-08-13T23:03:37.135Z' From f2b5578a6ce906e513f1394402e07fbbaf263c18 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 14:32:42 -0400 Subject: [PATCH 17/73] Here's a version that uses ijson --- ijson-version.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 ijson-version.py diff --git a/ijson-version.py b/ijson-version.py new file mode 100644 index 0000000000..e74a86700f --- /dev/null +++ b/ijson-version.py @@ -0,0 +1,74 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys +import time +import ijson.backends.yajl2_cffi as ijson +from collections import OrderedDict + + +path = sys.argv[1] +parser = ijson.parse(open(path)) +log = lambda *a: print(*a, file=sys.stderr) + + +def emit(package, emails): + """Takes a package and returns a serialization suitable for COPY. + """ + if not package or package['name'].startswith('_'): + log('skipping', package) + return 0 + + package['emails'] = emails + out = [] + for k,v in package.iteritems(): + if type(v) is unicode: + v = v.encode('utf8') + else: + v = str(v) + out.append(v) + print(b'\t'.join(out)) + return 1 + + +def main(): + start = time.time() + package = emails = None + nprocessed = 0 + + def log_stats(): + log("processed {} packages in {:3.0f} seconds" + .format(nprocessed, time.time() - start)) + + for prefix, event, value in parser: + + prefix = prefix.decode('utf8') + if type(value) is str: + value = value.decode('utf8') + + if not prefix and event == 'map_key': + + # Flush the previous package. We count on the first package being garbage. + processed = emit(package, emails) + nprocessed += processed + if processed and not(nprocessed % 1000): + log_stats() + + # Start a new package. + package = OrderedDict({'name': value}) + emails = [] + + key = lambda k: package['name'] + '.' + k + + if event == 'string': + if prefix == key('description'): + package['description'] = value + elif prefix == key('license'): + package['license'] = value + elif prefix == key('time.modified'): + package['mtime'] = value + elif prefix in (key('author.item.email'), key('maintainers.item.email')): + emails.append(value) + + +if __name__ == '__main__': + main() From 776dd3cf5e0d758ab1db882ede72eaeb76188e63 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 14:37:14 -0400 Subject: [PATCH 18/73] Log stats at the end, too, for final count/time --- ijson-version.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ijson-version.py b/ijson-version.py index e74a86700f..dfa5dc5fa9 100644 --- a/ijson-version.py +++ b/ijson-version.py @@ -69,6 +69,8 @@ def log_stats(): elif prefix in (key('author.item.email'), key('maintainers.item.email')): emails.append(value) + log_stats() + if __name__ == '__main__': main() From 16d52fc5d08a83f2e992a12093835fd6b938f857 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 16:42:31 -0400 Subject: [PATCH 19/73] Success! We have data from npm getting into pg! --- bin/npm.py | 116 +++++++++++++++++++++++++++++++++++++++++++++++ bin/npm.sh | 8 ++++ ijson-version.py | 76 ------------------------------- sql/branch.sql | 11 +---- 4 files changed, 126 insertions(+), 85 deletions(-) create mode 100755 bin/npm.py create mode 100644 bin/npm.sh delete mode 100644 ijson-version.py diff --git a/bin/npm.py b/bin/npm.py new file mode 100755 index 0000000000..918993425c --- /dev/null +++ b/bin/npm.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import sys +import time +from collections import OrderedDict +from psycopg2.extensions import adapt + +import ijson.backends.yajl2_cffi as ijson + + +log = lambda *a: print(*a, file=sys.stderr) + + +def serialize_one(package): + """Takes a package and returns a serialization suitable for COPY. + """ + if not package or package['name'].startswith('_'): + log('skipping', package) + return 0 + + out = [] + for k,v in package.iteritems(): + if type(v) is unicode: + v = v.replace('\n', r'\n') + v = v.replace('\r', r'\r') + v = v.encode('utf8') + out.append(adapt(v).getquoted()) + if type(v) is list: + # Gah. I am a dog. I have no idea what I'm doing. O.O + stripped = out[-1][len('ARRAY['):-1] + out[-1] = b'{' + stripped + b'}' + print(b'\t'.join(out)) + return 1 + + +def serialize(args): + """ + """ + path = args.path + parser = ijson.parse(open(path)) + start = time.time() + package = None + nprocessed = 0 + + def log_stats(): + log("processed {} packages in {:3.0f} seconds" + .format(nprocessed, time.time() - start)) + + for prefix, event, value in parser: + + prefix = prefix.decode('utf8') + if type(value) is str: + value = value.decode('utf8') + + if not prefix and event == 'map_key': + + # Flush the current package. We count on the first package being garbage. + processed = serialize_one(package) + nprocessed += processed + if processed and not(nprocessed % 1000): + log_stats() + + if nprocessed == 4444: + break # XXX + + # Start a new package. + package = OrderedDict([ ('package_manager', 'npm') + , ('name', value) + , ('description', '') + , ('emails', []) + ]) + + key = lambda k: package['name'] + '.' + k + + if event == 'string': + if prefix == key('description'): + package['description'] = value + elif prefix in (key('author.item.email'), key('maintainers.item.email')): + package['emails'].append(value) + + log_stats() + + +def upsert(args): + from gratipay import wireup + db = wireup.db(wireup.env()) + fp = open(args.path) + with db.get_cursor() as cursor: + cursor.copy_from( fp + , 'packages' + , columns=['package_manager', 'name', 'description', 'emails'] + ) + + +def parse_args(argv): + p = argparse.ArgumentParser() + p.add_argument('command', choices=['serialize', 'upsert']) + p.add_argument('path', help="the path to the input file") + p.add_argument( '-i', '--if_modified_since' + , help='a number of minutes in the past, past which we would like to see new ' + 'updates (only meaningful for `serialize`; -1 means all!)' + , type=int + , default=-1 + ) + return p.parse_args(argv) + + +def main(argv=sys.argv): + args = parse_args(argv[1:]) + globals()[args.command](args) + + +if __name__ == '__main__': + main() diff --git a/bin/npm.sh b/bin/npm.sh new file mode 100644 index 0000000000..0c76ca1379 --- /dev/null +++ b/bin/npm.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e +cd "`dirname $0`/.." + +wget https://registry.npmjs.com/-/all +./env/bin/python ./bin/npm.py serialize all > serialized +./env/bin/python ./bin/npm.py upsert serialized diff --git a/ijson-version.py b/ijson-version.py deleted file mode 100644 index dfa5dc5fa9..0000000000 --- a/ijson-version.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import sys -import time -import ijson.backends.yajl2_cffi as ijson -from collections import OrderedDict - - -path = sys.argv[1] -parser = ijson.parse(open(path)) -log = lambda *a: print(*a, file=sys.stderr) - - -def emit(package, emails): - """Takes a package and returns a serialization suitable for COPY. - """ - if not package or package['name'].startswith('_'): - log('skipping', package) - return 0 - - package['emails'] = emails - out = [] - for k,v in package.iteritems(): - if type(v) is unicode: - v = v.encode('utf8') - else: - v = str(v) - out.append(v) - print(b'\t'.join(out)) - return 1 - - -def main(): - start = time.time() - package = emails = None - nprocessed = 0 - - def log_stats(): - log("processed {} packages in {:3.0f} seconds" - .format(nprocessed, time.time() - start)) - - for prefix, event, value in parser: - - prefix = prefix.decode('utf8') - if type(value) is str: - value = value.decode('utf8') - - if not prefix and event == 'map_key': - - # Flush the previous package. We count on the first package being garbage. - processed = emit(package, emails) - nprocessed += processed - if processed and not(nprocessed % 1000): - log_stats() - - # Start a new package. - package = OrderedDict({'name': value}) - emails = [] - - key = lambda k: package['name'] + '.' + k - - if event == 'string': - if prefix == key('description'): - package['description'] = value - elif prefix == key('license'): - package['license'] = value - elif prefix == key('time.modified'): - package['mtime'] = value - elif prefix in (key('author.item.email'), key('maintainers.item.email')): - emails.append(value) - - log_stats() - - -if __name__ == '__main__': - main() diff --git a/sql/branch.sql b/sql/branch.sql index 5bc4e53063..9c0a951dd2 100644 --- a/sql/branch.sql +++ b/sql/branch.sql @@ -4,19 +4,12 @@ BEGIN; ( id bigserial PRIMARY KEY , package_manager text NOT NULL , name text NOT NULL - , description text NOT NULL DEFAULT '' + , description text NOT NULL , long_description text NOT NULL DEFAULT '' , long_description_raw text NOT NULL DEFAULT '' , long_description_type text NOT NULL DEFAULT '' - , mtime timestamp with time zone NOT NULL + , emails text[] NOT NULL , UNIQUE (package_manager, name) ); - CREATE TABLE package_emails - ( id bigserial PRIMARY KEY - , package_id bigint NOT NULL REFERENCES packages ON UPDATE CASCADE ON DELETE RESTRICT - , email text NOT NULL - , UNIQUE (package_id, email) - ); - END; From e201a3c8a07be0f0e8eff88972b5a2601fe7b87d Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 16:44:38 -0400 Subject: [PATCH 20/73] Can we just call it readme? Let's call it readme. --- sql/branch.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/branch.sql b/sql/branch.sql index 9c0a951dd2..8ec184e0f6 100644 --- a/sql/branch.sql +++ b/sql/branch.sql @@ -1,14 +1,14 @@ BEGIN; CREATE TABLE packages - ( id bigserial PRIMARY KEY - , package_manager text NOT NULL - , name text NOT NULL - , description text NOT NULL - , long_description text NOT NULL DEFAULT '' - , long_description_raw text NOT NULL DEFAULT '' - , long_description_type text NOT NULL DEFAULT '' - , emails text[] NOT NULL + ( id bigserial PRIMARY KEY + , package_manager text NOT NULL + , name text NOT NULL + , description text NOT NULL + , readme text NOT NULL DEFAULT '' + , readme_raw text NOT NULL DEFAULT '' + , readme_type text NOT NULL DEFAULT '' + , emails text[] NOT NULL , UNIQUE (package_manager, name) ); From 7dfc975fe1a7098e733d450ffae71d495b4760d3 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 17:13:47 -0400 Subject: [PATCH 21/73] I think it worked! :O --- bin/npm.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/bin/npm.py b/bin/npm.py index 918993425c..5828850cda 100755 --- a/bin/npm.py +++ b/bin/npm.py @@ -62,7 +62,7 @@ def log_stats(): if processed and not(nprocessed % 1000): log_stats() - if nprocessed == 4444: + if nprocessed == 5555: break # XXX # Start a new package. @@ -88,10 +88,30 @@ def upsert(args): db = wireup.db(wireup.env()) fp = open(args.path) with db.get_cursor() as cursor: + cursor.run("CREATE TEMP TABLE updates (LIKE packages INCLUDING ALL) ON COMMIT DROP") cursor.copy_from( fp - , 'packages' + , 'updates' , columns=['package_manager', 'name', 'description', 'emails'] ) + cursor.run(""" + + WITH updated AS ( + UPDATE packages p + SET package_manager = u.package_manager + , description = u.description + , emails = u.emails + FROM updates u + WHERE p.name = u.name + RETURNING p.name + ) + INSERT INTO packages(package_manager, name, description, emails) + SELECT package_manager, name, description, emails + FROM updates u LEFT JOIN updated USING(name) + WHERE updated.name IS NULL + GROUP BY u.package_manager, u.name, u.description, u.emails + + """) + def parse_args(argv): From e55c58ea9f73e918bd193ce98620df038e70278b Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 17:15:17 -0400 Subject: [PATCH 22/73] Let's give credit --- bin/npm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/npm.py b/bin/npm.py index 5828850cda..1a53422e7b 100755 --- a/bin/npm.py +++ b/bin/npm.py @@ -88,6 +88,7 @@ def upsert(args): db = wireup.db(wireup.env()) fp = open(args.path) with db.get_cursor() as cursor: + # http://tapoueh.org/blog/2013/03/15-batch-update.html cursor.run("CREATE TEMP TABLE updates (LIKE packages INCLUDING ALL) ON COMMIT DROP") cursor.copy_from( fp , 'updates' From e52e0e4550cb20fca7b27691fdf44e4972c7d790 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Mon, 24 Oct 2016 22:36:06 -0400 Subject: [PATCH 23/73] Test of tantalizing closeness! --- bin/npm.py | 4 +++- tests/py/test_chomp.py | 43 ------------------------------------------ tests/py/test_npm.py | 39 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 44 deletions(-) delete mode 100644 tests/py/test_chomp.py create mode 100644 tests/py/test_npm.py diff --git a/bin/npm.py b/bin/npm.py index 1a53422e7b..b77d797aaf 100755 --- a/bin/npm.py +++ b/bin/npm.py @@ -80,6 +80,7 @@ def log_stats(): elif prefix in (key('author.item.email'), key('maintainers.item.email')): package['emails'].append(value) + nprocessed += serialize_one(package) log_stats() @@ -112,7 +113,8 @@ def upsert(args): GROUP BY u.package_manager, u.name, u.description, u.emails """) - + log(cursor.all('select * from packages')) + log(db.all('select * from packages')) def parse_args(argv): diff --git a/tests/py/test_chomp.py b/tests/py/test_chomp.py deleted file mode 100644 index 23027a71fc..0000000000 --- a/tests/py/test_chomp.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import json -from io import StringIO - -from gratipay import chomp -from gratipay.testing import Harness -from gratipay.models.package import Package - - -CATALOG = json.loads('{"a11y-announcer":{"name":"a11y-announcer","description":"An accessible ember route change announcer","dist-tags":{"latest":"1.0.2"},"maintainers":[{"name":"alice","email":"alice@example.com"}],"homepage":"https://github.com/ember-a11y/a11y-announcer#readme","keywords":["ember-addon","ember accessibility","ember router","a11y-announcer"],"repository":{"type":"git","url":"git+https://github.com/ember-a11y/a11y-announcer.git"},"author":{"name":"Alice Hacker"},"bugs":{"url":"https://github.com/ember-a11y/a11y-announcer/issues"},"license":"MIT","readmeFilename":"README.md","users":{"jalcine":true,"unwiredbrain":true},"time":{"modified":"2016-08-13T23:03:37.135Z"},"versions":{"1.0.2":"latest"}}}') - - -class Tests(Harness): - - # eifc - extract_info_from_catalog - - def test_eifc_extracts_info_from_catalog(self): - pm = chomp.NPM() - packages, emails = chomp.extract_info_from_catalog(pm, CATALOG) - assert packages.read() == '\t'.join([ '1' - , 'npm' - , 'a11y-announcer' - , 'An accessible ember route change announcer' - , '2016-08-13T23:03:37.135Z' - ]) + '\n' - assert emails.read() == '\t'.join([ '1', 'alice@example.com']) + '\n' - - - # cid - copy_into_db - - def test_cid_copies_into_database(self): - with self.db.get_cursor() as cursor: - packages = StringIO('\t'.join([ '0' - , 'npm' - , 'foobar' - , 'Foos barringly' - , '2016-08-13T23:03:37.135Z' - ])) - emails = StringIO('\t'.join(['0', 'alice@example.com'])) - chomp.copy_into_db(cursor, packages, emails) - p = Package.from_names('npm', 'foobar') - assert p.name == 'foobar' diff --git a/tests/py/test_npm.py b/tests/py/test_npm.py new file mode 100644 index 0000000000..933f0765dc --- /dev/null +++ b/tests/py/test_npm.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +from subprocess import Popen, PIPE + +from gratipay.testing import Harness + + +CATALOG = b'''\ +{ "_updated": 1234567890 +, "testing-package": + { "name":"testing-package" + , "description":"A package for testing" + , "maintainers":[{"email":"alice@example.com"}] + , "author": {"email":"bob@example.com"} + , "time":{"modified":"2015-09-12T03:03:03.135Z"} + } + } +''' + + +class Tests(Harness): + + def test_packages_starts_empty(self): + assert self.db.all('select * from packages') == [] + + + def test_npm_inserts_packages(self): + serialize = Popen( ('env/bin/python', 'bin/npm.py', 'serialize', '/dev/stdin') + , stdin=PIPE + , stdout=PIPE + ) + upsert = Popen( ('env/bin/python', 'bin/npm.py', 'upsert', '/dev/stdin') + , stdin=serialize.stdout + , stdout=PIPE + ) + serialize.communicate(CATALOG) + serialize.wait() + upsert.wait() + assert self.db.one('select * from packages').name == 'testing-package' From 13f5f5ffa089e34689418b6f6a6a7bf31d61d260 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 09:53:15 -0400 Subject: [PATCH 24/73] Clean up handling of escape/quote characters --- bin/npm.py | 89 ++++++++++++++++++++++---------------------- tests/py/test_npm.py | 68 +++++++++++++++++++++------------ 2 files changed, 89 insertions(+), 68 deletions(-) diff --git a/bin/npm.py b/bin/npm.py index b77d797aaf..3e0e967e8f 100755 --- a/bin/npm.py +++ b/bin/npm.py @@ -2,10 +2,9 @@ from __future__ import absolute_import, division, print_function, unicode_literals import argparse +import csv import sys import time -from collections import OrderedDict -from psycopg2.extensions import adapt import ijson.backends.yajl2_cffi as ijson @@ -13,25 +12,33 @@ log = lambda *a: print(*a, file=sys.stderr) -def serialize_one(package): - """Takes a package and returns a serialization suitable for COPY. +def arrayize(seq): + """Given a sequence of str, return a Postgres array literal str. + """ + array = [] + for item in seq: + assert type(item) is str + escaped = item.replace(b'\\', b'\\\\').replace(b'"', b'\\"') + quoted = b'"' + escaped + b'"' + array.append(quoted) + joined = b', '.join(array) + return b'{' + joined + b'}' + + +def serialize_one(out, package): + """Takes a package and emits a serialization suitable for COPY. """ if not package or package['name'].startswith('_'): log('skipping', package) return 0 - out = [] - for k,v in package.iteritems(): - if type(v) is unicode: - v = v.replace('\n', r'\n') - v = v.replace('\r', r'\r') - v = v.encode('utf8') - out.append(adapt(v).getquoted()) - if type(v) is list: - # Gah. I am a dog. I have no idea what I'm doing. O.O - stripped = out[-1][len('ARRAY['):-1] - out[-1] = b'{' + stripped + b'}' - print(b'\t'.join(out)) + row = ( package['package_manager'] + , package['name'] + , package['description'] + , arrayize(package['emails']) + ) + + out.writerow(row) return 1 @@ -43,44 +50,40 @@ def serialize(args): start = time.time() package = None nprocessed = 0 + out = csv.writer(sys.stdout) def log_stats(): log("processed {} packages in {:3.0f} seconds" .format(nprocessed, time.time() - start)) for prefix, event, value in parser: - - prefix = prefix.decode('utf8') - if type(value) is str: - value = value.decode('utf8') - - if not prefix and event == 'map_key': + log(prefix, event, value) + if not prefix and event == b'map_key': # Flush the current package. We count on the first package being garbage. - processed = serialize_one(package) + processed = serialize_one(out, package) nprocessed += processed if processed and not(nprocessed % 1000): log_stats() - if nprocessed == 5555: - break # XXX - # Start a new package. - package = OrderedDict([ ('package_manager', 'npm') - , ('name', value) - , ('description', '') - , ('emails', []) - ]) - - key = lambda k: package['name'] + '.' + k - - if event == 'string': - if prefix == key('description'): + package = { 'package_manager': b'npm' + , 'name': value + , 'description': b'' + , 'emails': [] + } + + key = lambda k: package['name'] + b'.' + k + + if event == b'string': + assert type(value) is unicode # Who knew? Seems to decode only for `string`. + value = value.encode('utf8') + if prefix == key(b'description'): package['description'] = value - elif prefix in (key('author.item.email'), key('maintainers.item.email')): + elif prefix in (key(b'author.email'), key(b'maintainers.item.email')): package['emails'].append(value) - nprocessed += serialize_one(package) + nprocessed += serialize_one(out, package) # Don't forget the last one! log_stats() @@ -89,12 +92,12 @@ def upsert(args): db = wireup.db(wireup.env()) fp = open(args.path) with db.get_cursor() as cursor: + assert cursor.connection.encoding == 'UTF8' + # http://tapoueh.org/blog/2013/03/15-batch-update.html cursor.run("CREATE TEMP TABLE updates (LIKE packages INCLUDING ALL) ON COMMIT DROP") - cursor.copy_from( fp - , 'updates' - , columns=['package_manager', 'name', 'description', 'emails'] - ) + cursor.copy_expert('COPY updates (package_manager, name, description, emails) ' + 'FROM STDIN WITH (FORMAT csv)', fp) cursor.run(""" WITH updated AS ( @@ -113,8 +116,6 @@ def upsert(args): GROUP BY u.package_manager, u.name, u.description, u.emails """) - log(cursor.all('select * from packages')) - log(db.all('select * from packages')) def parse_args(argv): diff --git a/tests/py/test_npm.py b/tests/py/test_npm.py index 933f0765dc..9c6680087c 100644 --- a/tests/py/test_npm.py +++ b/tests/py/test_npm.py @@ -5,17 +5,13 @@ from gratipay.testing import Harness -CATALOG = b'''\ -{ "_updated": 1234567890 -, "testing-package": - { "name":"testing-package" - , "description":"A package for testing" - , "maintainers":[{"email":"alice@example.com"}] - , "author": {"email":"bob@example.com"} - , "time":{"modified":"2015-09-12T03:03:03.135Z"} - } - } -''' +def load(raw): + serialized = Popen( ('env/bin/python', 'bin/npm.py', 'serialize', '/dev/stdin') + , stdin=PIPE, stdout=PIPE + ).communicate(raw)[0] + Popen( ('env/bin/python', 'bin/npm.py', 'upsert', '/dev/stdin') + , stdin=PIPE, stdout=PIPE + ).communicate(serialized)[0] class Tests(Harness): @@ -23,17 +19,41 @@ class Tests(Harness): def test_packages_starts_empty(self): assert self.db.all('select * from packages') == [] - def test_npm_inserts_packages(self): - serialize = Popen( ('env/bin/python', 'bin/npm.py', 'serialize', '/dev/stdin') - , stdin=PIPE - , stdout=PIPE - ) - upsert = Popen( ('env/bin/python', 'bin/npm.py', 'upsert', '/dev/stdin') - , stdin=serialize.stdout - , stdout=PIPE - ) - serialize.communicate(CATALOG) - serialize.wait() - upsert.wait() - assert self.db.one('select * from packages').name == 'testing-package' + load(br''' + { "_updated": 1234567890 + , "testing-package": + { "name":"testing-package" + , "description":"A package for testing" + , "maintainers":[{"email":"alice@example.com"}] + , "author": {"email":"bob@example.com"} + , "time":{"modified":"2015-09-12T03:03:03.135Z"} + } + } + ''') + + package = self.db.one('select * from packages') + assert package.package_manager == 'npm' + assert package.name == 'testing-package' + assert package.description == 'A package for testing' + assert package.name == 'testing-package' + + + def test_npm_handles_quoting(self): + load(br''' + { "_updated": 1234567890 + , "testi\\\"ng-pa\\\"ckage": + { "name":"testi\\\"ng-pa\\\"ckage" + , "description":"A package for \"testing\"" + , "maintainers":[{"email":"alice@\"example\".com"}] + , "author": {"email":"\\\\\"bob\\\\\"@example.com"} + , "time":{"modified":"2015-09-12T03:03:03.135Z"} + } + } + ''') + + package = self.db.one('select * from packages') + assert package.package_manager == 'npm' + assert package.name == r'testi\"ng-pa\"ckage' + assert package.description == 'A package for "testing"' + assert package.emails == ['alice@"example".com', r'\\"bob\\"@example.com'] From 7cf1d6c8a5eb64f800b301391c8dfd43dfe1c843 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:05:33 -0400 Subject: [PATCH 25/73] Merge newer work with older --- .travis.yml | 4 +- gratipay/chomp/__init__.py | 113 ------------------ gratipay/models/package.py | 17 --- gratipay/package_managers/__init__.py | 0 .../package_managers/sync.py | 7 +- gratipay/wireup.py | 4 +- setup.py | 2 +- tests/py/{test_npm.py => test_npm_sync.py} | 10 +- 8 files changed, 13 insertions(+), 144 deletions(-) delete mode 100644 gratipay/chomp/__init__.py delete mode 100644 gratipay/models/package.py create mode 100644 gratipay/package_managers/__init__.py rename bin/npm.py => gratipay/package_managers/sync.py (98%) mode change 100755 => 100644 rename tests/py/{test_npm.py => test_npm_sync.py} (88%) diff --git a/.travis.yml b/.travis.yml index f1a36e45b0..3dfdbf82c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,9 @@ branches: - master before_install: - git branch -vv | grep '^*' + - npm install -g marky-markdown + - apt-get install yajl + - pip install ijson cache: directories: - env/bin @@ -14,7 +17,6 @@ install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env - - npm install -g marky-markdown before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' diff --git a/gratipay/chomp/__init__.py b/gratipay/chomp/__init__.py deleted file mode 100644 index 5bd944d803..0000000000 --- a/gratipay/chomp/__init__.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Process package artifacts. - -The main function is exposed as a console script named `chomp` via setup.py. - -""" -from __future__ import absolute_import, division, print_function, unicode_literals -from collections import OrderedDict -from io import StringIO - -import argparse -import sys -import psycopg2 -import requests - - -class NPM(object): - """Represent npm. - """ - def __init__(self): - self.name = 'npm' - - def parse(self, package, package_id): - """Given an npm package dict, return a dict of info and a list of emails to be imported. - """ - out = OrderedDict() - out['id'] = str(package_id) - out['package_manager'] = self.name - out['name'] = package['name'] - out['description'] = package['description'] if 'description' in package else '' - if 'time' in package: - if type(package['time']) is dict and 'modified' in package['time']: - out['mtime'] = package['time']['modified'] - else: - out['mtime'] = 'now()' - - emails = [] - for key in ('authors', 'maintainers'): - for person in package.get(key, []): - if type(person) is dict: - email = person.get('email') - if email: - emails.append(OrderedDict([ - ('package_id', str(package_id)), - ('email', email) - ])) - - return out, emails - - def fetch_catalog(self): - r = requests.get('https://registry.npmjs.com/-/all', verify=False) - r.raise_for_status() - return r.json() - - -def stringify(obj): - # XXX What if there is a `\t` in `v`? - return '\t'.join([v for k, v in obj.iteritems()]) + '\n' - -def package_manager_exists(pm, cursor): - cursor.execute(""" - SELECT package_managers.id - FROM package_managers - WHERE package_managers.name = (%s) - """, (pm.name,)) - return cursor.fetchone() - -def extract_info_from_catalog(pm, catalog): - """Extract info from the catalog for the given package manager. - """ - package_id = 0 # Mass assign ids so they are usable during email insertion - package_stream, email_stream = StringIO(), StringIO() - for k, v in catalog.iteritems(): - if type(v) is dict: - package_id += 1 - package, emails = pm.parse(v, package_id) - package_stream.write(stringify(package)) - for email in emails: - email_stream.write(stringify(email)) - package_stream.seek(0) - email_stream.seek(0) - return package_stream, email_stream - -def copy_into_db(cursor, package_stream, email_stream): - cursor.copy_from(package_stream, 'packages', - columns=["id", "package_manager", "name", "description", "mtime"]) - cursor.copy_from(email_stream, 'package_emails', columns=["package_id", "email"]) - -def parse_args(argv): - p = argparse.ArgumentParser() - p.add_argument( 'if_modified_since' - , help='a number of minutes in the past, past which we need new updates' - ) - p.add_argument('package_manager' - , help='the name of the package manager to import from' - ) - return p.parse_args(argv) - -def main(argv=sys.argv): - pm_arg = parse_args(argv[1:]).package_manager - if pm_arg == 'npm': - pm = NPM() - - conn = psycopg2.connect(host='localhost', database='gratipay') - cursor = conn.cursor() - - if package_manager_exists(pm, cursor): - print("This package manager already exists!") - else: - catalog = pm.fetch_catalog() - packages, emails = extract_info_from_catalog(catalog) - copy_into_db(cursor, packages, emails) - - conn.commit() diff --git a/gratipay/models/package.py b/gratipay/models/package.py deleted file mode 100644 index 6dd40164f5..0000000000 --- a/gratipay/models/package.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -from postgres import orm - - -class Package(orm.Model): - typname = 'packages'; - - @classmethod - def from_names(cls, package_manager, name): - return cls.db.one(""" - - SELECT p.*::packages - FROM packages p - WHERE p.package_manager=%s AND p.name=%s - - """, (package_manager, name)) diff --git a/gratipay/package_managers/__init__.py b/gratipay/package_managers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/npm.py b/gratipay/package_managers/sync.py old mode 100755 new mode 100644 similarity index 98% rename from bin/npm.py rename to gratipay/package_managers/sync.py index 3e0e967e8f..4c4f740fdd --- a/bin/npm.py +++ b/gratipay/package_managers/sync.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python +"""Sync our database with package managers. Just npm for now. +""" from __future__ import absolute_import, division, print_function, unicode_literals import argparse @@ -134,7 +135,3 @@ def parse_args(argv): def main(argv=sys.argv): args = parse_args(argv[1:]) globals()[args.command](args) - - -if __name__ == '__main__': - main() diff --git a/gratipay/wireup.py b/gratipay/wireup.py index 28f0b9db3b..8aa1dd220e 100644 --- a/gratipay/wireup.py +++ b/gratipay/wireup.py @@ -29,7 +29,6 @@ from gratipay.elsewhere.openstreetmap import OpenStreetMap from gratipay.elsewhere.twitter import Twitter from gratipay.elsewhere.venmo import Venmo -from gratipay.models.package import Package from gratipay.models.account_elsewhere import AccountElsewhere from gratipay.models.community import Community from gratipay.models.country import Country @@ -57,8 +56,7 @@ def db(env): maxconn = env.database_maxconn db = GratipayDB(dburl, maxconn=maxconn) - models = (AccountElsewhere, Community, Country, ExchangeRoute, Participant, Team, Package) - for model in models: + for model in (AccountElsewhere, Community, Country, ExchangeRoute, Participant, Team): db.register_model(model) gratipay.billing.payday.Payday.db = db diff --git a/setup.py b/setup.py index e4615bd19f..d0cba45270 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ , entry_points = { 'console_scripts' : [ 'payday=gratipay.cli:payday' , 'fake_data=gratipay.utils.fake_data:main' - , 'chomp=gratipay.chomp:main' + , 'sync-npm=gratipay.package_managers.sync:main' ] } ) diff --git a/tests/py/test_npm.py b/tests/py/test_npm_sync.py similarity index 88% rename from tests/py/test_npm.py rename to tests/py/test_npm_sync.py index 9c6680087c..aec9a10a57 100644 --- a/tests/py/test_npm.py +++ b/tests/py/test_npm_sync.py @@ -6,10 +6,10 @@ def load(raw): - serialized = Popen( ('env/bin/python', 'bin/npm.py', 'serialize', '/dev/stdin') + serialized = Popen( ('env/bin/sync-npm', 'serialize', '/dev/stdin') , stdin=PIPE, stdout=PIPE ).communicate(raw)[0] - Popen( ('env/bin/python', 'bin/npm.py', 'upsert', '/dev/stdin') + Popen( ('env/bin/sync-npm', 'upsert', '/dev/stdin') , stdin=PIPE, stdout=PIPE ).communicate(serialized)[0] @@ -19,7 +19,9 @@ class Tests(Harness): def test_packages_starts_empty(self): assert self.db.all('select * from packages') == [] - def test_npm_inserts_packages(self): + # sn - sync-npm + + def test_sn_inserts_packages(self): load(br''' { "_updated": 1234567890 , "testing-package": @@ -39,7 +41,7 @@ def test_npm_inserts_packages(self): assert package.name == 'testing-package' - def test_npm_handles_quoting(self): + def test_sn_handles_quoting(self): load(br''' { "_updated": 1234567890 , "testi\\\"ng-pa\\\"ckage": From 864c4d676e564c8408ca6b315517fb75a10ab810 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:12:29 -0400 Subject: [PATCH 26/73] How about this, Travis? https://docs.travis-ci.com/user/installing-dependencies/#Installing-Packages-on-Container-Based-Infrastructure --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3dfdbf82c6..a4ae711e30 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,15 @@ language: python addons: postgresql: 9.3 + apt: + packages: + - yajl branches: only: - master before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown - - apt-get install yajl - pip install ijson cache: directories: From a388158a4851f6695d9b14691d5fabf3eefe8048 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:23:09 -0400 Subject: [PATCH 27/73] This? --- .travis.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index a4ae711e30..d98fc6d573 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,13 @@ language: python addons: postgresql: 9.3 - apt: - packages: - - yajl branches: only: - master before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown + - apt-get install yajl cffi - pip install ijson cache: directories: @@ -27,4 +25,4 @@ script: make bgrun test doc notifications: email: false irc: false -sudo: false +#sudo: false -- TODO bring back if/when we can work around yajl From 1dffa6f9135129db8163b4e17569c144adbe78cc Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:27:27 -0400 Subject: [PATCH 28/73] Bash, smash, ... --- .travis.yml | 2 +- tests/py/test_npm_sync.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d98fc6d573..fd76c3db75 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ branches: before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown - - apt-get install yajl cffi + - sudo apt-get install yajl cffi - pip install ijson cache: directories: diff --git a/tests/py/test_npm_sync.py b/tests/py/test_npm_sync.py index aec9a10a57..124c019571 100644 --- a/tests/py/test_npm_sync.py +++ b/tests/py/test_npm_sync.py @@ -1,3 +1,5 @@ +"""Tests for syncing npm. Requires a `pip install ijson`, which requires yajl. +""" from __future__ import absolute_import, division, print_function, unicode_literals from subprocess import Popen, PIPE From 639405c7415df7f9c2df6934d5d93e68b52829b3 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:32:24 -0400 Subject: [PATCH 29/73] Smash, bash, ... --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fd76c3db75..b864fa7cda 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ branches: before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown - - sudo apt-get install yajl cffi + - sudo apt-get install libyajl-dev - pip install ijson cache: directories: From 3163f8cde19b8f9c6b564d18bd36aedd8147faba Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:45:28 -0400 Subject: [PATCH 30/73] Floo --- .travis.yml | 2 +- Procfile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b864fa7cda..49d95e9bd2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ branches: before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown - - sudo apt-get install libyajl-dev + - sudo apt-get install libyajl2 - pip install ijson cache: directories: diff --git a/Procfile b/Procfile index 3ae419183b..7ca78724a6 100644 --- a/Procfile +++ b/Procfile @@ -1 +1,2 @@ web: gunicorn gratipay.main:website --conf hide_gunicorn_version.py --bind :$PORT $GUNICORN_OPTS +npm-syncer: sync-npm From a0b387e10f0008f558e581b9bd1f5274c4aa0ff2 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 10:53:45 -0400 Subject: [PATCH 31/73] AAAAHHHHHHHHHHH --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 49d95e9bd2..5f2566842d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,15 @@ language: python addons: postgresql: 9.3 + apt: + packages: + - libyajl2 branches: only: - master before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown - - sudo apt-get install libyajl2 - pip install ijson cache: directories: From 93efb641407cf7bdf516f2a155fcb11aabb39599 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 20:52:27 -0400 Subject: [PATCH 32/73] SJHFJDKKJK --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5f2566842d..44f1ee466c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,12 @@ branches: before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown + + # Groan. + - sudo add-apt-repository ppa:nilarimogard/webupd8 + - sudo apt-get update + - sudo apt-get libyajl-dev + - pip install ijson cache: directories: From 91d5e75970894fe915b67c735452cb258e56786b Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 20:53:06 -0400 Subject: [PATCH 33/73] Or this? --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 44f1ee466c..c1981a73b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ before_install: # Groan. - sudo add-apt-repository ppa:nilarimogard/webupd8 - sudo apt-get update - - sudo apt-get libyajl-dev + - sudo apt-get libyajl2 - pip install ijson cache: From fe0d6b834b139bfb622811841d70c57eb53ab033 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 21:01:04 -0400 Subject: [PATCH 34/73] Is this it? --- .travis.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index c1981a73b5..91345c9a62 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,10 +11,13 @@ before_install: - git branch -vv | grep '^*' - npm install -g marky-markdown - # Groan. - - sudo add-apt-repository ppa:nilarimogard/webupd8 - - sudo apt-get update - - sudo apt-get libyajl2 + # Sometimes ya just halfta ... + - git clone git@github.com:lloyd/yajl.git + - cd yajl + - git checkout 2.1.0 + - ./configure --prefix=/usr/local + - make + - sudo make install - pip install ijson cache: From e33ac7e373c1cd0b827ecce5ef6e9d786e8ff30a Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 21:01:59 -0400 Subject: [PATCH 35/73] Oh yeah ... --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 91345c9a62..4385c9709d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,6 @@ language: python addons: postgresql: 9.3 - apt: - packages: - - libyajl2 branches: only: - master From 428e5bafd0a309decd6b8645e137c614d1cfe401 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 21:04:31 -0400 Subject: [PATCH 36/73] Try this URL ... --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4385c9709d..c4b08c9cde 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ before_install: - npm install -g marky-markdown # Sometimes ya just halfta ... - - git clone git@github.com:lloyd/yajl.git + - git clone https://github.com/lloyd/yajl.git - cd yajl - git checkout 2.1.0 - ./configure --prefix=/usr/local From d9400afdf1cf35deb504c08712f54f69630405a5 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 21:09:31 -0400 Subject: [PATCH 37/73] Close! Close? --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index c4b08c9cde..4144b2ef1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ before_install: - ./configure --prefix=/usr/local - make - sudo make install + - cd .. - pip install ijson cache: From 3e1d4a175afdf83071dbde7a26dc5ad523acc00e Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 22:55:05 -0400 Subject: [PATCH 38/73] Eeeeeeeeeeee!!!!! --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4144b2ef1e..15993df627 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ before_install: - sudo make install - cd .. - - pip install ijson + - env/bin/pip install ijson==2.1.0 cache: directories: - env/bin From fc39b24399c71c253399b43e395de38c1d47e270 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 23:04:16 -0400 Subject: [PATCH 39/73] One of these commits ... --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 15993df627..e52857649f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,16 +16,16 @@ before_install: - make - sudo make install - cd .. - - - env/bin/pip install ijson==2.1.0 cache: directories: - env/bin - env/lib/python2.7/site-packages + - yajl install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env + - env/bin/pip install ijson==2.1.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' From 1a9f2453a51d57a44e4110d265b1d20115e8c3c2 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 23:14:39 -0400 Subject: [PATCH 40/73] More flailing. :( --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e52857649f..a5ec043ae5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env - - env/bin/pip install ijson==2.1.0 + - env/bin/pip install --global-option=build_ext --global-option="-I/usr/local/include/" --global-option="-L/usr/local/lib" ijson==2.1.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' From 2ab8f7d484efa0fbbea703223cee0f7a5a9120b9 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 23:24:56 -0400 Subject: [PATCH 41/73] Floo flah --- .travis.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index a5ec043ae5..fcaa1ca86e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,13 +9,7 @@ before_install: - npm install -g marky-markdown # Sometimes ya just halfta ... - - git clone https://github.com/lloyd/yajl.git - - cd yajl - - git checkout 2.1.0 - - ./configure --prefix=/usr/local - - make - - sudo make install - - cd .. + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=/usr/local && make && sudo make install && cd .. cache: directories: - env/bin From 954564bcf84cda94c0082488fe821e30b172eb61 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 23:31:08 -0400 Subject: [PATCH 42/73] fjdkslf --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fcaa1ca86e..2c7b7ab36e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,8 @@ before_install: - npm install -g marky-markdown # Sometimes ya just halfta ... - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=/usr/local && make && sudo make install && cd .. + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=/usr/local && make && cd .. + - sudo make --directory=yajl install cache: directories: - env/bin @@ -19,7 +20,7 @@ install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env - - env/bin/pip install --global-option=build_ext --global-option="-I/usr/local/include/" --global-option="-L/usr/local/lib" ijson==2.1.0 + - env/bin/pip install --upgrade --global-option=build_ext --global-option="-I/usr/local/include" --global-option="-L/usr/local/lib" ijson==2.1.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' From 44e41c4dcf35ec4ee7e9b5f834a32650be14048c Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 25 Oct 2016 23:34:39 -0400 Subject: [PATCH 43/73] fdosmjklklj --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 2c7b7ab36e..350db0dff3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,7 @@ install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env + - env/bin/pip uninstall --yes ijson - env/bin/pip install --upgrade --global-option=build_ext --global-option="-I/usr/local/include" --global-option="-L/usr/local/lib" ijson==2.1.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env From 9df2c9f2c420d1032e730261157cc1d37b6b97c5 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:09:34 -0400 Subject: [PATCH 44/73] THIS IS IT I JUST KNOW ITall --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 350db0dff3..6651f58802 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,8 +20,7 @@ install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env - - env/bin/pip uninstall --yes ijson - - env/bin/pip install --upgrade --global-option=build_ext --global-option="-I/usr/local/include" --global-option="-L/usr/local/lib" ijson==2.1.0 + - env/bin/pip install --upgrade --global-option=build_ext --global-option="-I/usr/local/include" --global-option="-L/usr/local/lib" ijson==2.3.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' From da0639b4676ef9558b86d591b05ab9c31e1407d8 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:19:14 -0400 Subject: [PATCH 45/73] Okay that wasn't quite it --- .travis.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6651f58802..095b6dec03 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,8 +9,10 @@ before_install: - npm install -g marky-markdown # Sometimes ya just halfta ... - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=/usr/local && make && cd .. - - sudo make --directory=yajl install + - rm -rf yajl + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=. && make && cd .. + - make --directory=yajl install + - echo LD_LIBRARY_PATH: $LD_LIBRARY_PATH cache: directories: - env/bin @@ -20,13 +22,12 @@ install: - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rf env; fi - touch requirements.txt package.json - make env - - env/bin/pip install --upgrade --global-option=build_ext --global-option="-I/usr/local/include" --global-option="-L/usr/local/lib" ijson==2.3.0 + - env/bin/pip install --upgrade ijson==2.3.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: make bgrun test doc +script: LD_LIBRARY_PATH=:$LD_LIBRARY_PATH make bgrun test doc notifications: email: false irc: false -#sudo: false -- TODO bring back if/when we can work around yajl From 302562ae736ecf00642ee0280ec112cca36bbbad Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:27:48 -0400 Subject: [PATCH 46/73] Fooooooooo --- .travis.yml | 4 ++-- Makefile | 2 +- Procfile | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 095b6dec03..f5108cbaad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ before_install: # Sometimes ya just halfta ... - rm -rf yajl - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=. && make && cd .. + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=/home/travis/yajl && cd .. - make --directory=yajl install - echo LD_LIBRARY_PATH: $LD_LIBRARY_PATH cache: @@ -27,7 +27,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: LD_LIBRARY_PATH=:$LD_LIBRARY_PATH make bgrun test doc +script: LD_LIBRARY_PATH=yajl:$LD_LIBRARY_PATH make bgrun test doc notifications: email: false irc: false diff --git a/Makefile b/Makefile index aa1d280d49..c2a30d2687 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ pyflakes: env $(env_bin)/pyflakes *.py bin gratipay tests test: test-schema - $(py_test) --cov gratipay ./tests/ + $(py_test) --cov gratipay ./tests/test_npm_sync.py @$(MAKE) --no-print-directory pyflakes pytest: env diff --git a/Procfile b/Procfile index 7ca78724a6..3ae419183b 100644 --- a/Procfile +++ b/Procfile @@ -1,2 +1 @@ web: gunicorn gratipay.main:website --conf hide_gunicorn_version.py --bind :$PORT $GUNICORN_OPTS -npm-syncer: sync-npm From b19c5b04099e44c219bfe3ebf6570ab6a3f5f8a3 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:37:22 -0400 Subject: [PATCH 47/73] Fjdsklfjlds --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f5108cbaad..c533116e54 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,8 +10,8 @@ before_install: # Sometimes ya just halfta ... - rm -rf yajl - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix=/home/travis/yajl && cd .. - - make --directory=yajl install + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && cd .. + - sudo make --directory=yajl install - echo LD_LIBRARY_PATH: $LD_LIBRARY_PATH cache: directories: From ba4b925a18bd799d7afb83adc27764e781ccbc60 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:46:42 -0400 Subject: [PATCH 48/73] ECHO! Echo! echo! ... --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c533116e54..3e3dccf157 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,13 +6,15 @@ branches: - master before_install: - git branch -vv | grep '^*' - - npm install -g marky-markdown # Sometimes ya just halfta ... + - echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + - exit 1 - rm -rf yajl - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && cd .. - sudo make --directory=yajl install - - echo LD_LIBRARY_PATH: $LD_LIBRARY_PATH + + - npm install -g marky-markdown cache: directories: - env/bin From 10f5746ddb920e4c1e8a2585313cae10802b2944 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:53:45 -0400 Subject: [PATCH 49/73] :cry: --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3e3dccf157..3663afc8d0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,8 @@ before_install: - git branch -vv | grep '^*' # Sometimes ya just halfta ... - - echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + - echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + - env - exit 1 - rm -rf yajl - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && cd .. From 343bdc7961580a05a81f7c8e5f3abef025a1dc71 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 06:57:41 -0400 Subject: [PATCH 50/73] :horse: --- .travis.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3663afc8d0..edc905b44b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,7 @@ before_install: - git branch -vv | grep '^*' # Sometimes ya just halfta ... - - echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" - - env - - exit 1 + - pwd - rm -rf yajl - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && cd .. - sudo make --directory=yajl install @@ -30,7 +28,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: LD_LIBRARY_PATH=yajl:$LD_LIBRARY_PATH make bgrun test doc +script: LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH make bgrun test doc notifications: email: false irc: false From 67429431eeb2d0c308b35665a37e3742aad6f799 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:01:52 -0400 Subject: [PATCH 51/73] Fdjskfjjjjjjjjj --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c2a30d2687..846cd0c9d8 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ pyflakes: env $(env_bin)/pyflakes *.py bin gratipay tests test: test-schema - $(py_test) --cov gratipay ./tests/test_npm_sync.py + $(py_test) --cov gratipay ./tests/py/test_npm_sync.py @$(MAKE) --no-print-directory pyflakes pytest: env From 1ca281108f737ba17aa57d0d7fedca91ae247986 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:10:17 -0400 Subject: [PATCH 52/73] Is this necessary? --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index edc905b44b..713bfd7892 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH make bgrun test doc +script: make bgrun test doc notifications: email: false irc: false From fcdbe28f8e8379cc3de0b16a4459d7b82dca1247 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:18:28 -0400 Subject: [PATCH 53/73] Cleanup --- .travis.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 713bfd7892..1a8525aed5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,12 +6,10 @@ branches: - master before_install: - git branch -vv | grep '^*' + - pwd # Sometimes ya just halfta ... - - pwd - - rm -rf yajl - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && cd .. - - sudo make --directory=yajl install + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix `pwd` && make install && cd .. - npm install -g marky-markdown cache: @@ -28,7 +26,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: make bgrun test doc +script: LD_LIBRARY_PATH=`pwd`/yajl:$LD_LIBRARY_PATH make bgrun test doc notifications: email: false irc: false From 57aebff59ee3e2e367307816cd1ed44d75074e4b Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:23:20 -0400 Subject: [PATCH 54/73] Work around yajl limitation --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1a8525aed5..8ba8b701e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ before_install: - pwd # Sometimes ya just halfta ... - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure --prefix `pwd` && make install && cd .. + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure -p `pwd` && make install && cd .. - npm install -g marky-markdown cache: From 05e0c4585272ef2a379164518e0ad7afe2e50564 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:32:16 -0400 Subject: [PATCH 55/73] Fix LD_LIBRARY_PATH --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8ba8b701e2..d9b99d8328 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,8 @@ before_install: - pwd # Sometimes ya just halfta ... - - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure -p `pwd` && make install && cd .. + - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 + - test -f Makefile || ./configure -p `pwd` && make install && cd .. - npm install -g marky-markdown cache: @@ -26,7 +27,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: LD_LIBRARY_PATH=`pwd`/yajl:$LD_LIBRARY_PATH make bgrun test doc +script: LD_LIBRARY_PATH=`pwd`/yajl/lib:$LD_LIBRARY_PATH make bgrun test doc notifications: email: false irc: false From 52e5dde1cf93125f4cb7ead0e96a1dca09ca3de4 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:38:11 -0400 Subject: [PATCH 56/73] Is this enough? --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d9b99d8328..95ec248088 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: LD_LIBRARY_PATH=`pwd`/yajl/lib:$LD_LIBRARY_PATH make bgrun test doc +script: LD_LIBRARY_PATH=/home/travis/build/gratipay/gratipay.com/yajl/lib make bgrun test doc notifications: email: false irc: false From 3595c18f0ef1aecd54f35c942ead2785592b581f Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:46:25 -0400 Subject: [PATCH 57/73] Harumph. I guess we drop back to /usr/local? --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 95ec248088..a20fd34c7a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ before_install: # Sometimes ya just halfta ... - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 - - test -f Makefile || ./configure -p `pwd` && make install && cd .. + - test -f Makefile || ./configure && sudo make install && cd .. - npm install -g marky-markdown cache: @@ -27,7 +27,7 @@ before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi -script: LD_LIBRARY_PATH=/home/travis/build/gratipay/gratipay.com/yajl/lib make bgrun test doc +script: LD_LIBRARY_PATH=/usr/local/lib make bgrun test doc notifications: email: false irc: false From f5c4f3765232852dc00ed51947c2b87710ae6b6c Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 07:56:09 -0400 Subject: [PATCH 58/73] Force a rebuild of yajl --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a20fd34c7a..0d039fbab5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ before_install: # Sometimes ya just halfta ... - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 + - rm Makefile - test -f Makefile || ./configure && sudo make install && cd .. - npm install -g marky-markdown From 366dc862c741a428b412bc327ad8c6907b4176e4 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 08:00:16 -0400 Subject: [PATCH 59/73] Cool. So does this work now? --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0d039fbab5..a20fd34c7a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,6 @@ before_install: # Sometimes ya just halfta ... - test -d yajl || git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 - - rm Makefile - test -f Makefile || ./configure && sudo make install && cd .. - npm install -g marky-markdown From dde52dbfed1f035e4e6f32c70e2dd763aa2b8956 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 08:06:04 -0400 Subject: [PATCH 60/73] Turn the rest of the tests back on --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 846cd0c9d8..aa1d280d49 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ pyflakes: env $(env_bin)/pyflakes *.py bin gratipay tests test: test-schema - $(py_test) --cov gratipay ./tests/py/test_npm_sync.py + $(py_test) --cov gratipay ./tests/ @$(MAKE) --no-print-directory pyflakes pytest: env From ea143be5dcc98d9a6c5ed2a5d79cf32044ca3029 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 08:07:45 -0400 Subject: [PATCH 61/73] Remove a logging line --- gratipay/package_managers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gratipay/package_managers/sync.py b/gratipay/package_managers/sync.py index 4c4f740fdd..79156b249f 100644 --- a/gratipay/package_managers/sync.py +++ b/gratipay/package_managers/sync.py @@ -58,7 +58,7 @@ def log_stats(): .format(nprocessed, time.time() - start)) for prefix, event, value in parser: - log(prefix, event, value) + if not prefix and event == b'map_key': # Flush the current package. We count on the first package being garbage. From 283a26f11a80c2a8bde912b4481114aaa61eb967 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 08:17:33 -0400 Subject: [PATCH 62/73] Start working up a script for Heroku --- bin/npm.sh | 8 -------- bin/sync-npm.sh | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) delete mode 100644 bin/npm.sh create mode 100755 bin/sync-npm.sh diff --git a/bin/npm.sh b/bin/npm.sh deleted file mode 100644 index 0c76ca1379..0000000000 --- a/bin/npm.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -set -e -cd "`dirname $0`/.." - -wget https://registry.npmjs.com/-/all -./env/bin/python ./bin/npm.py serialize all > serialized -./env/bin/python ./bin/npm.py upsert serialized diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh new file mode 100755 index 0000000000..b37dfc7e1f --- /dev/null +++ b/bin/sync-npm.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -e +cd "`dirname $0`/.." + +# Install dependencies. +git clone https://github.com/lloyd/yajl.git +cd yajl +git checkout 2.1.0 +./configure +sudo make install +cd .. + +./env/bin/pip install -i ijson==2.3.0 + + +wget https://registry.npmjs.com/-/all +./env/bin/sync-npm serialize all > serialized +./env/bin/sync-npm upsert serialized From 2ec25ae7f2c303c696431b549dd64ec57f906b8e Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:06:43 -0400 Subject: [PATCH 63/73] Okay! Let the Heroku bashing commence! --- bin/sync-npm.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index b37dfc7e1f..859ee8d32f 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -4,6 +4,8 @@ set -e cd "`dirname $0`/.." # Install dependencies. +pip install -e . +pip install -i ijson==2.3.0 git clone https://github.com/lloyd/yajl.git cd yajl git checkout 2.1.0 @@ -11,9 +13,7 @@ git checkout 2.1.0 sudo make install cd .. -./env/bin/pip install -i ijson==2.3.0 +URL=https://registry.npmjs.com/-/all +URL=https://gist.githubusercontent.com/whit537/fec53fb1f0618b3d5757f0ab687b7476/raw/25de82f6197df49b47d180db0d62b4e8c6f7f9f8/one - -wget https://registry.npmjs.com/-/all -./env/bin/sync-npm serialize all > serialized -./env/bin/sync-npm upsert serialized +curl $URL | sync-npm serialize /dev/stdin | sync-npm upsert /dev/stdin From de7821d97f4c2070fa08800b8d9af13677bc8a31 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:21:31 -0400 Subject: [PATCH 64/73] Groan. YAJL requires cmake. O.o --- bin/sync-npm.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 859ee8d32f..2fd2a027c4 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -5,7 +5,9 @@ cd "`dirname $0`/.." # Install dependencies. pip install -e . -pip install -i ijson==2.3.0 +pip install ijson==2.3.0 + +sudo ln -s `which make` /usr/local/bin/cmake git clone https://github.com/lloyd/yajl.git cd yajl git checkout 2.1.0 From d46ec973c157610a7effa6fa5f715e03d3d6fe63 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:26:03 -0400 Subject: [PATCH 65/73] We can only write to /app --- bin/sync-npm.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 2fd2a027c4..612c2280e3 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -7,7 +7,8 @@ cd "`dirname $0`/.." pip install -e . pip install ijson==2.3.0 -sudo ln -s `which make` /usr/local/bin/cmake +ln -s `which make` bin/cmake +PATH=/app/bin:$PATH git clone https://github.com/lloyd/yajl.git cd yajl git checkout 2.1.0 From 468e395d3b7f81c54a0ddd7d736d0275f7629b1d Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:39:14 -0400 Subject: [PATCH 66/73] Gosh. Big tarball to download 24 times a day! O.o --- bin/sync-npm.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 612c2280e3..2183cbebdf 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -7,8 +7,12 @@ cd "`dirname $0`/.." pip install -e . pip install ijson==2.3.0 -ln -s `which make` bin/cmake -PATH=/app/bin:$PATH +curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz +test sha256sum cmake-3.6.2-Linux-x86_64.tar.gz = \ + 5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 +tar zxf cmake-3.6.2-Linux-x86_64.tar.gz +PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH + git clone https://github.com/lloyd/yajl.git cd yajl git checkout 2.1.0 From 4077a386fec23f795040a025b27adfeb6e32d210 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:43:20 -0400 Subject: [PATCH 67/73] Curl writes to stdout --- bin/sync-npm.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 2183cbebdf..05c2d231bb 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -7,10 +7,9 @@ cd "`dirname $0`/.." pip install -e . pip install ijson==2.3.0 -curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz -test sha256sum cmake-3.6.2-Linux-x86_64.tar.gz = \ - 5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 -tar zxf cmake-3.6.2-Linux-x86_64.tar.gz +curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz > cmake.tgz +test sha256sum cmake.tgz = 5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 +tar zxf cmake.tgz PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH git clone https://github.com/lloyd/yajl.git From 7f519c1eab146fba4762d3ea56f3e32ddf34905a Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:47:20 -0400 Subject: [PATCH 68/73] Fiddle with checksum check --- bin/sync-npm.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 05c2d231bb..94139b5bad 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -8,7 +8,8 @@ pip install -e . pip install ijson==2.3.0 curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz > cmake.tgz -test sha256sum cmake.tgz = 5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 +test echo '5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 cmake.tgz' \ + | sha256sum cmake.tgz -c /dev/stdin tar zxf cmake.tgz PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH From 8cc56023155c059212dbcda44e3f1779583239f3 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:55:33 -0400 Subject: [PATCH 69/73] Fix sha256sum call --- bin/sync-npm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 94139b5bad..686ac57e7c 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -9,7 +9,7 @@ pip install ijson==2.3.0 curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz > cmake.tgz test echo '5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 cmake.tgz' \ - | sha256sum cmake.tgz -c /dev/stdin + | sha256sum -c /dev/stdin --status tar zxf cmake.tgz PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH From f4f1655b3f225cb1d0eb96ae4d96ae6206b12189 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 09:58:34 -0400 Subject: [PATCH 70/73] We don't have to test, it just fails --- bin/sync-npm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 686ac57e7c..9ad8f2b80f 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -8,7 +8,7 @@ pip install -e . pip install ijson==2.3.0 curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz > cmake.tgz -test echo '5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 cmake.tgz' \ +echo '5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 cmake.tgz' \ | sha256sum -c /dev/stdin --status tar zxf cmake.tgz PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH From 03142c00a6c116206c38509ea1b1990a6a2c0b27 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 10:09:49 -0400 Subject: [PATCH 71/73] Put libyajl where we can find it This location is already in LD_LIBRARY_PATH. --- bin/sync-npm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 9ad8f2b80f..44202527c8 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -16,8 +16,8 @@ PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH git clone https://github.com/lloyd/yajl.git cd yajl git checkout 2.1.0 -./configure -sudo make install +./configure -p /app/.heroku/python +make install cd .. URL=https://registry.npmjs.com/-/all From 56ee188c21a9a460edb30c2240ef26698006dcf6 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 10:15:36 -0400 Subject: [PATCH 72/73] Add some comments --- bin/sync-npm.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index 44202527c8..dc271e5f84 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -1,18 +1,21 @@ #!/bin/sh +# This is a script to run under the Heroku Scheduler add-on to periodically +# sync our database with the npm registry. set -e cd "`dirname $0`/.." # Install dependencies. -pip install -e . -pip install ijson==2.3.0 +# ===================== +# cmake - required by ... curl https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz > cmake.tgz echo '5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5 cmake.tgz' \ | sha256sum -c /dev/stdin --status tar zxf cmake.tgz PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH +# yajl git clone https://github.com/lloyd/yajl.git cd yajl git checkout 2.1.0 @@ -20,7 +23,14 @@ git checkout 2.1.0 make install cd .. +# python +pip install ijson==2.3.0 +pip install -e . + + +# Sync with npm. +# ============== + URL=https://registry.npmjs.com/-/all URL=https://gist.githubusercontent.com/whit537/fec53fb1f0618b3d5757f0ab687b7476/raw/25de82f6197df49b47d180db0d62b4e8c6f7f9f8/one - curl $URL | sync-npm serialize /dev/stdin | sync-npm upsert /dev/stdin From cc8ea2c24422cf05db65a411ce3fe312c8bc8893 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Wed, 26 Oct 2016 10:22:01 -0400 Subject: [PATCH 73/73] Switch to the real URL! Ready to deploy! :O --- bin/sync-npm.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/sync-npm.sh b/bin/sync-npm.sh index dc271e5f84..ffbef5ec2d 100755 --- a/bin/sync-npm.sh +++ b/bin/sync-npm.sh @@ -32,5 +32,4 @@ pip install -e . # ============== URL=https://registry.npmjs.com/-/all -URL=https://gist.githubusercontent.com/whit537/fec53fb1f0618b3d5757f0ab687b7476/raw/25de82f6197df49b47d180db0d62b4e8c6f7f9f8/one curl $URL | sync-npm serialize /dev/stdin | sync-npm upsert /dev/stdin