Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed syntax and ordering issues #7

Open
wants to merge 1 commit into
base: kubernetes
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions adscraper.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,6 @@ CREATE TABLE job (
job_config JSON
);

-- Crawl table. Each row may point at a row of the job table through job_id.
CREATE TABLE crawl (
    id                        SERIAL PRIMARY KEY,
    -- Foreign key to job(id); nullable as declared.
    job_id                    INTEGER REFERENCES job(id),
    name                      TEXT,
    start_time                TIMESTAMPTZ,
    -- NOTE(review): presumably whether and when the crawl finished; confirm
    -- against the crawler code.
    completed                 BOOLEAN,
    completed_time            TIMESTAMPTZ,
    -- NOTE(review): crawl_list looks like an identifier or path for the list
    -- of crawl targets, with the index/length columns tracking progress
    -- through it; confirm against the crawler code.
    crawl_list                TEXT,
    crawl_list_current_index  INTEGER,
    crawl_list_length         INTEGER,
    profile_dir               TEXT,
    crawler_hostname          TEXT,
    crawler_ip                TEXT
    -- Disabled columns, kept as comments:
    -- geolocation TEXT,
    -- vpn_hostname TEXT,
);

-- A row in this table is created for every page visited by the crawler.
CREATE TABLE page (
id SERIAL PRIMARY KEY,
Expand Down Expand Up @@ -71,15 +54,33 @@ CREATE TABLE page (
referrer_page_url TEXT,
-- If this is a subpage or ad landing page, and the parent page was scraped,
-- the id of the parent page.
referrer_page INTEGER references page(id)
referrer_page INTEGER references page(id),
-- If this is an ad landing page, the id of the ad that opened this page.
-- Field is added later, after the ad table is defined.
-- referrer_ad INTEGER references ad(id)

-- Error message, if a fatal error is encountered while crawling this page
error TEXT;
error TEXT
);

-- Crawl table. Each row may point at a row of the job table through job_id.
CREATE TABLE crawl (
    id                        SERIAL PRIMARY KEY,
    -- Foreign key to job(id); nullable as declared.
    job_id                    INTEGER REFERENCES job(id),
    name                      TEXT,
    start_time                TIMESTAMPTZ,
    -- NOTE(review): presumably whether and when the crawl finished; confirm
    -- against the crawler code.
    completed                 BOOLEAN,
    completed_time            TIMESTAMPTZ,
    -- NOTE(review): crawl_list looks like an identifier or path for the list
    -- of crawl targets, with the index/length columns tracking progress
    -- through it; confirm against the crawler code.
    crawl_list                TEXT,
    crawl_list_current_index  INTEGER,
    crawl_list_length         INTEGER,
    profile_dir               TEXT,
    crawler_hostname          TEXT,
    crawler_ip                TEXT
    -- Disabled columns, kept as comments:
    -- geolocation TEXT,
    -- vpn_hostname TEXT,
);


CREATE TABLE chumbox (
id SERIAL PRIMARY KEY,
platform TEXT,
Expand Down Expand Up @@ -167,4 +168,4 @@ CREATE TABLE request (
target_url TEXT,
resource_type TEXT,
sec_fetch_site TEXT
);
);