-
Notifications
You must be signed in to change notification settings - Fork 2
/
sql-dataset-common.sql
47 lines (47 loc) · 1.5 KB
/
sql-dataset-common.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
-- IMPORTANT: table name will be replaced with the property tasks[].tableName
SELECT ed.accession,
ed.sample_accession,
ed.location,
ed.country,
ed.identified_by,
ed.collected_by,
ed.collection_date,
ed.specimen_voucher,
ed.sequence_md5,
ed.scientific_name,
ed.tax_id,
ed.altitude,
ed.sex,
ed.description,
ed.host,
et.kingdom,
et.phylum,
et.class,
et."order",
et.family,
et.genus
FROM (
SELECT row_number()
over (PARTITION BY tax_id, scientific_name, collection_date, location, country, collected_by, identified_by, sample_accession ORDER BY tax_id) as row_num_1,
row_number()
over (PARTITION BY scientific_name, specimen_voucher) as row_num_2,
accession,
sample_accession,
location,
country,
identified_by,
collected_by,
collection_date,
specimen_voucher,
sequence_md5,
scientific_name,
tax_id,
altitude,
sex,
description,
host
FROM embl_data
WHERE scientific_name NOT ILIKE 'Homo%sapiens%') as ed
AND host NOT ILIKE '%Homo%sap%'
LEFT JOIN ena_taxonomy et ON ed.tax_id = et.taxon_id
WHERE ed.row_num_1 < 50 AND (ed.row_num_2 = 1 OR ed.specimen_voucher = '')