-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03_copytohdfs.sql
76 lines (64 loc) · 2.18 KB
/
03_copytohdfs.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
CREATE DATABASE IF NOT EXISTS hwxdemo;
USE hwxdemo;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.optimize.sort.dynamic.partition=true;
drop table if exists flights_hdfs purge;
drop table if exists airports_hdfs purge;
drop table if exists airlines_hdfs purge;
drop table if exists planes_hdfs purge;
create table airports_hdfs
STORED AS ORC
AS SELECT * from airports
;
create table airlines_hdfs
STORED AS ORC
AS SELECT * from airlines
;
create table planes_hdfs
STORED AS ORC
AS SELECT * from planes
;
create table flights_hdfs (
DateOfFlight date,
DepTime int,
CRSDepTime int,
ArrTime int,
CRSArrTime int,
UniqueCarrier string,
FlightNum int,
TailNum string,
ActualElapsedTime int,
CRSElapsedTime int,
AirTime int,
ArrDelay int,
DepDelay int,
Origin string,
Dest string,
Distance int,
TaxiIn int,
TaxiOut int,
Cancelled int,
CancellationCode varchar(1),
Diverted varchar(1),
CarrierDelay int,
WeatherDelay int,
NASDelay int,
SecurityDelay int,
LateAircraftDelay int
)
PARTITIONED BY (Year int)
STORED AS ORC
TBLPROPERTIES("orc.bloom.filter.columns"="*")
;
INSERT OVERWRITE TABLE flights_hdfs PARTITION(year)
SELECT * FROM flights;
alter table airports_hdfs add constraint airports_hdfs_c1 primary key (iata) disable novalidate;
alter table airlines_hdfs add constraint airlines_hdfs_c1 primary key (code) disable novalidate;
alter table planes_hdfs add constraint planes_hdfs_c1 primary key (tailnum) disable novalidate;
alter table flights_hdfs add constraint flights_hdfs_new_c1 foreign key (Origin) references airports_hdfs(iata) disable novalidate rely;
alter table flights_hdfs add constraint flights_hdfs_new_c2 foreign key (Dest) references airports_hdfs(iata) disable novalidate rely;
alter table flights_hdfs add constraint flights_hdfs_new_c3 foreign key (UniqueCarrier) references airlines_hdfs(code) disable novalidate rely;
alter table flights_hdfs add constraint flights_hdfs_new_c4 foreign key (TailNum) references planes_hdfs(TailNum) disable novalidate rely;
ANALYZE TABLE airports_hdfs COMPUTE STATISTICS FOR COLUMNS;
ANALYZE TABLE airlines_hdfs COMPUTE STATISTICS FOR COLUMNS;
ANALYZE TABLE planes_hdfs COMPUTE STATISTICS FOR COLUMNS;