... | ... | @@ -8,7 +8,7 @@ |
|
|
|
|
|
```sql
|
|
|
create table dnscol.csv_import (
|
|
|
time TIMESTAMP, -- has nanosecond precision in Impala
|
|
|
time DOUBLE, -- convert to timestamp
|
|
|
delay_us INT,
|
|
|
req_dns_len INT,
|
|
|
resp_dns_len INT,
|
... | ... | @@ -27,13 +27,13 @@ qtype INT, |
|
|
qclass INT,
|
|
|
opcode INT,
|
|
|
rcode INT,
|
|
|
resp_aa BOOLEAN,
|
|
|
resp_tc BOOLEAN,
|
|
|
req_rd BOOLEAN,
|
|
|
resp_ra BOOLEAN,
|
|
|
req_z BOOLEAN,
|
|
|
resp_ad BOOLEAN,
|
|
|
req_cd BOOLEAN,
|
|
|
resp_aa INT, -- convert to boolean
|
|
|
resp_tc INT, -- convert to boolean
|
|
|
req_rd INT, -- convert to boolean
|
|
|
resp_ra INT, -- convert to boolean
|
|
|
req_z INT, -- convert to boolean
|
|
|
resp_ad INT, -- convert to boolean
|
|
|
req_cd INT, -- convert to boolean
|
|
|
qname STRING,
|
|
|
resp_ancount INT,
|
|
|
resp_arcount INT,
|
... | ... | @@ -58,11 +58,13 @@ ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' ESCAPED BY '\\'; |
|
|
Upload the data to HDFS:
|
|
|
```
|
|
|
hdfs dfs -mkdir -p /user/tgavenciak/andraste-import1/
|
|
|
hdfs dfs -put andraste/andraste-*.csv.gz /user/tgavenciak/andraste-import1/
|
|
|
hdfs dfs -put andraste-20160401-*.csv.gz /user/tgavenciak/andraste-import1/
|
|
|
hdfs dfs -chmod a+rwx /user/tgavenciak/andraste-import1/
|
|
|
```
|
|
|
|
|
|
Import into impala (moves the files into Impala)
|
|
|
```sql
|
|
|
alter table dnscol.csv_import add PARTITION (server="andraste", batchname="2016-08-24-tgimport1");
|
|
|
LOAD DATA INPATH '/user/tgavenciak/andraste-import1/' INTO TABLE dnscol.csv_import PARTITION (server="andraste", batchname="2016-08-24-tgimport1");
|
|
|
```
|
|
|
|
... | ... | @@ -82,39 +84,39 @@ INSERT INTO dns.queries (id, unixtime, time, qname, domainname, len, frag, ttl, |
|
|
dns_res_len, server_location)
|
|
|
PARTITION (year, month, day, server)
|
|
|
SELECT
|
|
|
-- Entrada: id INT
|
|
|
-- Entrada: unixtime BIGINT
|
|
|
-- Entrada: time BIGINT
|
|
|
-- Entrada: qname STRING
|
|
|
-- Entrada: domainname STRING
|
|
|
-- Entrada: len INT
|
|
|
-- Entrada: frag INT
|
|
|
-- Entrada: ttl INT
|
|
|
-- Entrada: ipv INT
|
|
|
-- Entrada: prot INT
|
|
|
-- Entrada: src STRING
|
|
|
-- Entrada: srcp INT
|
|
|
-- Entrada: dst STRING
|
|
|
-- Entrada: dstp INT
|
|
|
-- Entrada: udp_sum INT
|
|
|
-- Entrada: dns_len INT
|
|
|
-- Entrada: aa BOOLEAN
|
|
|
-- Entrada: tc BOOLEAN
|
|
|
-- Entrada: rd BOOLEAN
|
|
|
-- Entrada: ra BOOLEAN
|
|
|
-- Entrada: z BOOLEAN
|
|
|
-- Entrada: ad BOOLEAN
|
|
|
-- Entrada: cd BOOLEAN
|
|
|
-- Entrada: ancount INT
|
|
|
-- Entrada: arcount INT
|
|
|
-- Entrada: nscount INT
|
|
|
-- Entrada: qdcount INT
|
|
|
-- Entrada: opcode INT
|
|
|
-- Entrada: rcode INT
|
|
|
-- Entrada: qtype INT
|
|
|
-- Entrada: qclass INT
|
|
|
-- Entrada: country STRING
|
|
|
-- Entrada: asn STRING
|
|
|
id, -- Entrada: id INT
|
|
|
time, -- Entrada: unixtime BIGINT
|
|
|
time * 1000, -- Entrada: time BIGINT
|
|
|
lower(qname), -- Entrada: qname STRING
|
|
|
regexp_extract(lower(qname), "([^.]+\\.[^.]+)\\.\\z", 1); -- Entrada: domainname STRING
|
|
|
req_pkt_len, -- Entrada: len INT
|
|
|
NULL, -- Entrada: frag INT
|
|
|
req_ttl, -- Entrada: ttl INT
|
|
|
net_ipv, -- Entrada: ipv INT
|
|
|
net_proto, -- Entrada: prot INT
|
|
|
client_addr, -- Entrada: src STRING
|
|
|
client_port, -- Entrada: srcp INT
|
|
|
server_addr, -- Entrada: dst STRING
|
|
|
server_port, -- Entrada: dstp INT
|
|
|
req_udp_sum, -- Entrada: udp_sum INT
|
|
|
req_dns_len, -- Entrada: dns_len INT
|
|
|
cast(resp_aa as BOOLEAN), -- Entrada: aa BOOLEAN
|
|
|
cast(resp_tc as BOOLEAN), -- Entrada: tc BOOLEAN
|
|
|
cast(req_rd as BOOLEAN), -- Entrada: rd BOOLEAN
|
|
|
cast(resp_ra as BOOLEAN), -- Entrada: ra BOOLEAN
|
|
|
cast(req_z as BOOLEAN), -- Entrada: z BOOLEAN
|
|
|
cast(resp_ad as BOOLEAN), -- Entrada: ad BOOLEAN
|
|
|
cast(req_cd as BOOLEAN), -- Entrada: cd BOOLEAN
|
|
|
resp_ancount, -- Entrada: ancount INT
|
|
|
resp_arcount, -- Entrada: arcount INT
|
|
|
resp_nscount, -- Entrada: nscount INT
|
|
|
1, -- Entrada: qdcount INT
|
|
|
opcode, -- Entrada: opcode INT
|
|
|
rcode, -- Entrada: rcode INT
|
|
|
qtype, -- Entrada: qtype INT
|
|
|
qclass, -- Entrada: qclass INT
|
|
|
NULL, -- Entrada: country STRING
|
|
|
NULL, -- Entrada: asn STRING
|
|
|
-- Entrada: edns_udp INT
|
|
|
-- Entrada: edns_version SMALLINT
|
|
|
-- Entrada: edns_do BOOLEAN
|
... | ... | |