Verified commit 2baf7a65 authored by Martin Petráček

create_db script, minor polishing

parent ffc3c6dd
#!/usr/bin/env python
#!/usr/bin/env python2
import fileinput
import os, os.path
@@ -14,11 +14,12 @@ import signal
import errno
import logging
__ARCHIVE_DB_PATH__ = "/srv/pakon/pakon-archive.db"
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
delimiter = '__uci__delimiter__'
# uci get wrapper
def uci_get(opt):
chld = subprocess.Popen(['/sbin/uci', '-d', delimiter, '-q', 'get', opt],
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
@@ -29,7 +30,10 @@ def uci_get(opt):
else:
return out
con = sqlite3.connect('/srv/pakon/pakon-archive.db')
if not os.path.isfile(__ARCHIVE_DB_PATH__):
subprocess.call(['/usr/bin/python3', '/usr/libexec/pakon-light/create_db.py'])
con = sqlite3.connect(__ARCHIVE_DB_PATH__)
con.row_factory = sqlite3.Row
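# squash() below (its body is elided in this hunk) appears to merge archived flows
# recorded at detail level `from_details` into coarser records at `to_details`:
# flows older than `start` are grouped into `window`-second buckets, optionally
# keeping the app_proto / app_hostname distinction (behaviour inferred from the
# call sites further down).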
def squash(from_details, to_details, start, window, honor_app_proto, honor_app_hostname):
global con
@@ -86,56 +90,36 @@ def squash(from_details, to_details, start, window, honor_app_proto, honor_app_hostname):
# Create database if it was empty
c = con.cursor()
try:
c.execute('CREATE TABLE traffic '
'(start real, duration integer, details integer,'
'src_mac text, src_ip text, src_port integer, dest_ip text, dest_port integer, '
'proto text, app_proto text, bytes_send integer, '
'bytes_received integer, app_hostname text)')
except:
logging.debug('Table "traffic" already exists')
try:
c.execute('CREATE INDEX traffic_lookup ON traffic(details, start, src_mac)')
except:
logging.debug('Index "traffic_lookup" already exists')
# Main loop
now = int(time.mktime(datetime.datetime.utcnow().timetuple()))
start = now-3600*24  # move flows from the live DB to the archive after 24 hours
try:
c.execute('ATTACH DATABASE "/var/lib/pakon.db" AS live')
c.execute('INSERT INTO traffic SELECT start, duration, 99, src_mac, src_ip, src_port, dest_ip, dest_port, proto, app_proto, bytes_send, bytes_received, app_hostname FROM live.traffic WHERE start < ? AND flow_id IS NULL', (start,))
logging.info("moved {} flows from live to archive".format(c.rowcount))
c.execute('DELETE FROM live.traffic WHERE start < ? AND flow_id IS NULL', (start,))
c.execute('VACUUM live')
con.commit()
#TODO: move constants to configuration
logging.info("squashed from 99 to 80 - deleted {}".format(squash(99,80,now-3600*24,60,True,True)))
logging.info("squashed from 80 to 70 - deleted {}".format(squash(80,70,now-3600*24*3,900,True,True)))
logging.info("squashed from 70 to 60 - deleted {}".format(squash(70,60,now-3600*24*7,3600,False,True)))
logging.info("squashed from 60 to 50 - deleted {}".format(squash(60,50,now-3600*24*14,3600,False,False)))
c.execute('DELETE FROM traffic WHERE start < ?', (now - 3600*24*28,))  # drop archive entries older than 28 days
c.execute('VACUUM')
con.commit()
c.execute('SELECT COUNT(*) FROM live.traffic')
logging.info("{} flows in live database".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 99')
logging.info("{} flows in archive on details level 99".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 80')
logging.info("{} flows in archive on details level 80".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 70')
logging.info("{} flows in archive on details level 70".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 60')
logging.info("{} flows in archive on details level 60".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 50')
logging.info("{} flows in archive on details level 50".format(c.fetchone()[0]))
except KeyboardInterrupt:
exit_gracefully()
c.execute('ATTACH DATABASE "/var/lib/pakon.db" AS live')
c.execute('INSERT INTO traffic SELECT start, duration, 99, src_mac, src_ip, src_port, dest_ip, dest_port, proto, app_proto, bytes_send, bytes_received, app_hostname FROM live.traffic WHERE start < ? AND flow_id IS NULL', (start,))
logging.info("moved {} flows from live to archive".format(c.rowcount))
c.execute('DELETE FROM live.traffic WHERE start < ? AND flow_id IS NULL', (start,))
c.execute('VACUUM live')
con.commit()
#TODO: move constants to configuration
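# squash(from_level, to_level, older_than, window_seconds, honor_app_proto, honor_app_hostname):
# flows older than the given age are merged into coarser detail levels
# (99 -> 80 -> 70 -> 60 -> 50), with longer aggregation windows and fewer
# preserved attributes the older the data gets.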
logging.info("squashed from 99 to 80 - deleted {}".format(squash(99,80,now-3600*24,60,True,True)))
logging.info("squashed from 80 to 70 - deleted {}".format(squash(80,70,now-3600*24*3,900,True,True)))
logging.info("squashed from 70 to 60 - deleted {}".format(squash(70,60,now-3600*24*7,3600,False,True)))
logging.info("squashed from 60 to 50 - deleted {}".format(squash(60,50,now-3600*24*14,3600,False,False)))
c.execute('DELETE FROM traffic WHERE start < ?', (now - 3600*24*28,))  # drop archive entries older than 28 days
c.execute('VACUUM')
con.commit()
except IOError as e:
if e.errno != errno.EINTR:
raise
c.execute('SELECT COUNT(*) FROM live.traffic')
logging.info("{} flows in live database".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 99')
logging.info("{} flows in archive on details level 99".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 80')
logging.info("{} flows in archive on details level 80".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 70')
logging.info("{} flows in archive on details level 70".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 60')
logging.info("{} flows in archive on details level 60".format(c.fetchone()[0]))
c.execute('SELECT COUNT(*) FROM traffic WHERE details = 50')
logging.info("{} flows in archive on details level 50".format(c.fetchone()[0]))
#!/usr/bin/env python3
import sqlite3
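# create_db.py sets up both Pakon databases (only if their tables do not exist yet):
# the live database /var/lib/pakon.db holding current flows and DNS records, and
# the long-term archive /srv/pakon/pakon-archive.db filled by the archiving script above.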
con = sqlite3.connect('/var/lib/pakon.db')
c = con.cursor()
c.execute('CREATE TABLE IF NOT EXISTS traffic (flow_id integer, start real, duration integer, src_mac text, src_ip text, src_port integer, dest_ip text, dest_port integer, proto text, app_proto text, bytes_send integer, bytes_received integer, app_hostname text)')
c.execute('CREATE INDEX IF NOT EXISTS start ON traffic(start)')
c.execute('CREATE UNIQUE INDEX IF NOT EXISTS flow_id ON traffic(flow_id) WHERE flow_id IS NOT NULL')
c.execute('CREATE TABLE IF NOT EXISTS dns (time integer, client text, name text, type text, data text)')
c.execute('CREATE INDEX IF NOT EXISTS tdc ON dns(time,data,client)')
c.execute('PRAGMA user_version=1')
con.commit()
con.close()
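# archive database: flows moved out of the live database and progressively
# aggregated by detail level (see the archiving script above)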
con = sqlite3.connect('/srv/pakon/pakon-archive.db')
c = con.cursor()
c.execute('CREATE TABLE IF NOT EXISTS traffic (start real, duration integer, details integer, src_mac text, src_ip text, src_port integer, dest_ip text, dest_port integer, proto text, app_proto text, bytes_send integer, bytes_received integer, app_hostname text)')
c.execute('CREATE INDEX IF NOT EXISTS traffic_lookup ON traffic(details, start, src_mac)')
c.execute('PRAGMA user_version=1')
con.commit()
con.close()
@@ -35,3 +35,7 @@
".speedtest.net": "speedtest.net"
".dropbox.com": "dropbox.com"
"github.com": "github.com"
"github.io": "github.io"
"githubusercontent.com": "github.com"
#!/usr/bin/env python
#!/usr/bin/env python2
import os
import sys
@@ -28,8 +28,6 @@ class MultiReplace:
return self.rx.sub(one_xlat, text)
adict={}
try:
with open('/usr/share/pakon-light/domains_replace.conf') as f:
...
#!/usr/bin/env python
#!/usr/bin/env python2
import os
import sys
@@ -29,10 +29,8 @@ def reverse_lookup(q_in, q_out):
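# a SIGALRM-based timeout (handler installed elsewhere in this script) bounds the
# blocking gethostbyaddr() call to dns_timeout seconds; failed or timed-out lookups
# are silently skipped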
signal.alarm(dns_timeout)
try:
name = socket.gethostbyaddr(ip)[0]
#print(ip+" -> "+name)
q_out.put((ip, name))
except:
#print("Can't resolve "+ip)
pass
signal.alarm(0)
...
#!/usr/bin/env python
#!/usr/bin/env python2
import fileinput
import json
@@ -29,54 +29,18 @@ def timestamp2unixtime(timestamp):
timestamp = timestamp*1.0 + dt.microsecond*1.0/1000000
return timestamp
con = False
if not os.path.isfile('/var/lib/pakon.db'):
subprocess.call(['/usr/bin/python3', '/usr/libexec/pakon-light/create_db.py'])
con = sqlite3.connect('/var/lib/pakon.db')
# prepare the database for storing logged data
try:
con = sqlite3.connect('/var/lib/pakon.db')
except:
logging.error("Can't open database!")
sys.exit(1)
# Create database if it was empty
c = con.cursor()
try:
c.execute('CREATE TABLE traffic '
'(flow_id integer, start real, duration integer,'
'src_mac text, src_ip text, src_port integer, dest_ip text, dest_port integer, '
'proto text, app_proto text, bytes_send integer, '
'bytes_received integer, app_hostname text)')
#app_hostname_type: 0 - unknown, 1 - tls/http(app level), 2 - dns, 3 - reverse lookup
except:
logging.debug('Table "traffic" already exists')
# flow_ids are only unique (and meaningful) during one run of this script
try:
c.execute('UPDATE traffic SET flow_id = NULL, duration = 0, bytes_send = 0, bytes_received = 0 WHERE flow_id IS NOT NULL')
con.commit()
except:
logging.debug('Error cleaning flow_id')
try:
c.execute('CREATE INDEX start ON traffic(start)')
c.execute('CREATE UNIQUE INDEX flow_id ON traffic(flow_id) WHERE flow_id IS NOT NULL')
except:
logging.debug('Indexes for table "traffic" already exists')
try:
c.execute('CREATE TABLE dns '
'(time integer, client text, name text, type text, data text)')
except:
logging.debug('Table "dns" already exists')
try:
c.execute('CREATE INDEX tdc ON dns(time,data,client)')
except:
logging.debug('Index for table "dns" already exists')
try:
c.execute('CREATE TABLE settings '
'(key text, value integer)')
c.execute('INSERT INTO settings VALUES (?, ?)', ('db_schema_version', 1))
except:
logging.debug('Table "settings" already exists')
# Main loop
def exit_gracefully(signum, frame):
global c, con
@@ -193,6 +157,3 @@ while True:
except IOError as e:
if e.errno != errno.EINTR:
raise
logging.error("End of data?")
logging.error("This may mean that suricata_conntrack_flows.py doesn't exist/is broken...")
#!/usr/bin/env python
#!/usr/bin/env python3
import os
import sys
@@ -26,56 +26,55 @@ if (len(sys.argv)<5):
print(" known MAC addresses are:")
for row in c.execute("select src_mac,COUNT(*) from traffic where flow_id IS NULL AND src_mac NOT LIKE '' GROUP BY src_mac"):
print(" "+row[0]+" - "+str(row[1])+" flows total")
sys.exit(1)
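# arguments, as used below: argv[1] = source MAC address (SQL LIKE pattern),
# argv[2] / argv[3] = start and end of the queried window, in seconds counted
# back from (now - 2 hours), argv[4] = sort key (duration, bytes_both,
# bytes_send or bytes_received)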
else:
now = time.time() - 2*3600
domains = []
time_from = now - int(sys.argv[2])
time_to = now - int(sys.argv[3])
last = [0,0,'',0,0]
last2 = [0,0]
for row in c.execute("""select start,duration,app_hostname,bytes_send,bytes_received from traffic where flow_id IS NULL AND src_mac LIKE ? AND (start BETWEEN ? AND ? OR (start+duration) BETWEEN ? AND ?)
UNION ALL
select start,duration,app_hostname,bytes_send,bytes_received from archive.traffic where src_mac LIKE ? AND (start BETWEEN ? AND ? OR (start+duration) BETWEEN ? AND ?)
ORDER BY app_hostname,start""", (sys.argv[1],time_from,time_to,time_from,time_to,sys.argv[1],time_from,time_to,time_from,time_to)):
row=[i for i in row]
if not row[2]:
row[2]=u''
if row[0]<time_from:
not_contained=time_from-row[0]
part=1.0*(row[1]-not_contained)/row[1]
row[1]-=int(not_contained)
row[3]=int(part*row[3])
row[4]=int(part*row[4])
if row[0]+row[1]>time_to:
not_contained=row[0]+row[1]-time_to
part=1.0*(row[1]-not_contained)/row[1]
row[1]-=int(not_contained)
row[3]=int(part*row[3])
row[4]=int(part*row[4])
if last[2]==row[2]:
if row[0] > last2[1]:
last[1]+=int(last2[1]-last2[0])
last2=[row[0],row[0]+row[1]]
else:
last2[1]=max(last2[1],row[0]+row[1])
last[3]+=int(row[3])
last[4]+=int(row[4])
else:
domains.append(last[1:])
last=[i for i in row]
last2 = [0,0]
domains.append(last[1:])
if sys.argv[4] == "duration":
domains = sorted(domains, key=lambda x: x[0])
elif sys.argv[4] == "bytes_both":
domains = sorted(domains, key=lambda x: x[2]+x[3])
elif sys.argv[4] == "bytes_send":
domains = sorted(domains, key=lambda x: x[2])
elif sys.argv[4] == "bytes_received":
domains = sorted(domains, key=lambda x: x[3])
now = time.time() - 2*3600
domains = []
time_from = now - int(sys.argv[2])
time_to = now - int(sys.argv[3])
last = [0,0,'',0,0]
last2 = [0,0]
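# rows arrive ordered by app_hostname, then start; flows only partly inside the
# [time_from, time_to] window get their duration and byte counters scaled by the
# contained fraction, and consecutive rows for the same hostname are merged,
# summing bytes and tracking the covered time span in last2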
for row in c.execute("""select start,duration,app_hostname,bytes_send,bytes_received from traffic where flow_id IS NULL AND src_mac LIKE ? AND (start BETWEEN ? AND ? OR (start+duration) BETWEEN ? AND ?)
UNION ALL
select start,duration,app_hostname,bytes_send,bytes_received from archive.traffic where src_mac LIKE ? AND (start BETWEEN ? AND ? OR (start+duration) BETWEEN ? AND ?)
ORDER BY app_hostname,start""", (sys.argv[1],time_from,time_to,time_from,time_to,sys.argv[1],time_from,time_to,time_from,time_to)):
row=[i for i in row]
if not row[2]:
row[2]=''
if row[0]<time_from:
not_contained=time_from-row[0]
part=1.0*(row[1]-not_contained)/row[1]
row[1]-=int(not_contained)
row[3]=int(part*row[3])
row[4]=int(part*row[4])
if row[0]+row[1]>time_to:
not_contained=row[0]+row[1]-time_to
part=1.0*(row[1]-not_contained)/row[1]
row[1]-=int(not_contained)
row[3]=int(part*row[3])
row[4]=int(part*row[4])
if last[2]==row[2]:
if row[0] > last2[1]:
last[1]+=int(last2[1]-last2[0])
last2=[row[0],row[0]+row[1]]
else:
last2[1]=max(last2[1],row[0]+row[1])
last[3]+=int(row[3])
last[4]+=int(row[4])
else:
print("unknown sorting key: "+sys.argv[4])
sys.exit(1)
for d in domains:
print(d)
domains.append(last[1:])
last=[i for i in row]
last2 = [0,0]
domains.append(last[1:])
if sys.argv[4] == "duration":
domains = sorted(domains, key=lambda x: x[0])
elif sys.argv[4] == "bytes_both":
domains = sorted(domains, key=lambda x: x[2]+x[3])
elif sys.argv[4] == "bytes_send":
domains = sorted(domains, key=lambda x: x[2])
elif sys.argv[4] == "bytes_received":
domains = sorted(domains, key=lambda x: x[3])
else:
print("unknown sorting key: "+sys.argv[4])
sys.exit(1)
for d in domains:
print(d)