Verified Commit 69bea908 authored by Martin Petráček's avatar Martin Petráček
Browse files

domain replaces - configurable

parent fe6acbee
{
"facebook.com": "facebook.com",
"fbstatic-a.akamaihd.net": "facebook.com",
".fbcdn.net": "facebook.com",
"fbcdn-": "facebook.com",
".facebook.net": "facebook.com",
"youtube.": "youtube.com",
"youtu.be.": "youtube.com",
"yt3.ggpht.com": "youtube.com",
".googlevideo.com": "youtube.com",
".ytimg.com": "youtube.com",
"youtube-nocookie.": "youtube.com",
".google.": "google.com",
".gstatic.com": "google.com",
".googlesyndication.com": "google.com",
".googletagservices.com": "google.com",
".2mdn.net": "google.com",
".doubleclick.net": "google.com",
"googleads.": "google.com",
"google-analytics.": "google.com",
"googleusercontent.": "google.com",
"googleadservices.": "google.com",
"googleapis.com": "google.com"
}
......@@ -9,6 +9,7 @@ import sqlite3
import signal
import errno
import re
import json
interval = 3600
......@@ -22,24 +23,13 @@ con = sqlite3.connect('/var/lib/pakon.db')
c = con.cursor()
# list from ndpi - https://github.com/ntop/nDPI/blob/dev/src/lib/ndpi_content_match.c.inc#L8025
# TODO: move this to separate config-like file
# Hostname fragment -> canonical service name, grouped by service.
# Fragments are listed in their original order so that iterating the
# dict yields the same key sequence on interpreters that keep insertion order.
adict = {
    fragment: service
    for service, fragments in (
        ("facebook.com", (
            "facebook.com",
            "fbstatic-a.akamaihd.net",
            ".fbcdn.net",
            "fbcdn-",
            ".facebook.net",
        )),
        ("youtube.com", (
            "youtube.",
            "youtu.be.",
            "yt3.ggpht.com",
            ".googlevideo.com",
            ".ytimg.com",
            "youtube-nocookie.",
            "ggpht.com",
            "googleusercontent.com",
        )),
    )
    for fragment in fragments
}
# Load the hostname-fragment -> service-name table from the packaged
# JSON file. The script cannot do anything useful without it, so any
# failure to read or parse the file is fatal (exit status 1).
adict = {}
try:
    # The context manager closes the file even if parsing fails.
    with open('/usr/share/pakon-light/domains_replace.json') as data_file:
        adict = json.load(data_file)
except (IOError, ValueError) as e:
    # IOError: file missing or unreadable.
    # ValueError: file exists but is not valid JSON (json's decode error
    # is a ValueError); previously this escaped as an unhandled traceback.
    print("can't load domains_replace file: " + str(e))
    sys.exit(1)
# Attach the replacement table to multiple_replace as function attributes,
# together with one combined pattern: every key is regex-escaped, the keys
# are joined as alternatives, and the group is wrapped in ^.* ... .*$ so the
# pattern spans the whole string around whichever key it finds.
multiple_replace.adict = adict
multiple_replace.rx = re.compile(
    "^.*(" + '|'.join(re.escape(key) for key in adict) + ").*$"
)
......@@ -47,11 +37,11 @@ multiple_replace.rx = re.compile("^.*("+'|'.join(map(re.escape, adict))+").*$")
# Rewrite recently seen hostnames to their canonical service name.
#
# NOTE(review): time.mktime() interprets its argument as local time but is
# given a UTC tuple here, so `now` is shifted by the local UTC offset on
# hosts not running in UTC. Left unchanged because the convention used when
# writing traffic.start is not visible here - confirm before changing.
now = int(time.mktime(datetime.datetime.utcnow().timetuple()))
# Only rows that started within the last two intervals are considered.
start = now-interval*2
replaced = 0
# Fetch all candidate hostnames before issuing any UPDATE: the updates
# modify the same table (and column) the SELECT reads, and changing rows
# under a still-open SELECT cursor has no guaranteed outcome in SQLite.
rows = c.execute('SELECT DISTINCT(app_hostname) FROM traffic WHERE start >= ? AND app_hostname IS NOT NULL AND flow_id IS NULL', (start,)).fetchall()
# One cursor is enough for all updates; rowcount is read after each execute.
t = con.cursor()
for row in rows:
    name = multiple_replace(row[0])
    if name != row[0]:
        t.execute("UPDATE traffic SET app_hostname = ? WHERE app_hostname = ? AND flow_id IS NULL", (name, row[0]))
        replaced += t.rowcount
# Single commit so all replacements land together.
con.commit()
print("Replaced "+str(replaced)+" hostnames")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment