import logging
import os.path
from html import escape
from os import listdir
from os.path import isfile, join
from ..domains import url2domain, url2path
# Module-wide logger; all messages from this file go to the "mdmaug" logger.
logger = logging.getLogger("mdmaug")
class SpyParser:
    """Enrich crawl results with the recorded calls of dangerous functions.

    Every ``*.spy`` file found in the crawl's cache directory is attached to
    the URL named on the file's first line.
    """

    def __init__(self, crawl):
        """Register each ``.spy`` file of ``crawl.cache_dir`` in ``crawl``.

        The first line of a spy file holds the URL the file belongs to.  The
        file path is appended to
        ``crawl[url2domain(url)].urls[url2path(url)].spyfiles``.

        Files whose first line is empty or cannot be decoded are skipped with
        a debug message.
        """
        for name in listdir(crawl.cache_dir):
            # Build the path once with join() so that the file we test is the
            # file we open, whether or not cache_dir ends with a separator.
            path = join(crawl.cache_dir, name)
            if not isfile(path) or os.path.splitext(name)[1] != ".spy":
                continue

            with open(path, "r") as f:
                try:
                    # The first line is the URL; drop the trailing line break.
                    url = f.readline().rstrip()
                except ValueError:
                    # UnicodeDecodeError is a ValueError: treat an unreadable
                    # header like a missing one.
                    url = ""

            if not url:
                # The header is missing; the browser writing the file was
                # probably killed. Skip this file.
                logger.debug(f"({crawl.profile}) no contents fetched/found")
                continue

            crawl[url2domain(url)].urls[url2path(url)].spyfiles.append(path)

    @staticmethod
    def get_short(spyfile):
        """Return a shortened, HTML-escaped summary of one ``.spy`` file.

        The first line of the file is metadata and is ignored.  The rest is
        split into events on three consecutive line breaks; each event is
        expected to look like ``command: parameters``.

        At most the first nine non-empty events are reported, and the
        parameters of each are cut to at most 100 characters before being
        HTML-escaped.  Events without a ``": "`` separator are logged and
        left out (they still count towards the limit).

        :param spyfile: path of the ``.spy`` file to read
        :return: tuple ``(text, shorten)`` where ``text`` is the joined event
            summaries and ``shorten`` is True if any parameters were truncated
            or events were dropped because of the limit
        """
        with open(spyfile, "r") as f:
            f.readline()  # the first line is metadata
            events = f.read().split("\n\n\n")

        shorten = False
        result = []
        event_count = 0
        for event in events:
            if not event:
                continue  # e.g. the empty string after the last separator
            event_count += 1
            if event_count >= 10:
                # Reaching the tenth event stops processing: nine are reported.
                shorten = True
                break
            try:
                command, parameters = event.split(": ", 1)
            except ValueError:
                logger.debug("Spy - valuer error. Nepouziju spy.")
                logger.debug(event)
                logger.debug(event.split(": ", 1))
                continue
            if len(parameters) > 100:
                # These parameters are longer than 100 characters; cut them.
                shorten = True
                parameters = parameters[:100]
            # NOTE(review): only the parameters are escaped, the command is
            # emitted as-is - confirm the command can never carry markup.
            result.append("{} {}".format(command, escape(parameters)))

        # NOTE(review): in the source this separator literal was broken across
        # two physical lines; it is kept as backslash-n, comma, line break -
        # confirm the intended separator.
        return "\n,\n".join(result), shorten