import logging
import os.path
from html import escape
from os import listdir
from os.path import isfile, join

from ..domains import url2domain, url2path

logger = logging.getLogger("mdmaug")


class SpyParser:
    """Enrich crawl results (the ``crawl`` object) with spy logs of dangerous commands."""

    # get_short() stops as soon as it meets the 10th non-empty event,
    # so at most 9 events are ever reported.
    _MAX_EVENTS = 9
    # Longest parameter text (in characters, before HTML escaping) kept per event.
    _MAX_PARAMETER_CHARS = 100

    def __init__(self, crawl):
        """Register every ``.spy`` file found in ``crawl.cache_dir`` with its URL.

        The first line of a spy file holds the URL the file was recorded for.
        The file path is appended to
        ``crawl[url2domain(url)].urls[url2path(url)].spyfiles``.
        Files without a usable header line are skipped with a debug message.

        :param crawl: crawl result object; this method reads ``cache_dir`` and
            ``profile`` from it and indexes it by domain.
            NOTE(review): presumably indexing creates missing domain/URL
            entries on demand -- confirm against the crawl class.
        """
        # Legacy approach (disabled), kept for reference -- it read a single
        # aggregated log instead of per-URL files:
        #   spy = ""
        #   spyfile = crawl.logDir + "spy.spy"
        #   spy = "Nebezpečné funkce"   (a "Dangerous functions" heading)
        #   with open(spyfile, 'r') as f:
        #       spy += f.read()
        cache_dir = crawl.cache_dir
        for name in listdir(cache_dir):
            if os.path.splitext(name)[1] != ".spy":
                continue
            # Build the path with join() for both the check and the open, so a
            # cache_dir without a trailing separator still resolves correctly.
            path = join(cache_dir, name)
            if not isfile(path):
                continue

            with open(path, 'r') as f:
                try:
                    # The first line contains the URL of the file; the trailing
                    # line break is not wanted.
                    url = f.readline().rstrip()
                except ValueError:
                    # Includes UnicodeDecodeError: treat an undecodable header
                    # the same way as a missing one.
                    url = ""

            if not url:
                # The header is missing; the browser writing the file was
                # probably killed. Skip this file.
                logger.debug("(%s) no contents fetched/found", crawl.profile)
                continue

            crawl[url2domain(url)].urls[url2path(url)].spyfiles.append(path)

    @staticmethod
    def get_short(spyfile):
        """Return a shortened, HTML-escaped summary of one ``.spy`` file.

        A single spy file may record several JS function calls. The first line
        is metadata and is ignored; the remaining text is split into events on
        three consecutive line breaks. Each event is expected to look like
        ``command: parameters``; events without ``": "`` are logged and left
        out. At most 9 events are reported and the parameters of each are cut
        to 100 characters.

        :param spyfile: path of the spy file to read.
        :return: tuple ``(text, shorten)`` where ``text`` is the joined
            summary and ``shorten`` is True when anything was cut (too many
            events or over-long parameters).
        """
        with open(spyfile, "r") as f:
            f.readline()  # the first line is metadata
            body = f.read()

        shorten = False
        result = []
        event_count = 0
        # The browser (FF) separates recorded commands by three line breaks.
        for event in body.split("\n\n\n"):
            if not event:
                continue  # the last event is an empty string
            event_count += 1  # malformed events count towards the limit too
            if event_count > SpyParser._MAX_EVENTS:
                shorten = True
                break

            command, separator, parameters = event.partition(": ")
            if not separator:
                # Not in "command: parameters" form; do not use this event.
                logger.debug("Spy - valuer error. Nepouziju spy.")
                logger.debug(event)
                logger.debug(event.split(": ", 1))
                continue

            if len(parameters) > SpyParser._MAX_PARAMETER_CHARS:
                # This command is too long; shorten it.
                shorten = True
                parameters = parameters[:SpyParser._MAX_PARAMETER_CHARS]

            # Both parts come from the recorded page, so escape both; only the
            # parameters used to be escaped.
            result.append("{} {}".format(escape(command), escape(parameters)))

        # NOTE(review): in the reviewed text this separator literal was broken
        # across two lines, which suggests markup (possibly a line-break tag)
        # was lost from it -- confirm the exact separator in version control.
        return "\n,\n".join(result), shorten