Commit 3db7b09e authored by Edvard Rejthar's avatar Edvard Rejthar

better deployment

parent c3e84053
......@@ -23,6 +23,7 @@ Scans a website for signs of parasite hosts or commands.
## Tips
* You may use /static/demopage.html as a testing page.
* You may launch MDMaug with the environment variable `PORT` to change the port the application is bound to.
### Troubleshooting
#!/usr/bin/env python3
import atexit
import datetime
import logging
import os
......@@ -13,6 +14,15 @@ from xvfbwrapper import Xvfb
from .lib.config import Config
from .lib.controller.api import Api
fileHandler = logging.FileHandler("mdmmmmmmm.log")
fileHandler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
handlers = [fileHandler, consoleHandler]
logging.basicConfig(level=logging.INFO, handlers=handlers)
# assure the logging dir
if not os.path.exists(Config.LOG_DIR):
......@@ -47,13 +57,18 @@ if result is not 0:
#Please run `sudo bin/` to launch a server."
# We may want to specify another port for testing purposes, i.e. so that it does not compete with the port defined in the nginx configuration.
if os.environ.get("PORT"):
Config.APP_PORT = int(os.environ.get("PORT"))
# run display and flask server
run_display = False if os.environ.get("FIREFOX_DEBUG", 0) == "1" else True
if run_display:
display = Xvfb()
if 1:
# print(f'Listening at https://{address}:{Config.APP_PORT}')
from .lib.controller.server import app as server
......@@ -66,6 +81,11 @@ try:
ssl_context=(Config.DIR + 'cert-mdmaug.pem', Config.DIR + 'key-mdmaug.pem'), threaded=True)
# for _ in range(Config.profile_count):
# threading.Thread(target=httpd.serve_forever).start()
except (KeyboardInterrupt, SystemExit):
if run_display:
# except (KeyboardInterrupt, SystemExit):
# if run_display:
# display.stop()
# clean up - shut down the display.
# (This can't be caught as a simple exception since uWSGI runs multiple workers.
# Note that the display gets stopped for every worker (superfluously).)
atexit.register(lambda: display.stop() if run_display else None)
......@@ -27,5 +27,5 @@ while True:
method = "w"
with open(file, method +"b") as f: # without writing as bytes, some pages end up with UnicodeEncodeError: 'ascii' codec can't encode character '\xe9' in position 197: ordinal not in range(128)
with open(file, method + "b") as f: # without writing as bytes, some pages end up with UnicodeEncodeError: 'ascii' codec can't encode character '\xe9' in position 197: ordinal not in range(128)
......@@ -6,8 +6,8 @@ from glob import escape
from flask import request
from peewee import IntegrityError
from import domain2dir
from mdmaug.lib.model.crawl import Crawl
from import domain2dir
from ..model.crawl import Crawl
from .scan_controller import ScanController
from ..config import Config
from ..model.dbp import Encounter, Whitelist, Status
......@@ -75,7 +75,6 @@ class ScanController:
:param url: scanned url
:type cached: True = Any cached version, int = cached version X days old. If None or not found, site will be reanalysed
print("tttttttttt", url)
url = assure_url(url)
if not url:
return f'Invalid URL {escape(url)} {url}'
......@@ -84,12 +83,15 @@ class ScanController:
if cached:
domain = domain2dir(url)
scans = self.get_domain_scans(domain)
if scans:
# get the most recent snapdir and check if it's not too old
# scans = self.get_domain_scans(domain)
prep = Config.CACHE_DIR + domain + "/"
scan = max(scans, key=lambda s: os.path.getmtime(prep + s))
if not autoprune or not self.clean_scan(domain, scan):
scans = sorted(self.get_domain_scans(domain), key=lambda s: os.path.getmtime(prep + s))
while scans:
# get the most recent snapdir and check if it's not too old
scan = scans.pop()
if autoprune and self.clean_scan(domain, scan):
# if not autoprune or not self.clean_scan(domain, scan):
if cached is True or os.path.getmtime(prep + scan) > time.time() - (3600 * 24 * cached):
if creation_spree:
return f"Scan for {domain} already exists."
......@@ -98,7 +100,7 @@ class ScanController:
crawl = Crawl.load_from_scan(domain, scan)
return crawl
except ValueError:
logger.debug(f"({-1}) Convenient cached analysis not found for url {url}")
# perform fresh analysis
......@@ -134,24 +136,23 @@ class ScanController:
# ,nsSocketTransport:5,nsStreamPump:5,nsHostResolver:5
logger.debug("({}) FF -P {} -no-remote {}".format(self.profile, self.profile, self.url))
# http://localhost/redirect/ gets stripped by the extension
command = f"export NSPR_LOG_MODULES=timestamp,nsHttp:5 ; export NSPR_LOG_FILE={logfile} ;" \
f" export CACHE_DIR={cache_dir}; export PROFILE={self.profile};" \
f"{Config.browser} -P {self.profile} -no-remote 'http://localhost/redirect/{self.url}'"
command = f"NSPR_LOG_MODULES=timestamp,nsHttp:5 NSPR_LOG_FILE={logfile} CACHE_DIR={cache_dir} PROFILE={self.profile}" \
f" {Config.browser} -P {self.profile} -no-remote 'http://localhost/redirect/{self.url}'"
# terminate Config.browser if it is not able to exit on its own
# (everything has to be in single command because there is no heritance of $! amongst subprocesses)
# (everything has to be in single command because there is no inheritance of $! amongst subprocesses)
command += f" & echo $!;ii=0; while [ -n \"`ps -p $! | grep {Config.browser}`\" ];" \
f"do echo \"({self.profile}) running\" ;ii=$((ii+1)); if [ $ii -gt {Config.MAX_BROWSER_RUN_TIME} ];" \
f" do echo \"({self.profile}) running\" ;ii=$((ii+1)); if [ $ii -gt {Config.MAX_BROWSER_RUN_TIME} ];" \
f" then echo '({self.profile}) kill';kill $!; break;fi; sleep 1; done" # > /dev/null
logger.debug(command)[command], shell=True)
logger.debug(f"({self.profile}) stopped!")
# shromazdit informace z analyz
# gather analysis information
crawl = Crawl(host=self.url, log_dir=log_dir, cache_dir=cache_dir, profile=self.profile)
expiration = 0
while not os.path.isfile(logfile): # i po zavreni FF nekdy trva, nez se soubor zapise
while not os.path.isfile(logfile): # it may take some time to write the file even after the FF closes
expiration += 1
logger.debug(f"({self.profile}) waiting to close...")
if expiration > Config.MAX_BROWSER_EXPIRATION:
......@@ -284,13 +285,14 @@ class ScanController:
def clean_scan(cls, domain, scan: str):
""" If there is only 1 file in the directory, deletes files of a scan.
""" If analysis.json is missing or there is only 1 file in the directory, deletes files of a scan.
If that was the only scan, domain directory is deleted as well. (DB stays intact.)
Because if a scan fails, there is only analysis.json or nothing in the dir.
Return True if scan was deleted, False if nothing was deleted.
scan_path = os.path.join(Config.CACHE_DIR, domain, scan)
if len(os.listdir(scan_path)) <= 1:
if not os.path.isfile(os.path.join(scan_path, Config.CRAWL_FILE)) or len(os.listdir(scan_path)) <= 1:
domain_path = os.path.join(Config.CACHE_DIR, domain)
if len(os.listdir(domain_path)) == 0:
......@@ -8,6 +8,7 @@
# except ImportError:
# from yaml import Loader, Dumper
import logging
from os.path import join
from collections import defaultdict
import jsonpickle
......@@ -48,7 +49,7 @@ class Crawl(defaultdict):
def load_from_scan(domain, scan):
filename = Config.CACHE_DIR + domain + "/" + scan + "/" + Config.CRAWL_FILE
filename = join(Config.CACHE_DIR, domain, scan, Config.CRAWL_FILE)
with open(filename, 'r') as f:
# return Crawl(state=load(, Loader=Loader))
......@@ -14,7 +14,7 @@ class ScreenshotParser:
screenfile = crawl.cache_dir+'screenshot.base64'
if os.path.isfile(screenfile):
with open(screenfile,"r+") as f:
with open(screenfile, "r+") as f:
data = (b64decode(
im =
im.thumbnail(Config.THUMBNAIL_SIZE) # 0.3 s
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment