Commit 6a11c37f authored by Jakub Ružička's avatar Jakub Ružička
Browse files

cache: automagically cache and reuse output files

The following commands now support automatic caching and reuse of
produced output files:

* make-archive
* get-archive
* srcpkg
* build

Cache uses checksum of current project state as a key and also stores
and validates cached files' checksums. Currently the project checksum is
obtained using git magicks so the cache is only enabled for projects
using git.

Cache lives in pkg/.cache.json and it's updated automatically when
a new archive/srcpkg/package is created or when invalid cache entry is
requested.

Caching can be disabled using new --no-cache option.

Cache can be cleared by removing pkg/.cache.json file.

Self test on make-archive is included.
parent d22feb0b
Pipeline #75592 passed with stages
in 1 minute and 42 seconds
"""
apkg packaging file cache
"""
import hashlib
import json
from pathlib import Path
from apkg.compat import py35path
from apkg.log import getLogger
log = getLogger(__name__)
def file_checksum(path):
    """
    Return a short SHA-256 checksum of a file.

    The file is read in 4 KiB chunks; the first 20 hex digits of the
    digest are returned.
    """
    digest = hashlib.sha256()
    with open(py35path(path), "rb") as f:
        while True:
            block = f.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()[:20]
class ProjectCache:
    """
    Cache of packaging output files keyed by project state.

    Entries are grouped by cache_name (e.g. 'archive/dev') and key
    (e.g. project checksum). Each entry stores the cached file name(s)
    together with their checksum(s) and is persisted as JSON to
    self.project.cache_path.
    """
    def __init__(self, project):
        self.project = project
        # cache file is loaded lazily, on first use
        self.loaded = False
        self.cache = {}
        self.checksum = None

    def save(self):
        """
        persist the in-memory cache to the project cache file
        """
        cache_path = self.project.cache_path
        # the output dir (pkg/) might not exist yet on first save
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        # use a context manager so the file handle is closed promptly
        # (original passed an open file to json.dump and never closed it)
        with cache_path.open('w') as f:
            json.dump(self.cache, f)

    def load(self):
        """
        load the cache file if it exists
        """
        cache_path = self.project.cache_path
        if not cache_path.exists():
            log.verbose("cache not found: %s", cache_path)
            return
        log.verbose("loading cache: %s", cache_path)
        with cache_path.open('r') as f:
            self.cache = json.load(f)

    def _ensure_load(self):
        """
        ensure cache is loaded on demand and only once

        you don't need to call this directly
        """
        if self.loaded:
            return
        self.load()
        self.loaded = True

    def update(self, cache_name, key, path):
        """
        store a path (or list of paths) under cache_name/key and save
        """
        assert key
        log.verbose("cache update for %s: %s -> %s",
                    cache_name, key, path)
        self._ensure_load()
        if cache_name not in self.cache:
            self.cache[cache_name] = {}
        entry = self.path2entry(path)
        self.cache[cache_name][key] = entry
        self.save()

    def get(self, cache_name, key):
        """
        return cached path (or list of paths) for cache_name/key

        Invalid entries (missing file or checksum mismatch) are
        flushed from the cache and None is returned.
        """
        log.verbose("cache query for %s: %s",
                    cache_name, key)

        def validate(path, checksum):
            # flush entries whose files vanished or changed on disk
            if not path.exists():
                log.info("removing missing file from cache: %s", path)
                self.delete(cache_name, key)
                return False
            real_checksum = file_checksum(path)
            if real_checksum != checksum:
                log.info("removing invalid cache entry: %s", path)
                self.delete(cache_name, key)
                return False
            return True

        self._ensure_load()
        entry = self.cache.get(cache_name, {}).get(key)
        if not entry:
            return None
        path = self.entry2path(entry, validate_fun=validate)
        return path

    def delete(self, cache_name, key):
        """
        remove an entry from the cache (no-op when absent) and save
        """
        # guard against unknown cache_name
        # (original indexed self.cache[cache_name] and could KeyError)
        self.cache.get(cache_name, {}).pop(key, None)
        self.save()

    def path2entry(self, path):
        """
        convert path or a list of paths to corresponding cache entry

        return (fn, checksum) or a list of that on multiple paths
        """
        is_list = True
        if not isinstance(path, list):
            path = [path]
            is_list = False
        entries = [(str(p), file_checksum(p)) for p in path]
        if is_list:
            return entries
        return entries[0]

    def entry2path(self, entry, validate_fun=None):
        """
        convert cache entry to file path or list of paths

        When validate_fun is supplied it is called as
        validate_fun(path, checksum) for every file; None is returned
        on the first validation failure.
        """
        is_list = True
        if not isinstance(entry[0], list):
            entry = [entry]
            is_list = False
        paths = []
        for fn, checksum in entry:
            p = Path(fn)
            if validate_fun:
                if not validate_fun(p, checksum):
                    return None
            paths.append(p)
        if is_list:
            return paths
        return paths[0]

    def enabled(self, use_cache=True):
        """
        helper to tell and log if caching is enabled and supported

        optional use_cache argument provided for shared
        argument parsing and logging from apkg.lib
        """
        if use_cache:
            vcs = self.project.vcs
            if vcs:
                log.verbose("%s VCS detected -> cache ENABLED", vcs)
                return True
            log.verbose("VCS not detected -> cache DISABLED")
        else:
            log.verbose("cache DISABLED")
        return False
......@@ -3,7 +3,7 @@ build package
usage: apkg build ([-u] | [-s <srcpkg>] | [-a <ar>] )
[-v <ver>] [-r <rls>] [-d <distro>]
[-i] [-I]
[-i] [-I] [--no-cache]
options:
-s <srcpkg>, --srcpkg <srcpkg> use specified source package (path or name)
......@@ -17,6 +17,7 @@ options:
-i, --install-dep install build dependencies
-I, --isolated use isolated builder (pbuilder/mock) if supported
default: use direct build
--no-cache disable cache
""" # noqa
from docopt import docopt
......@@ -34,7 +35,8 @@ def run_command(cargs):
release=args['--release'],
distro=args['--distro'],
install_dep=args['--install-dep'],
isolated=args['--isolated'])
isolated=args['--isolated'],
use_cache=not args['--no-cache'])
for pkg in pkgs:
print("%s" % pkg)
return pkgs
"""
Download upstream archive for current project
usage: apkg get-archive [-v <ver>]
usage: apkg get-archive [-v <ver>] [--no-cache]
options:
-v <ver>, --version <ver> version of archive to download
--no-cache disable cache
""" # noqa
from docopt import docopt
......@@ -14,6 +15,8 @@ from apkg.lib import ar
def run_command(cargs):
args = docopt(__doc__, argv=cargs)
out_path = ar.get_archive(version=args['--version'])
out_path = ar.get_archive(
version=args['--version'],
use_cache=not args['--no-cache'])
print(out_path)
return out_path
"""
Create dev archive from current project state
usage: apkg make-archive [-v <ver>]
usage: apkg make-archive [-v <ver>] [--no-cache]
options:
-v <ver>, --version <ver> rename archive to match specified version if needed
--no-cache disable cache
""" # noqa
from docopt import docopt
......@@ -14,6 +15,8 @@ from apkg.lib import ar
def run_command(cargs):
args = docopt(__doc__, argv=cargs)
out_path = ar.make_archive(version=args['--version'])
out_path = ar.make_archive(
version=args['--version'],
use_cache=not args['--no-cache'])
print(out_path)
return out_path
"""
create source package (files to build package from)
usage: apkg srcpkg [-u] [-a <ar>] [-v <ver>] [-r <rls>] [-d <distro>]
usage: apkg srcpkg [-a <ar>] [-u]
[-v <ver>] [-r <rls>] [-d <distro>]
[--no-cache]
options:
-u, --upstream use upstream archive / apkg get-source
......@@ -11,6 +13,7 @@ options:
-r <rls>, --release <rls> set package release
-d <distro>, --distro <distro> set target distro
default: current distro
--no-cache disable cache
""" # noqa
from docopt import docopt
......@@ -25,6 +28,7 @@ def run_command(cargs):
archive=args['--archive'],
version=args['--version'],
release=args['--release'],
distro=args['--distro'])
distro=args['--distro'],
use_cache=not args['--no-cache'])
print(out_srcpkg)
return out_srcpkg
......@@ -26,6 +26,9 @@ def print_status():
msg = "project base path: {t.bold}{path}{t.normal}"
print(msg.format(path=proj.path.resolve(), t=T))
msg = "project VCS: {t.bold}{vcs}{t.normal}"
print(msg.format(vcs=proj.vcs or 'none', t=T))
msg = "project config: {t.bold}{path}{t.normal}"
if proj.config_path.exists():
msg += " ({t.green}exists{t.normal})"
......@@ -57,7 +60,7 @@ def print_status():
msg = "current distro: {t.cyan}{id}{t.normal} / {t.cyan}{full}{t.normal}"
print(msg.format(full=adistro.fullname(), id=adistro.idver(), t=T))
template = proj.get_template_for_distro(adistro.idver())
template = proj._get_template_for_distro(adistro.idver())
msg = " package style: "
if template:
style = template.pkgstyle.name
......
......@@ -18,12 +18,18 @@ from apkg.util.run import run
log = getLogger(__name__)
def make_archive(version=None, project=None):
def make_archive(version=None, project=None, use_cache=True):
"""
create archive from current project state
"""
log.bold("creating dev archive")
proj = project or Project()
use_cache = proj.cache.enabled(use_cache)
if use_cache:
archive_path = proj.cache.get('archive/dev', proj.checksum)
if archive_path:
log.success("reuse cached archive: %s", archive_path)
return archive_path
try:
script = proj.config['project']['make_archive_script']
except KeyError:
......@@ -38,15 +44,15 @@ def make_archive(version=None, project=None):
out = run(script)
# last script stdout line is expected to be path to resulting archive
_, _, last_line = out.rpartition('\n')
archive_path = Path(last_line)
if not archive_path.exists():
in_archive_path = Path(last_line)
if not in_archive_path.exists():
msg = ("make_archive_script finished successfully but the archive\n"
"(indicated by last script stdout line) doesn't exist:\n\n"
"%s" % archive_path)
"%s" % in_archive_path)
raise exception.UnexpectedCommandOutput(msg=msg)
log.info("archive created: %s" % archive_path)
log.info("archive created: %s" % in_archive_path)
archive_fn = archive_path.name
archive_fn = in_archive_path.name
if version:
# specific version requested - rename if needed
name, sep, ver, ext = split_archive_fn(archive_fn)
......@@ -54,15 +60,18 @@ def make_archive(version=None, project=None):
archive_fn = name + sep + version + ext
msg = "archive renamed to match requested version: %s"
log.info(msg, archive_fn)
out_path = proj.dev_archive_path / archive_fn
log.info("copying archive to: %s" % out_path)
archive_path = proj.dev_archive_path / archive_fn
log.info("copying archive to: %s" % archive_path)
os.makedirs(py35path(proj.dev_archive_path), exist_ok=True)
shutil.copy(py35path(archive_path), py35path(out_path))
log.success("made archive: %s", out_path)
return out_path
shutil.copy(py35path(in_archive_path), py35path(archive_path))
log.success("made archive: %s", archive_path)
if use_cache:
proj.cache.update(
'archive/dev', proj.checksum, str(archive_path))
return archive_path
def get_archive(version=None, project=None):
def get_archive(version=None, project=None, use_cache=True):
"""
download archive for current project
"""
......@@ -71,6 +80,7 @@ def get_archive(version=None, project=None):
"TODO: automatic latest version detection\n\n"
"For now please select using --version/-v.")
proj = project or Project()
use_cache = proj.cache.enabled(use_cache)
try:
upstream_archive_url = proj.config['project']['upstream_archive_url']
except KeyError:
......@@ -87,6 +97,13 @@ def get_archive(version=None, project=None):
archive_t = jinja2.Template(upstream_archive_url)
archive_url = archive_t.render(**env)
if use_cache:
archive_path = proj.cache.get('archive/upstream', archive_url)
if archive_path:
log.success("reuse cached archive: %s", archive_path)
return archive_path
log.info('downloading archive: %s', archive_url)
r = requests.get(archive_url, allow_redirects=True)
if r.status_code != 200:
......@@ -104,6 +121,10 @@ def get_archive(version=None, project=None):
archive_path.open('wb').write(r.content)
log.success('downloaded archive: %s', archive_path)
if use_cache:
proj.cache.update(
'archive/upstream', archive_url, str(archive_path))
try:
upstream_signature_url = \
proj.config['project']['upstream_signature_url']
......
......@@ -22,11 +22,14 @@ def build_package(
release=None,
distro=None,
install_dep=False,
isolated=False):
isolated=False,
use_cache=True,
project=None):
log.bold('building package')
proj = Project()
proj = project or Project()
distro = adistro.distro_arg(distro)
use_cache = proj.cache.enabled(use_cache)
log.info("target distro: %s" % distro)
if srcpkg:
......@@ -43,7 +46,18 @@ def build_package(
version=version,
release=release,
distro=distro,
upstream=upstream)
upstream=upstream,
project=proj,
use_cache=use_cache)
if use_cache and not upstream:
cache_name = 'pkg/dev/%s' % distro
pkgs = proj.cache.get(cache_name, proj.checksum)
if pkgs:
log.success(
"reuse %d cached packages from: %s",
len(pkgs), pkgs[0].parent)
return pkgs
if install_dep:
# install build deps if requested
......@@ -81,6 +95,12 @@ def build_package(
"no packages:\n\n%s" % out_path)
raise exception.UnexpectedCommandOutput(msg=msg)
log.success("built %s packages in: %s", len(pkgs), out_path)
if use_cache and not upstream:
fns = list(map(str, pkgs))
proj.cache.update(
cache_name, proj.checksum, fns)
return pkgs
......
......@@ -17,11 +17,13 @@ log = getLogger(__name__)
def make_srcpkg(
archive=None, version=None, release=None,
distro=None, upstream=False):
distro=None, upstream=False, use_cache=True,
project=None):
log.bold('creating source package')
proj = Project()
proj = project or Project()
distro = adistro.distro_arg(distro)
use_cache = proj.cache.enabled(use_cache)
log.info("target distro: %s" % distro)
if not release:
......@@ -34,11 +36,25 @@ def make_srcpkg(
else:
# archive not specified - use make_archive or get_archive
if upstream:
ar_path = ar.get_archive(version=version, project=proj)
ar_path = ar.get_archive(
version=version,
project=proj,
use_cache=use_cache)
else:
ar_path = ar.make_archive(version=version, project=proj)
ar_path = ar.make_archive(
version=version,
project=proj,
use_cache=use_cache)
version = ar.get_archive_version(ar_path, version=version)
# --upstream builds aren't well supported yet - don't cache for now
if use_cache and not upstream:
cache_name = 'srcpkg/dev/%s' % distro
srcpkg_path = proj.cache.get(cache_name, proj.checksum)
if srcpkg_path:
log.success("reuse cached source package: %s", srcpkg_path)
return srcpkg_path
# fetch correct package template
template = proj.get_template_for_distro(distro)
if not template:
......@@ -92,4 +108,9 @@ def make_srcpkg(
msg = ("source package build reported success but there are "
"no results:\n\n%s" % srcpkg_path)
raise exception.UnexpectedCommandOutput(msg=msg)
if use_cache and not upstream:
proj.cache.update(
cache_name, proj.checksum, str(srcpkg_path))
return srcpkg_path
......@@ -3,13 +3,16 @@ try:
except ImportError:
from cached_property import cached_property
import glob
import hashlib
from pathlib import Path
import os
import toml
from apkg import cache as _cache
from apkg import exception
from apkg.log import getLogger
from apkg import pkgtemplate
from apkg.util.git import git
log = getLogger(__name__)
......@@ -32,6 +35,7 @@ class Project:
name = None
path = None
templates_path = None
cache_path = None
config_base_path = None
config_path = None
archive_path = None
......@@ -50,6 +54,7 @@ class Project:
self.path = Path('.')
if autoload:
self.load()
self.cache = _cache.ProjectCache(self)
def update_attrs(self):
"""
......@@ -80,6 +85,8 @@ class Project:
# output: pkg/{src-,}pkg
self.package_out_path = self.path / OUTPUT_BASE_DIR / 'pkgs'
self.srcpkg_out_path = self.path / OUTPUT_BASE_DIR / 'srcpkgs'
# cache: pkg/.cache.json
self.cache_path = self.path / OUTPUT_BASE_DIR / '.cache.json'
def load(self):
"""
......@@ -100,6 +107,34 @@ class Project:
log.verbose("project config not found: %s" % self.config_path)
return False
@cached_property
def vcs(self):
"""
Version Control System used in project
possible outputs: 'git', None
"""
o = git('rev-parse', silent=True, fatal=False)
if o.return_code == 0:
return 'git'
return None
@cached_property
def checksum(self):
"""
checksum of current project state
requires VCS (git), only computed once
"""
if self.vcs == 'git':
checksum = git.current_commit()[:10]
diff = git('diff', log_cmd=False)
if diff:
diff_hash = hashlib.sha256(diff.encode('utf-8'))
checksum += '-%s' % diff_hash.hexdigest()[:10]
return checksum
return None
@cached_property
def templates(self):
if self.templates_path.exists():
......
......@@ -27,8 +27,14 @@ def log_cmd_fail(cmd, cout):
def run(*cmd, **kwargs):
"""
run system commands easily - a subprocess.Popen wrapper
run('echo', 'hello world')
"""
fatal = kwargs.get('fatal', True)
direct = kwargs.get('direct', False)
silent = kwargs.get('silent', False)
log_cmd = kwargs.get('log_cmd', True)
log_fail = kwargs.get('log_fail', True)
log_fun = kwargs.get('log_fun', log.command)
......@@ -42,8 +48,11 @@ def run(*cmd, **kwargs):
cmd = [str(c) for c in cmd]
cmd_str = ' '.join(cmd)
if log_cmd:
log_fun(cmd_str)
if silent:
log_cmd = False
log_fail = False
print_stdout = False
print_stderr = False
if print_output:
print_stdout = True
......@@ -61,6 +70,9 @@ def run(*cmd, **kwargs):
stdout = subprocess.PIPE
stderr = subprocess.PIPE
if log_cmd:
log_fun(cmd_str)
try:
prc = subprocess.Popen(cmd, stdin=stdin, stdout=stdout,
stderr=stderr, env=env)
......
......@@ -8,13 +8,13 @@ import shutil
from apkg.compat import py35path
def init_testing_repo(repo_path, test_path):
def init_testing_repo(repo_path, test_path, ignore_dirs=[]):
dst = Path(test_path) / Path(repo_path).name
shutil.copytree(py35path(repo_path), py35path(dst))
inject_tree(repo_path, dst, ignore_dirs=ignore_dirs)
return dst
def inject_tree(src_path, dst_path):
def inject_tree(src_path, dst_path, ignore_dirs=[]):
"""
copy all files from src_path into dst_path
......@@ -24,7 +24,16 @@ def inject_tree(src_path, dst_path):
os.makedirs(py35path(dst_path), exist_ok=True)
# recursively copy all files
for d, _, files in os.walk(py35path(src_path)):
for d, subdirs, files in os.walk(py35path(src_path)):
if ignore_dirs:
# ignore selected dirs
ignored = []
for sd in subdirs:
if sd in ignore_dirs:
ignored.append(sd)
for sd_ignore in ignored:
subdirs.remove(sd_ignore)
rel_dir = Path(d).relative_to(src_path)
dst_dir = dst_path / rel_dir
os.makedirs(py35path(dst_dir), exist_ok=True)
......@@ -33,3 +42,10 @@ def inject_tree(src_path, dst_path):
dst = dst_dir / fn
src = Path(d) / fn
shutil.copy(py35path(src), py35path(dst))
def log_contains(string, caplog):
for r in caplog.records:
if string in r.message: