mirror of
https://github.com/github/codeql.git
synced 2025-12-17 09:13:20 +01:00
198 lines
6.6 KiB
Python
198 lines
6.6 KiB
Python
from semmle.util import makedirs
|
|
import os
|
|
from collections import deque
|
|
from functools import total_ordering
|
|
|
|
'''
|
|
Least Recently Written Disk-based Cache
|
|
|
|
Implements a LRW disk cache for trap files and similar.
|
|
This cache relies on the following properties which *must* hold.
|
|
|
|
Only one value can ever be associated with a key.
|
|
Keys should be ascii strings and cannot start with '$' or include any file or path separator characters.
|
|
Values should be byte strings (with any contents).
|
|
|
|
The cache is robust against arbitrary levels of concurrency.
|
|
|
|
'''
|
|
|
|
|
|
MAX_GENERATIONS = 50
|
|
MAX_FILES_PER_GENERATION = 200
|
|
|
|
def encode_keys(keys):
|
|
'Convert a collection of keys to a byte string'
|
|
return '\n'.join(keys).encode("ascii")
|
|
|
|
def decode_keys(data):
|
|
'Convert a byte string into a set of keys'
|
|
return set(data.decode("ascii").split('\n'))
|
|
|
|
@total_ordering
|
|
class Generation(object):
|
|
|
|
def __init__(self, cachedir, age):
|
|
self.cachedir = os.path.join(cachedir, str(age))
|
|
self.age = age
|
|
if not os.path.exists(self.cachedir):
|
|
makedirs(self.cachedir)
|
|
try:
|
|
with open(os.path.join(self.cachedir, "$keys"), 'rb') as fd:
|
|
self.keys = decode_keys(fd.read())
|
|
self.full = True
|
|
except Exception:
|
|
self.keys = set()
|
|
if os.path.isdir(self.cachedir):
|
|
#Directory exists, but cannot read "$keys", so this is a non-full generation
|
|
self.full = False
|
|
else:
|
|
self.full = True
|
|
|
|
def get(self, key):
|
|
if self.full and key not in self.keys:
|
|
return None
|
|
try:
|
|
with open(os.path.join(self.cachedir, key), 'rb') as fd:
|
|
return fd.read()
|
|
except Exception:
|
|
return None
|
|
|
|
def set(self, key, value):
|
|
'''Returns true if it should be able to store (key, value) even if in fact it can't.
|
|
This means that this method will return True if the generation is not full.'''
|
|
if self.full:
|
|
return False
|
|
if os.path.exists(os.path.join(self.cachedir, "$keys")):
|
|
self.full = True
|
|
try:
|
|
with open(os.path.join(self.cachedir, "$keys"), 'rb') as fd:
|
|
self.keys = decode_keys(fd.read())
|
|
except Exception:
|
|
self.keys = set()
|
|
return False
|
|
self._try_atomic_write_file(key, value)
|
|
if len(self._list_files()) >= MAX_FILES_PER_GENERATION:
|
|
self.full = True
|
|
self._write_keys()
|
|
return True
|
|
|
|
def _list_files(self):
|
|
try:
|
|
return os.listdir(self.cachedir)
|
|
except Exception:
|
|
#This probably means the directory has been deleted
|
|
return []
|
|
|
|
def _write_keys(self):
|
|
keys = self._list_files()
|
|
self._try_atomic_write_file("$keys", encode_keys(keys))
|
|
self.keys = set(keys)
|
|
|
|
def _try_atomic_write_file(self, name, contents):
|
|
fullname = os.path.join(self.cachedir, name)
|
|
tmpname = os.path.join(self.cachedir, '$%d%s' % (os.getpid(), name))
|
|
try:
|
|
with open(tmpname, 'wb') as tmp:
|
|
tmp.write(contents)
|
|
os.rename(tmpname, fullname)
|
|
except Exception:
|
|
#Failed for some reason. The folder may have been deleted, or on Windows, the file may already exist.
|
|
#Attempt to tidy up
|
|
if os.path.exists(tmpname):
|
|
try:
|
|
os.remove(tmpname)
|
|
except Exception:
|
|
#Give up :(
|
|
pass
|
|
|
|
def clear(self):
|
|
try:
|
|
filenames = os.listdir(self.cachedir)
|
|
except Exception:
|
|
#Can't do anything
|
|
return
|
|
for filename in filenames:
|
|
try:
|
|
os.remove(os.path.join(self.cachedir, filename))
|
|
except Exception:
|
|
# Can't delete. Maybe another process has deleted it or it is open (on Windows)
|
|
pass
|
|
try:
|
|
os.rmdir(self.cachedir)
|
|
except Exception:
|
|
# Can't delete
|
|
pass
|
|
|
|
def __lt__(self, other):
|
|
#Smaller numbers are older
|
|
return self.age > other.age
|
|
|
|
class Cache(object):
|
|
|
|
cache_of_caches = {}
|
|
|
|
def __init__(self, cachedir, verbose=False):
|
|
self.cachedir = cachedir
|
|
self.verbose = verbose
|
|
self.generations = []
|
|
if not os.path.exists(cachedir):
|
|
makedirs(cachedir)
|
|
generations = []
|
|
for gen in os.listdir(self.cachedir):
|
|
try:
|
|
age = int(gen)
|
|
generations.append(Generation(self.cachedir, age))
|
|
except Exception:
|
|
#gen might not be an int, or it may have been deleted
|
|
pass
|
|
if generations:
|
|
generations.sort()
|
|
else:
|
|
generations = [Generation(self.cachedir, 1)]
|
|
self.generations = deque(generations)
|
|
while len(self.generations) > MAX_GENERATIONS:
|
|
self.generations.pop().clear()
|
|
|
|
def set(self, key, value):
|
|
'''Add this (key, value) pair to the cache. keys should not start with '$' or include file or path separators.
|
|
Either adds the (key, value) atomically or does nothing. Partial keys or values are never visible.
|
|
'''
|
|
try:
|
|
while not self.generations[0].set(key, value):
|
|
self.generations.appendleft(Generation(self.cachedir, self.generations[0].age+1))
|
|
if len(self.generations) > MAX_GENERATIONS:
|
|
self.generations.pop().clear()
|
|
except Exception as ex:
|
|
#Its OK to fail but we must never raise
|
|
if self.verbose:
|
|
try:
|
|
print ("Exception setting cache key '%s': %s" % (key, ex))
|
|
except Exception:
|
|
# Just in case
|
|
pass
|
|
|
|
def get(self, key):
|
|
if key is None:
|
|
return None
|
|
try:
|
|
for gen in self.generations:
|
|
res = gen.get(key)
|
|
if res is not None:
|
|
return res
|
|
except Exception as ex:
|
|
if self.verbose:
|
|
try:
|
|
print ("Exception getting cache key '%s': %s" % (key, ex))
|
|
except Exception:
|
|
# Just in case
|
|
pass
|
|
return None
|
|
|
|
@staticmethod
|
|
def for_directory(cachedir, verbose):
|
|
'''Caches are relatively expensive objects, so we cache them.'''
|
|
if (cachedir, verbose) not in Cache.cache_of_caches:
|
|
Cache.cache_of_caches[(cachedir, verbose)] = Cache(cachedir, verbose)
|
|
return Cache.cache_of_caches[(cachedir, verbose)]
|