mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
34 lines
1.0 KiB
Python
34 lines
1.0 KiB
Python
|
|
from semmle import util
|
|
from semmle.extractors.base import BaseExtractor
|
|
|
|
# Size limit of 2**19 = 524288 (half a MiB). FileExtractor.process logs and
# skips any file whose content is longer than this.
HALF_MB = 1 << 19
|
|
|
|
class FileExtractor(BaseExtractor):
    '''Extractor for extracting arbitrary 'text' files.

    The content of each accepted file is decoded as latin-1, written as a
    ``file_contents`` tuple to a TRAP file, and the raw bytes are copied
    into the source archive.
    '''

    name = "file extractor"

    def process(self, unit):
        '''Extract the contents of the file designated by `unit`.

        Returns ``NotImplemented`` when `unit` is not a
        `util.FileExtractable` or when its path is a directory. Otherwise
        returns an empty tuple, both when the file was extracted and when
        it was skipped because it is longer than ``HALF_MB``.
        '''
        if not isinstance(unit, util.FileExtractable):
            return NotImplemented
        if util.isdir(unit.path):
            return NotImplemented
        with open(unit.path, "rb") as fd:
            # Read at most one byte beyond the limit: that is enough to
            # detect an oversized file without loading all of it into
            # memory. A buffered read of a regular file only returns fewer
            # bytes than requested at end of file, so a result of at most
            # HALF_MB bytes is the complete content.
            data = fd.read(HALF_MB + 1)
        if len(data) > HALF_MB:
            self.logger.info("Skipping overly large file: '%s'", unit.path)
            return ()
        # latin-1 maps every byte to exactly one character, so decoding
        # cannot fail and len(source) == len(data); checking the byte count
        # above is equivalent to checking the decoded length.
        source = data.decode("latin-1")
        file_tag = util.get_source_file_tag(unit.path)
        writer = util.TrapWriter()
        writer.write_tuple("file_contents", "gS", file_tag, source)
        writer.write_file(unit.path)
        output = writer.get_compressed()
        self.trap_folder.write_trap("file", unit.path, output)
        # Archive the original bytes rather than the decoded text.
        self.src_archive.write(unit.path, data)
        return ()

    def close(self):
        '''No-op: `process` closes every file it opens before returning.'''
        pass
|