mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Merge pull request #19424 from github/tausbn/python-extract-hidden-file-by-default
Python: Extract files in hidden dirs by default
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
name: Test Config
|
||||
paths-ignore:
|
||||
- "**/.*/**"
|
||||
@@ -0,0 +1,6 @@
|
||||
| name |
|
||||
+-------------------------------+
|
||||
| .hidden_file.py |
|
||||
| another_non_hidden.py |
|
||||
| foo.py |
|
||||
| visible_file_in_hidden_dir.py |
|
||||
@@ -0,0 +1,4 @@
|
||||
| name |
|
||||
+-----------------+
|
||||
| .hidden_file.py |
|
||||
| foo.py |
|
||||
@@ -0,0 +1,3 @@
|
||||
import python
|
||||
|
||||
select any(File f).getShortName() as name order by name
|
||||
@@ -0,0 +1 @@
|
||||
print(42)
|
||||
24
python/extractor/cli-integration-test/hidden-files/test.sh
Executable file
24
python/extractor/cli-integration-test/hidden-files/test.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
|
||||
|
||||
set -x
|
||||
|
||||
CODEQL=${CODEQL:-codeql}
|
||||
|
||||
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
cd "$SCRIPTDIR"
|
||||
|
||||
rm -rf db db-skipped
|
||||
|
||||
# Test 1: Default behavior should be to extract files in hidden directories
|
||||
$CODEQL database create db --language python --source-root repo_dir/
|
||||
$CODEQL query run --database db query.ql > query-default.actual
|
||||
diff query-default.expected query-default.actual
|
||||
|
||||
# Test 2: The default behavior can be overridden by setting `paths-ignore` in the config file
|
||||
$CODEQL database create db-skipped --language python --source-root repo_dir/ --codescanning-config=config.yml
|
||||
$CODEQL query run --database db-skipped query.ql > query-skipped.actual
|
||||
diff query-skipped.expected query-skipped.actual
|
||||
|
||||
rm -rf db db-skipped
|
||||
@@ -41,6 +41,9 @@ def glob_part_to_regex(glob, add_sep):
|
||||
|
||||
def glob_to_regex(glob, prefix=""):
|
||||
'''Convert entire glob to a compiled regex'''
|
||||
# When the glob ends in `/`, we need to remember this so that we don't accidentally add an
|
||||
# extra separator to the final regex.
|
||||
end_sep = "" if glob.endswith("/") else SEP
|
||||
glob = glob.strip().strip("/")
|
||||
parts = glob.split("/")
|
||||
#Trailing '**' is redundant, so strip it off.
|
||||
@@ -48,12 +51,17 @@ def glob_to_regex(glob, prefix=""):
|
||||
parts = parts[:-1]
|
||||
if not parts:
|
||||
return ".*"
|
||||
# The `glob.strip("/")` call above will have removed all trailing slashes, but if there was at
|
||||
# least one trailing slash, we want there to be an extra part, so we add it explicitly here in
|
||||
# that case, using the emptiness of `end_sep` as a proxy.
|
||||
if end_sep == "":
|
||||
parts += [""]
|
||||
parts = [ glob_part_to_regex(escape(p), True) for p in parts[:-1] ] + [ glob_part_to_regex(escape(parts[-1]), False) ]
|
||||
# We need to escape the prefix, specifically because on Windows the prefix will be
|
||||
# something like `C:\\folder\\subfolder\\` and without escaping the
|
||||
# backslash-path-separators will get interpreted as regex escapes (which might be
|
||||
# invalid sequences, causing the extractor to crash)
|
||||
full_pattern = escape(prefix) + ''.join(parts) + "(?:" + SEP + ".*|$)"
|
||||
full_pattern = escape(prefix) + ''.join(parts) + "(?:" + end_sep + ".*|$)"
|
||||
return re.compile(full_pattern)
|
||||
|
||||
def filter_from_pattern(pattern, prev_filter, prefix):
|
||||
|
||||
@@ -83,46 +83,21 @@ class Traverser(object):
|
||||
self.logger.debug("Ignoring %s (symlink)", fullpath)
|
||||
continue
|
||||
if isdir(fullpath):
|
||||
if fullpath in self.exclude_paths or is_hidden(fullpath):
|
||||
if is_hidden(fullpath):
|
||||
self.logger.debug("Ignoring %s (hidden)", fullpath)
|
||||
else:
|
||||
self.logger.debug("Ignoring %s (excluded)", fullpath)
|
||||
else:
|
||||
empty = True
|
||||
for item in self._treewalk(fullpath):
|
||||
yield item
|
||||
empty = False
|
||||
if not empty:
|
||||
yield fullpath
|
||||
if fullpath in self.exclude_paths:
|
||||
self.logger.debug("Ignoring %s (excluded)", fullpath)
|
||||
continue
|
||||
|
||||
empty = True
|
||||
for item in self._treewalk(fullpath):
|
||||
yield item
|
||||
empty = False
|
||||
if not empty:
|
||||
yield fullpath
|
||||
elif self.filter(fullpath):
|
||||
yield fullpath
|
||||
else:
|
||||
self.logger.debug("Ignoring %s (filter)", fullpath)
|
||||
|
||||
|
||||
if os.name== 'nt':
|
||||
import ctypes
|
||||
|
||||
def is_hidden(path):
|
||||
# Windows: query the file attributes via the Win32 API; bit 2 (FILE_ATTRIBUTE_HIDDEN) marks a hidden file.
|
||||
try:
|
||||
attrs = ctypes.windll.kernel32.GetFileAttributesW(str(path))
|
||||
if attrs == -1:
|
||||
return False
|
||||
if attrs&2:
|
||||
return True
|
||||
except Exception:
|
||||
#Not sure what to log here, probably best to carry on.
|
||||
pass
|
||||
return os.path.basename(path).startswith(".")
|
||||
|
||||
else:
|
||||
|
||||
def is_hidden(path):
|
||||
return os.path.basename(path).startswith(".")
|
||||
|
||||
|
||||
def exclude_filter_from_options(options):
|
||||
if options.exclude_package:
|
||||
choices = '|'.join(mod.replace('.', r'\.') for mod in options.exclude_package)
|
||||
|
||||
@@ -10,7 +10,7 @@ from io import BytesIO
|
||||
|
||||
#Semantic version of extractor.
|
||||
#Update this if any changes are made
|
||||
VERSION = "7.1.2"
|
||||
VERSION = "7.1.3"
|
||||
|
||||
PY_EXTENSIONS = ".py", ".pyw"
|
||||
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
|
||||
- The Python extractor now extracts files in hidden directories by default. If you would like to skip files in hidden directories, add `paths-ignore: ["**/.*/**"]` to your [Code Scanning config](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#specifying-directories-to-scan). If you would like to skip all hidden files, you can use `paths-ignore: ["**/.*"]`. When using the CodeQL CLI for extraction, specify the configuration (creating the configuration file if necessary) using the `--codescanning-config` option.
|
||||
@@ -1,3 +1,5 @@
|
||||
| .hidden/inner/test.py |
|
||||
| .hidden/module.py |
|
||||
| folder/module.py |
|
||||
| package |
|
||||
| package/__init__.py |
|
||||
|
||||
@@ -3,3 +3,4 @@
|
||||
| Module foo.bar |
|
||||
| Module foo.include_test |
|
||||
| Package foo |
|
||||
| Script hidden_foo.py |
|
||||
|
||||
Reference in New Issue
Block a user