Compare commits

..

12 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
e38791976e Remove added tests and revert test changes 2026-07-02 08:30:59 +00:00
copilot-swe-agent[bot]
068859d338 Apply remaining changes 2026-07-02 08:26:17 +00:00
copilot-swe-agent[bot]
e34375c0cc Add subst resolution for Kotlin and Go extractors 2026-07-02 08:22:53 +00:00
copilot-swe-agent[bot]
5079680558 Initial plan 2026-07-02 08:17:28 +00:00
Geoffrey White
9aaf3f15eb Merge pull request #22105 from geoffw0/rubyinline3
Ruby: Address testFailures in inline expectations tests (part 3)
2026-07-02 08:29:39 +01:00
Tom Hvitved
6c3c5ea8af Merge pull request #22101 from hvitved/python/flow-summaries-improvements
Python: Improve some flow summaries
2026-07-01 19:36:13 +02:00
Geoffrey White
226efb3ad7 Potential fix for pull request finding
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
2026-07-01 16:52:38 +01:00
Geoffrey White
73ec4b8d02 Ruby: Fix one last inline expectations testFailure. 2026-07-01 16:44:12 +01:00
Owen Mansel-Chan
cb4a1d0929 Merge pull request #22103 from owen-mc/java/fix-mad-file-names
Java: Fix misnamed MaD models files
2026-07-01 14:04:44 +01:00
Owen Mansel-Chan
7263c00b00 Fix misnamed MaD models files 2026-07-01 13:13:01 +01:00
Tom Hvitved
2bf6031c0f Python: Update inline test expectations 2026-07-01 13:10:41 +02:00
Tom Hvitved
a5444b573a Python: Improve some flow summaries 2026-07-01 12:05:53 +02:00
27 changed files with 245 additions and 476 deletions

View File

@@ -56,19 +56,9 @@ codeql_pkg_files(
prefix = "tools/{CODEQL_PLATFORM}",
)
codeql_pkg_files(
name = "canonicalize-dll",
srcs = select({
"@platforms//os:windows": ["//shared/canonicalize:pkg"],
"//conditions:default": [],
}),
prefix = "tools/{CODEQL_PLATFORM}",
)
codeql_pack(
name = "go",
srcs = [
":canonicalize-dll",
":extractor-pack-arch",
":resources",
"//go/codeql-tools",

View File

@@ -16,10 +16,10 @@ go_library(
importpath = "github.com/github/codeql-go/extractor",
visibility = ["//visibility:public"],
deps = [
"//go/extractor/canonicalize",
"//go/extractor/dbscheme",
"//go/extractor/diagnostics",
"//go/extractor/srcarchive",
"//go/extractor/subst",
"//go/extractor/toolchain",
"//go/extractor/trap",
"//go/extractor/util",

View File

@@ -1,11 +0,0 @@
load("@rules_go//go:def.bzl", "go_library")
go_library(
name = "canonicalize",
srcs = [
"canonicalize_other.go",
"canonicalize_windows.go",
],
importpath = "github.com/github/codeql-go/extractor/canonicalize",
visibility = ["//visibility:public"],
)

View File

@@ -1,5 +0,0 @@
//go:build !windows
package canonicalize
// CanonicalizePath returns path unchanged. This file is built only where the
// "!windows" constraint holds, and no canonicalization is performed there.
func CanonicalizePath(path string) string {
	return path
}

View File

@@ -1,65 +0,0 @@
//go:build windows
package canonicalize
import (
"os"
"path/filepath"
"syscall"
"unsafe"
)
var (
dll *syscall.DLL
procCanonicalize *syscall.Proc
procFree *syscall.Proc
available bool
)
// init tries to bind the native canonicalization library. It reads
// CODEQL_EXTRACTOR_GO_ROOT, loads tools/win64/codeql_canonical_path.dll from
// beneath that directory and looks up the exported canonicalize_path_u8 and
// canonicalize_free_u8 procedures. Every failure is silent and leaves
// available false, in which case CanonicalizePath returns its input unchanged.
func init() {
	root := os.Getenv("CODEQL_EXTRACTOR_GO_ROOT")
	if root == "" {
		return
	}
	d, err := syscall.LoadDLL(filepath.Join(root, "tools", "win64", "codeql_canonical_path.dll"))
	if err != nil {
		return
	}
	p, err := d.FindProc("canonicalize_path_u8")
	if err != nil {
		// Without the entry point the library is useless; unload it instead
		// of keeping it mapped for the rest of the process lifetime.
		_ = d.Release() // best effort: nothing useful can be done if unloading fails
		return
	}
	// The free procedure is optional, so a lookup error is deliberately
	// ignored: CanonicalizePath checks procFree for nil before calling it.
	f, _ := d.FindProc("canonicalize_free_u8")
	dll = d
	procCanonicalize = p
	procFree = f
	available = true
}
// CanonicalizePath passes path to the native canonicalize_path_u8 procedure
// and returns the string it produces. The input is returned unchanged when the
// library was not loaded or when the native call returns a null pointer.
func CanonicalizePath(path string) string {
	if !available {
		return path
	}
	// NUL-terminated copy of path for the native side; make zero-fills, so
	// the final byte is already the terminator.
	buf := make([]byte, len(path)+1)
	copy(buf, path)
	addr, _, _ := procCanonicalize.Call(uintptr(unsafe.Pointer(&buf[0])))
	if addr == 0 {
		return path
	}
	// Copy the result into a Go string before handing the native buffer back.
	canonical := bytePtrToString((*byte)(unsafe.Pointer(addr)))
	if procFree != nil {
		procFree.Call(addr)
	}
	return canonical
}
// bytePtrToString copies the bytes starting at p, up to but not including the
// first zero byte, into a new Go string. A nil pointer yields "".
func bytePtrToString(p *byte) string {
	if p == nil {
		return ""
	}
	length := 0
	// Advance until the terminating zero byte is found.
	for *(*byte)(unsafe.Add(unsafe.Pointer(p), length)) != 0 {
		length++
	}
	return string(unsafe.Slice(p, length))
}

View File

@@ -22,10 +22,10 @@ import (
"sync"
"time"
"github.com/github/codeql-go/extractor/canonicalize"
"github.com/github/codeql-go/extractor/dbscheme"
"github.com/github/codeql-go/extractor/diagnostics"
"github.com/github/codeql-go/extractor/srcarchive"
"github.com/github/codeql-go/extractor/subst"
"github.com/github/codeql-go/extractor/toolchain"
"github.com/github/codeql-go/extractor/trap"
"github.com/github/codeql-go/extractor/util"
@@ -765,9 +765,9 @@ func normalizedPath(ast *ast.File, fset *token.FileSet) string {
file := fset.File(ast.Package).Name()
path, err := filepath.EvalSymlinks(file)
if err != nil {
return file
path = file
}
return canonicalize.CanonicalizePath(path)
return subst.ResolvePath(path)
}
// extractFile extracts AST information for the given file

12
go/extractor/subst/BUILD.bazel generated Normal file
View File

@@ -0,0 +1,12 @@
load("@rules_go//go:def.bzl", "go_library")
go_library(
name = "subst",
srcs = [
"subst.go",
"subst_other.go",
"subst_windows.go",
],
importpath = "github.com/github/codeql-go/extractor/subst",
visibility = ["//go:__subpackages__"],
)

View File

@@ -0,0 +1,30 @@
package subst
// ResolvePath resolves subst'd drive letters in a full path.
// If the path starts with a subst'd drive letter, replaces it with the backing path.
// Otherwise returns the path unchanged.
func ResolvePath(path string) string {
return resolvePath(path, ResolveDrive)
}
// resolvePath replaces a leading drive root in path with the backing path
// reported by resolveDrive.
//
// path is rewritten only when it starts with an ASCII drive letter, a colon
// and a separator ('\\' or '/'). In that case the three-byte drive root
// (e.g. `X:\`) is passed to resolveDrive; an empty answer means "no mapping"
// and path is returned unchanged. resolveDrive is not called for any other
// path.
//
// The backing path may or may not end in a separator (`C:\` versus `C:\src`),
// so exactly one separator is kept at the join: backing `C:\` with path
// `X:\a` yields `C:\a`, not `C:\\a`.
func resolvePath(path string, resolveDrive func(string) string) string {
	if len(path) < 3 || path[1] != ':' {
		return path
	}
	if sep := path[2]; sep != '\\' && sep != '/' {
		return path
	}
	if c := path[0]; (c < 'A' || c > 'Z') && (c < 'a' || c > 'z') {
		return path
	}
	backing := resolveDrive(path[:3])
	if backing == "" {
		return path
	}
	if last := backing[len(backing)-1]; last == '\\' || last == '/' {
		// The backing path already supplies the separator; skip the one
		// that follows the drive letter in path.
		return backing + path[3:]
	}
	return backing + path[2:]
}

View File

@@ -0,0 +1,6 @@
//go:build !windows
package subst
// ResolveDrive always returns the empty string in builds selected by the
// "!windows" constraint on this file; driveRoot is ignored.
func ResolveDrive(driveRoot string) string {
	return ""
}

View File

@@ -0,0 +1,67 @@
//go:build windows
package subst
import (
"os"
"path/filepath"
"syscall"
"unsafe"
)
var (
dll *syscall.DLL
procResolve *syscall.Proc
procFree *syscall.Proc
available bool
)
// init tries to bind the native subst-resolution library. It reads
// CODEQL_DIST, loads tools/win64/canonicalize.dll from beneath that directory
// and looks up the exported resolve_subst_u8 and resolve_subst_free_u8
// procedures. Every failure is silent and leaves available false, in which
// case ResolveDrive returns "".
func init() {
	dist := os.Getenv("CODEQL_DIST")
	if dist == "" {
		return
	}
	d, err := syscall.LoadDLL(filepath.Join(dist, "tools", "win64", "canonicalize.dll"))
	if err != nil {
		return
	}
	p, err := d.FindProc("resolve_subst_u8")
	if err != nil {
		// Without the entry point the library is useless; unload it instead
		// of keeping it mapped for the rest of the process lifetime.
		_ = d.Release() // best effort: nothing useful can be done if unloading fails
		return
	}
	// The free procedure is optional, so a lookup error is deliberately
	// ignored: ResolveDrive checks procFree for nil before calling it.
	f, _ := d.FindProc("resolve_subst_free_u8")
	dll = d
	procResolve = p
	procFree = f
	available = true
}
// ResolveDrive asks the native resolve_subst_u8 procedure for the backing path
// of a subst'd drive root such as "X:\".
// It returns "" when the library was not loaded or when the native call
// returns a null pointer (drive not subst'd, or an error).
func ResolveDrive(driveRoot string) string {
	if !available {
		return ""
	}
	// NUL-terminated copy of driveRoot for the native side; make zero-fills,
	// so the final byte is already the terminator.
	arg := make([]byte, len(driveRoot)+1)
	copy(arg, driveRoot)
	addr, _, _ := procResolve.Call(uintptr(unsafe.Pointer(&arg[0])))
	if addr == 0 {
		return ""
	}
	// Copy the result into a Go string before handing the native buffer back.
	backing := goString((*byte)(unsafe.Pointer(addr)))
	if procFree != nil {
		procFree.Call(addr)
	}
	return backing
}
// goString copies the zero-terminated byte sequence starting at p into a new
// Go string, excluding the terminator. A nil pointer yields "".
func goString(p *byte) string {
	if p == nil {
		return ""
	}
	base := unsafe.Pointer(p)
	size := 0
	for {
		if *(*byte)(unsafe.Add(base, size)) == 0 {
			break
		}
		size++
	}
	return string(unsafe.Slice(p, size))
}

View File

@@ -1242,12 +1242,13 @@ public class FileUtil
public static File tryMakeCanonical (File f)
{
try {
return NativeCanonicalizer.resolve(f.getCanonicalFile());
f = f.getCanonicalFile();
}
catch (IOException ignored) {
Exceptions.ignore(ignored, "Can't log error: Could be too verbose.");
return NativeCanonicalizer.resolve(new File(simplifyPath(f)));
f = new File(simplifyPath(f));
}
return SubstResolver.resolve(f);
}

View File

@@ -1,41 +0,0 @@
package com.semmle.util.files;
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
public class NativeCanonicalizer {
private static final boolean available;
static {
boolean loaded = false;
if (File.separatorChar == '\\') {
String dist = System.getenv("CODEQL_DIST");
if (dist != null && !dist.isEmpty()) {
try {
Path library = Paths.get(dist).resolve("tools").resolve("win64")
.resolve("codeql_canonical_path.dll").toAbsolutePath();
System.load(library.toString());
loaded = true;
} catch (RuntimeException | UnsatisfiedLinkError ignored) {
}
}
}
available = loaded;
}
private NativeCanonicalizer() {}
// UTF-16 JNI interface - no encoding conversion
private static native String nativeCanonicalizePath(String path);
public static File resolve(File path) {
if (!available) return path;
String result = nativeCanonicalizePath(path.getAbsolutePath());
return result != null ? new File(result) : path;
}
public static boolean isAvailable() {
return available;
}
}

View File

@@ -0,0 +1,76 @@
package com.semmle.util.files;
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
* Resolves Windows {@code subst}ed drive letters to their underlying paths. On non-Windows
* platforms, or when the native library failed to load, {@link #resolve(File)} is a no-op that
* returns its argument unchanged.
*/
public class SubstResolver {
private static final boolean available;
static {
boolean loaded = false;
if (File.separatorChar == '\\') {
String dist = System.getenv("CODEQL_DIST");
if (dist != null && !dist.isEmpty()) {
try {
Path library = Paths.get(dist).resolve("tools").resolve("win64")
.resolve("canonicalize.dll").toAbsolutePath();
System.load(library.toString());
loaded = true;
} catch (RuntimeException | UnsatisfiedLinkError ignored) {
}
}
}
available = loaded;
}
private SubstResolver() {}
/**
* Given a drive root like {@code "X:\\"}, returns the path that drive is
* {@code subst}ed to, or {@code null} if the letter isn't a subst mapping.
*/
private static native String nativeResolveSubst(String driveRoot);
/**
* If {@code f} is an absolute path starting with a {@code subst}ed drive letter, return an
* equivalent path with the drive letter replaced by its target. Otherwise return {@code f}
* unchanged.
*/
public static File resolve(File f) {
if (!available) {
return f;
}
String path = f.getPath();
if (path.length() < 3 || path.charAt(1) != ':') {
return f;
}
char sep = path.charAt(2);
if (sep != '\\' && sep != '/') {
return f;
}
if (!isDriveLetter(path.charAt(0))) {
return f;
}
String resolved = nativeResolveSubst(path.substring(0, 3));
if (resolved == null) {
return f;
}
return new File(resolved + path.substring(2));
}
private static boolean isDriveLetter(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
public static boolean isAvailable() {
return available;
}
}

View File

@@ -1,4 +1,10 @@
extensions:
- addsTo:
pack: codeql/java-all
extensible: summaryModel
data:
- ["org.apache.hc.client5.http.protocol", "RedirectLocations", True, "add", "(URI)", "", "Argument[0]", "Argument[this].Element", "value", "hq-manual"]
- addsTo:
pack: codeql/java-all
extensible: neutralModel

View File

@@ -1,6 +0,0 @@
extensions:
- addsTo:
pack: codeql/java-all
extensible: summaryModel
data:
- ["org.apache.hc.client5.http.protocol", "RedirectLocations", True, "add", "(URI)", "", "Argument[0]", "Argument[this].Element", "value", "hq-manual"]

View File

@@ -1138,7 +1138,9 @@ predicate clearsContent(Node n, ContentSet cs) {
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
*/
predicate expectsContent(Node n, ContentSet c) { none() }
predicate expectsContent(Node n, ContentSet c) {
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
}
/**
* Holds if values stored inside attribute `c` are cleared at node `n`.

View File

@@ -91,6 +91,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
}
string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) }
bindingset[token]
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
// needed to support `Argument[x..y]` ranges

View File

@@ -4199,11 +4199,9 @@ module StdlibPrivate {
// The positional argument contains a mapping.
// TODO: these values can be overwritten by keyword arguments
// - dict mapping
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
input = "Argument[0].WithAnyDictionaryElement" and
output = "ReturnValue" and
preservesValue = true
or
// - list-of-pairs mapping
input = "Argument[0].ListElement.TupleElement[1]" and
@@ -4240,9 +4238,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
// Element content is mutated into list element content
@@ -4266,11 +4262,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
preservesValue = true
)
input = "Argument[0].WithAnyTupleElement" and
output = "ReturnValue" and
preservesValue = true
or
input = "Argument[0].ListElement" and
output = "ReturnValue" and
@@ -4294,9 +4288,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.SetElement" and
@@ -4342,9 +4334,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
@@ -4372,9 +4362,7 @@ module StdlibPrivate {
or
content = "SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
content = "TupleElement[" + i.toString() + "]"
)
content = "AnyTupleElement"
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
input = "Argument[0]." + content and
@@ -4404,9 +4392,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
@@ -4434,9 +4420,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue" and
@@ -4468,9 +4452,7 @@ module StdlibPrivate {
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
// TODO: Once we have TupleElementAny, this generality can be increased.
i = 0 and
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
input = "Argument[1].TupleElement[" + j.toString() + "]"
)
input = "Argument[1].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "Argument[0].Parameter[" + i.toString() + "]" and
@@ -4499,9 +4481,7 @@ module StdlibPrivate {
or
input = "Argument[1].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[1].TupleElement[" + i.toString() + "]"
)
input = "Argument[1].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
@@ -4525,9 +4505,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement.TupleElement[1]" and
@@ -4552,12 +4530,7 @@ module StdlibPrivate {
or
input = "Argument[" + i.toString() + "].SetElement"
or
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
// TODO: Once we have TupleElementAny, this generality can be increased.
i in [0 .. 1] and
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
)
input = "Argument[" + i.toString() + "].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
@@ -4580,12 +4553,6 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::Content c |
input = "Argument[self]." + c.getMaDRepresentation() and
output = "ReturnValue." + c.getMaDRepresentation() and
preservesValue = true
)
or
input = "Argument[self]" and
output = "ReturnValue" and
preservesValue = true
@@ -4741,12 +4708,10 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.TupleElement[1]" and
preservesValue = true
// TODO: put `key` into "ReturnValue.TupleElement[0]"
)
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.TupleElement[1]" and
preservesValue = true
// TODO: put `key` into "ReturnValue.TupleElement[0]"
}
}
@@ -4825,11 +4790,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement" and
preservesValue = true
)
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
or
input = "Argument[self]" and
output = "ReturnValue" and
@@ -4876,11 +4839,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement.TupleElement[1]" and
preservesValue = true
)
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.ListElement.TupleElement[1]" and
preservesValue = true
or
// TODO: Add the keys to output list
input = "Argument[self]" and

View File

@@ -589,11 +589,11 @@ def test_zip_tuple():
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
SINK_F(z[0][2])
SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
SINK_F(z[0][3])
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]"
SINK_F(z[1][3])
@expects(4)

View File

@@ -362,7 +362,7 @@ def test_load_in_bulk():
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
d = TestLoad.objects.in_bulk([1])
for val in d.values():
SINK(val.text) # $ MISSING: flow
SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text"
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"

View File

@@ -231,11 +231,11 @@ edges
| hash_extensions.rb:122:5:122:10 | single : Array [element 0] | hash_extensions.rb:125:10:125:15 | single : Array [element 0] | provenance | |
| hash_extensions.rb:122:14:122:26 | call to [] : Array [element 0] | hash_extensions.rb:122:5:122:10 | single : Array [element 0] | provenance | |
| hash_extensions.rb:122:15:122:25 | call to source | hash_extensions.rb:122:14:122:26 | call to [] : Array [element 0] | provenance | |
| hash_extensions.rb:123:5:123:9 | multi : Array [element 0] | hash_extensions.rb:126:10:126:14 | multi : Array [element 0] | provenance | |
| hash_extensions.rb:123:5:123:9 | multi : Array [element 0] | hash_extensions.rb:127:10:127:14 | multi : Array [element 0] | provenance | |
| hash_extensions.rb:123:13:123:38 | call to [] : Array [element 0] | hash_extensions.rb:123:5:123:9 | multi : Array [element 0] | provenance | |
| hash_extensions.rb:123:14:123:24 | call to source | hash_extensions.rb:123:13:123:38 | call to [] : Array [element 0] | provenance | |
| hash_extensions.rb:125:10:125:15 | single : Array [element 0] | hash_extensions.rb:125:10:125:20 | call to sole | provenance | |
| hash_extensions.rb:126:10:126:14 | multi : Array [element 0] | hash_extensions.rb:126:10:126:19 | call to sole | provenance | |
| hash_extensions.rb:127:10:127:14 | multi : Array [element 0] | hash_extensions.rb:127:10:127:19 | call to sole | provenance | |
nodes
| active_support.rb:180:5:180:5 | x : Array [element 0] | semmle.label | x : Array [element 0] |
| active_support.rb:180:9:180:18 | call to [] : Array [element 0] | semmle.label | call to [] : Array [element 0] |
@@ -493,11 +493,10 @@ nodes
| hash_extensions.rb:123:14:123:24 | call to source | semmle.label | call to source |
| hash_extensions.rb:125:10:125:15 | single : Array [element 0] | semmle.label | single : Array [element 0] |
| hash_extensions.rb:125:10:125:20 | call to sole | semmle.label | call to sole |
| hash_extensions.rb:126:10:126:14 | multi : Array [element 0] | semmle.label | multi : Array [element 0] |
| hash_extensions.rb:126:10:126:19 | call to sole | semmle.label | call to sole |
| hash_extensions.rb:127:10:127:14 | multi : Array [element 0] | semmle.label | multi : Array [element 0] |
| hash_extensions.rb:127:10:127:19 | call to sole | semmle.label | call to sole |
subpaths
testFailures
| hash_extensions.rb:126:10:126:19 | call to sole | Unexpected result: hasValueFlow=b |
#select
| active_support.rb:182:10:182:13 | ...[...] | active_support.rb:180:10:180:17 | call to source | active_support.rb:182:10:182:13 | ...[...] | $@ | active_support.rb:180:10:180:17 | call to source | call to source |
| active_support.rb:188:10:188:13 | ...[...] | active_support.rb:186:10:186:18 | call to source | active_support.rb:188:10:188:13 | ...[...] | $@ | active_support.rb:186:10:186:18 | call to source | call to source |
@@ -558,4 +557,4 @@ testFailures
| hash_extensions.rb:115:10:115:39 | ...[...] | hash_extensions.rb:110:21:110:31 | call to source | hash_extensions.rb:115:10:115:39 | ...[...] | $@ | hash_extensions.rb:110:21:110:31 | call to source | call to source |
| hash_extensions.rb:115:10:115:39 | ...[...] | hash_extensions.rb:110:65:110:75 | call to source | hash_extensions.rb:115:10:115:39 | ...[...] | $@ | hash_extensions.rb:110:65:110:75 | call to source | call to source |
| hash_extensions.rb:125:10:125:20 | call to sole | hash_extensions.rb:122:15:122:25 | call to source | hash_extensions.rb:125:10:125:20 | call to sole | $@ | hash_extensions.rb:122:15:122:25 | call to source | call to source |
| hash_extensions.rb:126:10:126:19 | call to sole | hash_extensions.rb:123:14:123:24 | call to source | hash_extensions.rb:126:10:126:19 | call to sole | $@ | hash_extensions.rb:123:14:123:24 | call to source | call to source |
| hash_extensions.rb:127:10:127:19 | call to sole | hash_extensions.rb:123:14:123:24 | call to source | hash_extensions.rb:127:10:127:19 | call to sole | $@ | hash_extensions.rb:123:14:123:24 | call to source | call to source |

View File

@@ -123,7 +123,8 @@ def m_sole
multi = [source("b"), source("c")]
sink(empty.sole)
sink(single.sole) # $ hasValueFlow=a
sink(multi.sole) # TODO: model that 'sole' does not return if the receiver has multiple elements
# TODO: model that 'sole' does not return if the receiver has multiple elements
sink(multi.sole) # $ SPURIOUS: hasValueFlow=b
end
m_sole()

View File

@@ -1,35 +0,0 @@
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_files")
cc_binary(
name = "codeql_canonical_path.dll",
srcs = [
"canonicalize.cpp",
"canonicalize.h",
"canonicalize_jni.cpp",
],
defines = ["CODEQL_CANONICALIZE_EXPORTS"],
linkopts = ["-lkernel32"],
linkshared = True,
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
deps = ["@rules_java//toolchains:jni"],
)
cc_library(
name = "canonicalize",
srcs = ["canonicalize.cpp"],
hdrs = ["canonicalize.h"],
defines = ["CODEQL_CANONICALIZE_EXPORTS"],
linkopts = ["-lkernel32"],
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
)
pkg_files(
name = "pkg",
srcs = [":codeql_canonical_path.dll"],
attributes = pkg_attributes(mode = "0755"),
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
)

View File

@@ -1,165 +0,0 @@
#ifdef _WIN32
#include "canonicalize.h"
#include <windows.h>
#include <string>
#include <unordered_map>
#include <shared_mutex>
#include <random>
namespace {
// Cache from an input path to its resolved form, shared through instance().
// It holds at most max_capacity_ entries and evicts a random one when full.
// Lookups take a shared lock; insertion and eviction take an exclusive lock.
// Paths whose resolution comes back empty are not cached.
class PathCache {
public:
// Returns the single cache object (a function-local static).
static PathCache& instance() {
static PathCache cache;
return cache;
}
// Returns a _wcsdup'ed copy of the resolved form of `path`, or nullptr when
// resolve() yields an empty string. The copy belongs to the caller.
// NOTE(review): _wcsdup's own result is not checked here — confirm callers
// tolerate a null return on allocation failure.
const wchar_t* canonicalize(const wchar_t* path) {
std::wstring key(path);
// Fast path: shared (read) lock for cache hit
{
std::shared_lock lock(mutex_);
auto it = cache_.find(key);
if (it != cache_.end()) {
return _wcsdup(it->second.c_str());
}
}
// Slow path: resolve and insert under exclusive lock
// (resolve() itself runs before the exclusive lock is taken)
std::wstring resolved = resolve(path);
if (resolved.empty()) return nullptr;
std::unique_lock lock(mutex_);
// Check again under exclusive lock (another thread may have inserted)
auto it = cache_.find(key);
if (it != cache_.end()) {
return _wcsdup(it->second.c_str());
}
// Evict a random entry if at capacity (matches C# strategy)
if (cache_.size() >= max_capacity_) {
std::uniform_int_distribution<size_t> dist(0, cache_.size() - 1);
auto evict = cache_.begin();
// Step from begin() to the randomly chosen entry.
std::advance(evict, dist(rng_));
cache_.erase(evict);
}
auto inserted = cache_.emplace(std::move(key), std::move(resolved)).first;
return _wcsdup(inserted->second.c_str());
}
private:
PathCache() = default;
// Maximum number of cached entries before one is evicted.
static constexpr size_t max_capacity_ = 4096;
// Input path -> resolved path.
std::unordered_map<std::wstring, std::wstring> cache_;
// Guards cache_; rng_ is used only while the exclusive lock is held.
std::shared_mutex mutex_;
// Random source for eviction, seeded from std::random_device.
std::mt19937 rng_{std::random_device{}()};
// Opens `path` (existing objects only, no access rights requested) and
// returns its final path name with any "\\?\" prefix stripped. If the open
// fails, falls back to resolve_nonexistent(). Returns an empty string on
// failure.
static std::wstring resolve(const wchar_t* path) {
HANDLE h = CreateFileW(
path,
0,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
nullptr,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
nullptr);
if (h == INVALID_HANDLE_VALUE) {
return resolve_nonexistent(path);
}
std::wstring result = get_final_path(h);
// The handle is needed only for the name query above.
CloseHandle(h);
if (result.empty()) return {};
return strip_prefix(result);
}
// Queries the normalized final path name for `h`. Tries a MAX_PATH stack
// buffer first; if the reported length does not fit, retries once with a
// buffer of that length plus one. Returns an empty string on failure.
static std::wstring get_final_path(HANDLE h) {
wchar_t buf[MAX_PATH];
DWORD len = GetFinalPathNameByHandleW(h, buf, MAX_PATH, FILE_NAME_NORMALIZED);
if (len > 0 && len < MAX_PATH) {
return std::wstring(buf, len);
}
if (len >= MAX_PATH) {
std::wstring big(len + 1, L'\0');
len = GetFinalPathNameByHandleW(h, big.data(), len + 1, FILE_NAME_NORMALIZED);
if (len > 0) return std::wstring(big.data(), len);
}
return {};
}
// Removes a leading "\\?\" from `path`. A leading "\\?\UNC\" is instead
// replaced by "\\": the prefix's last backslash is kept and one more is
// prepended. Paths with neither prefix are returned as-is.
static std::wstring strip_prefix(const std::wstring& path) {
constexpr std::wstring_view unc_prefix = L"\\\\?\\UNC\\";
constexpr std::wstring_view lp_prefix = L"\\\\?\\";
// The UNC prefix must be tested first because it also starts with lp_prefix.
if (path.starts_with(unc_prefix)) {
return L"\\" + path.substr(unc_prefix.size() - 1);
}
if (path.starts_with(lp_prefix)) {
return std::wstring(path.substr(lp_prefix.size()));
}
return path;
}
// For non-existent files: canonicalize parent, append filename
// (matches C#'s ConstructCanonicalPath)
// Splits at the last '\' or '/'; returns an empty string when there is no
// separator or the parent cannot be resolved. Because resolve() calls back
// into this function, a missing parent is handled by trying its own parent.
static std::wstring resolve_nonexistent(const wchar_t* path) {
std::wstring spath(path);
auto sep = spath.find_last_of(L"\\/");
if (sep == std::wstring::npos) return {};
std::wstring parent = spath.substr(0, sep);
std::wstring name = spath.substr(sep + 1);
std::wstring canonical_parent = resolve(parent.c_str());
if (canonical_parent.empty()) return {};
return canonical_parent + L"\\" + name;
}
};
} // namespace
extern "C" {
// UTF-16 entry point. Returns a newly allocated canonical form of `path`
// obtained from the PathCache, or nullptr for a null or empty input or when
// the cache yields no result. Release the result with canonicalize_free_w.
CODEQL_API const wchar_t* canonicalize_path_w(const wchar_t* path) {
    const bool has_input = path != nullptr && *path != L'\0';
    if (!has_input) {
        return nullptr;
    }
    return PathCache::instance().canonicalize(path);
}
// Frees a string previously returned by canonicalize_path_w.
CODEQL_API void canonicalize_free_w(const wchar_t* path) {
    wchar_t* owned = const_cast<wchar_t*>(path);
    free(owned);
}
// UTF-8 entry point. Converts `path` to UTF-16, canonicalizes it through the
// PathCache and converts the result back to UTF-8.
//
// Returns a malloc'ed, NUL-terminated string that must be released with
// canonicalize_free_u8, or nullptr when the input is null or empty, when a
// conversion or allocation fails, or when the cache yields no result.
CODEQL_API const char* canonicalize_path_u8(const char* path) {
    if (!path || !*path) return nullptr;

    // First call sizes the UTF-16 buffer; wlen includes the terminator.
    int wlen = MultiByteToWideChar(CP_UTF8, 0, path, -1, nullptr, 0);
    if (wlen <= 0) return nullptr;
    std::wstring wpath(wlen - 1, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath.data(), wlen) <= 0) {
        return nullptr;
    }

    const wchar_t* wresult = PathCache::instance().canonicalize(wpath.c_str());
    if (!wresult) return nullptr;

    // Convert back to UTF-8. Every failure leaves `result` null so that a
    // buffer that was never written is not returned to the caller.
    char* result = nullptr;
    int ulen = WideCharToMultiByte(CP_UTF8, 0, wresult, -1, nullptr, 0, nullptr, nullptr);
    if (ulen > 0) {
        result = static_cast<char*>(malloc(ulen));
        if (result != nullptr &&
            WideCharToMultiByte(CP_UTF8, 0, wresult, -1, result, ulen, nullptr, nullptr) <= 0) {
            free(result);
            result = nullptr;
        }
    }
    free(const_cast<wchar_t*>(wresult));
    return result;
}
// Frees a string previously returned by canonicalize_path_u8.
CODEQL_API void canonicalize_free_u8(const char* path) {
    char* owned = const_cast<char*>(path);
    free(owned);
}
} // extern "C"
#endif

View File

@@ -1,31 +0,0 @@
#ifndef CODEQL_CANONICALIZE_H
#define CODEQL_CANONICALIZE_H
#ifdef _WIN32
#ifdef CODEQL_CANONICALIZE_EXPORTS
#define CODEQL_API __declspec(dllexport)
#else
#define CODEQL_API __declspec(dllimport)
#endif
#include <wchar.h>
#ifdef __cplusplus
extern "C" {
#endif
// UTF-16 interface (for JNI / Java / Kotlin)
CODEQL_API const wchar_t* canonicalize_path_w(const wchar_t* path);
CODEQL_API void canonicalize_free_w(const wchar_t* path);
// UTF-8 interface (for Go)
CODEQL_API const char* canonicalize_path_u8(const char* path);
CODEQL_API void canonicalize_free_u8(const char* path);
#ifdef __cplusplus
}
#endif
#endif // _WIN32
#endif // CODEQL_CANONICALIZE_H

View File

@@ -1,25 +0,0 @@
#ifdef _WIN32
#include <jni.h>
#include <string>
#include "canonicalize.h"
extern "C" {
// JNI implementation of
// com.semmle.util.files.NativeCanonicalizer.nativeCanonicalizePath(String).
//
// Passes the Java string's UTF-16 contents to canonicalize_path_w and returns
// the result as a new Java string. Returns null when jpath is null, when the
// string contents cannot be obtained, or when canonicalization yields nothing.
JNIEXPORT jstring JNICALL
Java_com_semmle_util_files_NativeCanonicalizer_nativeCanonicalizePath(
    JNIEnv *env, jclass cls, jstring jpath) {
    if (jpath == nullptr) return nullptr;

    const jsize len = env->GetStringLength(jpath);
    const jchar* chars = env->GetStringChars(jpath, nullptr);
    if (chars == nullptr) return nullptr;  // contents unavailable (e.g. out of memory)

    // GetStringChars is not specified to NUL-terminate its result, while
    // canonicalize_path_w reads up to a terminator. Copy exactly `len` code
    // units into a std::wstring, whose c_str() is always terminated.
    std::wstring path(reinterpret_cast<const wchar_t*>(chars), static_cast<size_t>(len));
    env->ReleaseStringChars(jpath, chars);

    const wchar_t* result = canonicalize_path_w(path.c_str());
    if (result == nullptr) return nullptr;

    jstring jresult = env->NewString(
        reinterpret_cast<const jchar*>(result),
        static_cast<jsize>(wcslen(result)));
    // NewString copied the characters, so the native buffer can be released.
    canonicalize_free_w(result);
    return jresult;
}
} // extern "C"
#endif