Compare commits

..

8 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
57083e7f40 Load canonical-path DLL from CODEQL_EXTRACTOR_GO_ROOT, not CODEQL_DIST 2026-07-01 13:55:18 +00:00
copilot-swe-agent[bot]
f7f537631c Replace non-ASCII em-dash in NativeCanonicalizer.java comment 2026-07-01 13:40:08 +00:00
copilot-swe-agent[bot]
bfa81c30f4 Fix bazel formatting and non-windows build of canonicalize-dll 2026-07-01 13:23:21 +00:00
copilot-swe-agent[bot]
c807858312 Revert remaining go/ql/test changes 2026-07-01 13:12:12 +00:00
copilot-swe-agent[bot]
1a12cd68ec Revert integration-test changes 2026-07-01 13:07:24 +00:00
copilot-swe-agent[bot]
17cbc4990d Use std::mt19937 instead of std::rand for cache eviction
Replace std::rand() with std::mt19937 seeded by std::random_device for
less predictable cache eviction. Also use .first from emplace() instead
of structured binding with ignored second element.
2026-07-01 13:02:00 +00:00
copilot-swe-agent[bot]
77c962b4d9 Add shared canonicalize library and Go/Kotlin integrations
- Add shared/canonicalize/ C++ DLL that resolves Windows paths via
  GetFinalPathNameByHandleW with a full-path cache (matching C#'s
  CanonicalPathCache strategy).
- Add JNI interface (canonicalize_jni.cpp) for Kotlin/Java consumers.
- Add NativeCanonicalizer.java to kotlin-extractor for loading the DLL
  and exposing a resolve() method.
- Call NativeCanonicalizer.resolve() in FileUtil.tryMakeCanonical().
- Add go/extractor/canonicalize package (Windows + non-Windows impls).
- Use canonicalize.CanonicalizePath() in go extractor normalizedPath().
- Update Bazel build files for all new targets and deps.
2026-07-01 12:58:02 +00:00
copilot-swe-agent[bot]
1e33a81d6b Initial plan 2026-07-01 12:52:11 +00:00
22 changed files with 475 additions and 50 deletions

View File

@@ -56,9 +56,19 @@ codeql_pkg_files(
prefix = "tools/{CODEQL_PLATFORM}",
)
codeql_pkg_files(
name = "canonicalize-dll",
srcs = select({
"@platforms//os:windows": ["//shared/canonicalize:pkg"],
"//conditions:default": [],
}),
prefix = "tools/{CODEQL_PLATFORM}",
)
codeql_pack(
name = "go",
srcs = [
":canonicalize-dll",
":extractor-pack-arch",
":resources",
"//go/codeql-tools",

View File

@@ -16,6 +16,7 @@ go_library(
importpath = "github.com/github/codeql-go/extractor",
visibility = ["//visibility:public"],
deps = [
"//go/extractor/canonicalize",
"//go/extractor/dbscheme",
"//go/extractor/diagnostics",
"//go/extractor/srcarchive",

11
go/extractor/canonicalize/BUILD.bazel generated Normal file
View File

@@ -0,0 +1,11 @@
load("@rules_go//go:def.bzl", "go_library")
go_library(
name = "canonicalize",
srcs = [
"canonicalize_other.go",
"canonicalize_windows.go",
],
importpath = "github.com/github/codeql-go/extractor/canonicalize",
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,5 @@
//go:build !windows
package canonicalize
func CanonicalizePath(path string) string { return path }

View File

@@ -0,0 +1,65 @@
//go:build windows
package canonicalize
import (
"os"
"path/filepath"
"syscall"
"unsafe"
)
var (
dll *syscall.DLL
procCanonicalize *syscall.Proc
procFree *syscall.Proc
available bool
)
func init() {
root := os.Getenv("CODEQL_EXTRACTOR_GO_ROOT")
if root == "" {
return
}
dllPath := filepath.Join(root, "tools", "win64", "codeql_canonical_path.dll")
d, err := syscall.LoadDLL(dllPath)
if err != nil {
return
}
p, err := d.FindProc("canonicalize_path_u8")
if err != nil {
return
}
f, _ := d.FindProc("canonicalize_free_u8")
dll = d
procCanonicalize = p
procFree = f
available = true
}
func CanonicalizePath(path string) string {
if !available {
return path
}
pathBytes := append([]byte(path), 0)
ret, _, _ := procCanonicalize.Call(uintptr(unsafe.Pointer(&pathBytes[0])))
if ret == 0 {
return path
}
result := bytePtrToString((*byte)(unsafe.Pointer(ret)))
if procFree != nil {
procFree.Call(ret)
}
return result
}
func bytePtrToString(p *byte) string {
if p == nil {
return ""
}
var n int
for ptr := unsafe.Pointer(p); *(*byte)(ptr) != 0; n++ {
ptr = unsafe.Add(ptr, 1)
}
return string(unsafe.Slice(p, n))
}

View File

@@ -22,6 +22,7 @@ import (
"sync"
"time"
"github.com/github/codeql-go/extractor/canonicalize"
"github.com/github/codeql-go/extractor/dbscheme"
"github.com/github/codeql-go/extractor/diagnostics"
"github.com/github/codeql-go/extractor/srcarchive"
@@ -766,7 +767,7 @@ func normalizedPath(ast *ast.File, fset *token.FileSet) string {
if err != nil {
return file
}
return path
return canonicalize.CanonicalizePath(path)
}
// extractFile extracts AST information for the given file

View File

@@ -1242,11 +1242,11 @@ public class FileUtil
public static File tryMakeCanonical (File f)
{
try {
return f.getCanonicalFile();
return NativeCanonicalizer.resolve(f.getCanonicalFile());
}
catch (IOException ignored) {
Exceptions.ignore(ignored, "Can't log error: Could be too verbose.");
return new File(simplifyPath(f));
return NativeCanonicalizer.resolve(new File(simplifyPath(f)));
}
}

View File

@@ -0,0 +1,41 @@
package com.semmle.util.files;
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
public class NativeCanonicalizer {
private static final boolean available;
static {
boolean loaded = false;
if (File.separatorChar == '\\') {
String dist = System.getenv("CODEQL_DIST");
if (dist != null && !dist.isEmpty()) {
try {
Path library = Paths.get(dist).resolve("tools").resolve("win64")
.resolve("codeql_canonical_path.dll").toAbsolutePath();
System.load(library.toString());
loaded = true;
} catch (RuntimeException | UnsatisfiedLinkError ignored) {
}
}
}
available = loaded;
}
private NativeCanonicalizer() {}
// UTF-16 JNI interface - no encoding conversion
private static native String nativeCanonicalizePath(String path);
public static File resolve(File path) {
if (!available) return path;
String result = nativeCanonicalizePath(path.getAbsolutePath());
return result != null ? new File(result) : path;
}
public static boolean isAvailable() {
return available;
}
}

View File

@@ -1,10 +1,4 @@
extensions:
- addsTo:
pack: codeql/java-all
extensible: summaryModel
data:
- ["org.apache.hc.client5.http.protocol", "RedirectLocations", True, "add", "(URI)", "", "Argument[0]", "Argument[this].Element", "value", "hq-manual"]
- addsTo:
pack: codeql/java-all
extensible: neutralModel

View File

@@ -0,0 +1,6 @@
extensions:
- addsTo:
pack: codeql/java-all
extensible: summaryModel
data:
- ["org.apache.hc.client5.http.protocol", "RedirectLocations", True, "add", "(URI)", "", "Argument[0]", "Argument[this].Element", "value", "hq-manual"]

View File

@@ -1138,9 +1138,7 @@ predicate clearsContent(Node n, ContentSet cs) {
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
*/
predicate expectsContent(Node n, ContentSet c) {
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
}
predicate expectsContent(Node n, ContentSet c) { none() }
/**
* Holds if values stored inside attribute `c` are cleared at node `n`.

View File

@@ -91,8 +91,6 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
}
string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) }
bindingset[token]
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
// needed to support `Argument[x..y]` ranges

View File

@@ -4199,9 +4199,11 @@ module StdlibPrivate {
// The positional argument contains a mapping.
// TODO: these values can be overwritten by keyword arguments
// - dict mapping
input = "Argument[0].WithAnyDictionaryElement" and
output = "ReturnValue" and
preservesValue = true
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
// - list-of-pairs mapping
input = "Argument[0].ListElement.TupleElement[1]" and
@@ -4238,7 +4240,9 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
input = "Argument[0].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
// Element content is mutated into list element content
@@ -4262,9 +4266,11 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0].WithAnyTupleElement" and
output = "ReturnValue" and
preservesValue = true
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
preservesValue = true
)
or
input = "Argument[0].ListElement" and
output = "ReturnValue" and
@@ -4288,7 +4294,9 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
input = "Argument[0].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.SetElement" and
@@ -4334,7 +4342,9 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
input = "Argument[0].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
@@ -4362,7 +4372,9 @@ module StdlibPrivate {
or
content = "SetElement"
or
content = "AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
content = "TupleElement[" + i.toString() + "]"
)
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
input = "Argument[0]." + content and
@@ -4392,7 +4404,9 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
input = "Argument[0].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
@@ -4420,7 +4434,9 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
input = "Argument[0].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue" and
@@ -4452,7 +4468,9 @@ module StdlibPrivate {
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
// TODO: Once we have TupleElementAny, this generality can be increased.
i = 0 and
input = "Argument[1].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
input = "Argument[1].TupleElement[" + j.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "Argument[0].Parameter[" + i.toString() + "]" and
@@ -4481,7 +4499,9 @@ module StdlibPrivate {
or
input = "Argument[1].SetElement"
or
input = "Argument[1].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[1].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
@@ -4505,7 +4525,9 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
input = "Argument[0].AnyTupleElement"
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement.TupleElement[1]" and
@@ -4530,7 +4552,12 @@ module StdlibPrivate {
or
input = "Argument[" + i.toString() + "].SetElement"
or
input = "Argument[" + i.toString() + "].AnyTupleElement"
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
// TODO: Once we have TupleElementAny, this generality can be increased.
i in [0 .. 1] and
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
@@ -4553,6 +4580,12 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::Content c |
input = "Argument[self]." + c.getMaDRepresentation() and
output = "ReturnValue." + c.getMaDRepresentation() and
preservesValue = true
)
or
input = "Argument[self]" and
output = "ReturnValue" and
preservesValue = true
@@ -4708,10 +4741,12 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.TupleElement[1]" and
preservesValue = true
// TODO: put `key` into "ReturnValue.TupleElement[0]"
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.TupleElement[1]" and
preservesValue = true
// TODO: put `key` into "ReturnValue.TupleElement[0]"
)
}
}
@@ -4790,9 +4825,11 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement" and
preservesValue = true
)
or
input = "Argument[self]" and
output = "ReturnValue" and
@@ -4839,9 +4876,11 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.ListElement.TupleElement[1]" and
preservesValue = true
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement.TupleElement[1]" and
preservesValue = true
)
or
// TODO: Add the keys to output list
input = "Argument[self]" and

View File

@@ -589,11 +589,11 @@ def test_zip_tuple():
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
SINK_F(z[0][2])
SINK_F(z[0][3])
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]"
SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
SINK_F(z[1][3])
@expects(4)

View File

@@ -362,7 +362,7 @@ def test_load_in_bulk():
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
d = TestLoad.objects.in_bulk([1])
for val in d.values():
SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text"
SINK(val.text) # $ MISSING: flow
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"

View File

@@ -231,11 +231,11 @@ edges
| hash_extensions.rb:122:5:122:10 | single : Array [element 0] | hash_extensions.rb:125:10:125:15 | single : Array [element 0] | provenance | |
| hash_extensions.rb:122:14:122:26 | call to [] : Array [element 0] | hash_extensions.rb:122:5:122:10 | single : Array [element 0] | provenance | |
| hash_extensions.rb:122:15:122:25 | call to source | hash_extensions.rb:122:14:122:26 | call to [] : Array [element 0] | provenance | |
| hash_extensions.rb:123:5:123:9 | multi : Array [element 0] | hash_extensions.rb:127:10:127:14 | multi : Array [element 0] | provenance | |
| hash_extensions.rb:123:5:123:9 | multi : Array [element 0] | hash_extensions.rb:126:10:126:14 | multi : Array [element 0] | provenance | |
| hash_extensions.rb:123:13:123:38 | call to [] : Array [element 0] | hash_extensions.rb:123:5:123:9 | multi : Array [element 0] | provenance | |
| hash_extensions.rb:123:14:123:24 | call to source | hash_extensions.rb:123:13:123:38 | call to [] : Array [element 0] | provenance | |
| hash_extensions.rb:125:10:125:15 | single : Array [element 0] | hash_extensions.rb:125:10:125:20 | call to sole | provenance | |
| hash_extensions.rb:127:10:127:14 | multi : Array [element 0] | hash_extensions.rb:127:10:127:19 | call to sole | provenance | |
| hash_extensions.rb:126:10:126:14 | multi : Array [element 0] | hash_extensions.rb:126:10:126:19 | call to sole | provenance | |
nodes
| active_support.rb:180:5:180:5 | x : Array [element 0] | semmle.label | x : Array [element 0] |
| active_support.rb:180:9:180:18 | call to [] : Array [element 0] | semmle.label | call to [] : Array [element 0] |
@@ -493,10 +493,11 @@ nodes
| hash_extensions.rb:123:14:123:24 | call to source | semmle.label | call to source |
| hash_extensions.rb:125:10:125:15 | single : Array [element 0] | semmle.label | single : Array [element 0] |
| hash_extensions.rb:125:10:125:20 | call to sole | semmle.label | call to sole |
| hash_extensions.rb:127:10:127:14 | multi : Array [element 0] | semmle.label | multi : Array [element 0] |
| hash_extensions.rb:127:10:127:19 | call to sole | semmle.label | call to sole |
| hash_extensions.rb:126:10:126:14 | multi : Array [element 0] | semmle.label | multi : Array [element 0] |
| hash_extensions.rb:126:10:126:19 | call to sole | semmle.label | call to sole |
subpaths
testFailures
| hash_extensions.rb:126:10:126:19 | call to sole | Unexpected result: hasValueFlow=b |
#select
| active_support.rb:182:10:182:13 | ...[...] | active_support.rb:180:10:180:17 | call to source | active_support.rb:182:10:182:13 | ...[...] | $@ | active_support.rb:180:10:180:17 | call to source | call to source |
| active_support.rb:188:10:188:13 | ...[...] | active_support.rb:186:10:186:18 | call to source | active_support.rb:188:10:188:13 | ...[...] | $@ | active_support.rb:186:10:186:18 | call to source | call to source |
@@ -557,4 +558,4 @@ testFailures
| hash_extensions.rb:115:10:115:39 | ...[...] | hash_extensions.rb:110:21:110:31 | call to source | hash_extensions.rb:115:10:115:39 | ...[...] | $@ | hash_extensions.rb:110:21:110:31 | call to source | call to source |
| hash_extensions.rb:115:10:115:39 | ...[...] | hash_extensions.rb:110:65:110:75 | call to source | hash_extensions.rb:115:10:115:39 | ...[...] | $@ | hash_extensions.rb:110:65:110:75 | call to source | call to source |
| hash_extensions.rb:125:10:125:20 | call to sole | hash_extensions.rb:122:15:122:25 | call to source | hash_extensions.rb:125:10:125:20 | call to sole | $@ | hash_extensions.rb:122:15:122:25 | call to source | call to source |
| hash_extensions.rb:127:10:127:19 | call to sole | hash_extensions.rb:123:14:123:24 | call to source | hash_extensions.rb:127:10:127:19 | call to sole | $@ | hash_extensions.rb:123:14:123:24 | call to source | call to source |
| hash_extensions.rb:126:10:126:19 | call to sole | hash_extensions.rb:123:14:123:24 | call to source | hash_extensions.rb:126:10:126:19 | call to sole | $@ | hash_extensions.rb:123:14:123:24 | call to source | call to source |

View File

@@ -123,8 +123,7 @@ def m_sole
multi = [source("b"), source("c")]
sink(empty.sole)
sink(single.sole) # $ hasValueFlow=a
# TODO: model that 'sole' does not return if the receiver has multiple elements
sink(multi.sole) # $ SPURIOUS: hasValueFlow=b
sink(multi.sole) # TODO: model that 'sole' does not return if the receiver has multiple elements
end
m_sole()

View File

@@ -0,0 +1,35 @@
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_files")
cc_binary(
name = "codeql_canonical_path.dll",
srcs = [
"canonicalize.cpp",
"canonicalize.h",
"canonicalize_jni.cpp",
],
defines = ["CODEQL_CANONICALIZE_EXPORTS"],
linkopts = ["-lkernel32"],
linkshared = True,
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
deps = ["@rules_java//toolchains:jni"],
)
cc_library(
name = "canonicalize",
srcs = ["canonicalize.cpp"],
hdrs = ["canonicalize.h"],
defines = ["CODEQL_CANONICALIZE_EXPORTS"],
linkopts = ["-lkernel32"],
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
)
pkg_files(
name = "pkg",
srcs = [":codeql_canonical_path.dll"],
attributes = pkg_attributes(mode = "0755"),
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,165 @@
#ifdef _WIN32
#include "canonicalize.h"
#include <windows.h>
#include <string>
#include <unordered_map>
#include <shared_mutex>
#include <random>
namespace {
class PathCache {
public:
static PathCache& instance() {
static PathCache cache;
return cache;
}
const wchar_t* canonicalize(const wchar_t* path) {
std::wstring key(path);
// Fast path: shared (read) lock for cache hit
{
std::shared_lock lock(mutex_);
auto it = cache_.find(key);
if (it != cache_.end()) {
return _wcsdup(it->second.c_str());
}
}
// Slow path: resolve and insert under exclusive lock
std::wstring resolved = resolve(path);
if (resolved.empty()) return nullptr;
std::unique_lock lock(mutex_);
// Check again under exclusive lock (another thread may have inserted)
auto it = cache_.find(key);
if (it != cache_.end()) {
return _wcsdup(it->second.c_str());
}
// Evict a random entry if at capacity (matches C# strategy)
if (cache_.size() >= max_capacity_) {
std::uniform_int_distribution<size_t> dist(0, cache_.size() - 1);
auto evict = cache_.begin();
std::advance(evict, dist(rng_));
cache_.erase(evict);
}
auto inserted = cache_.emplace(std::move(key), std::move(resolved)).first;
return _wcsdup(inserted->second.c_str());
}
private:
PathCache() = default;
static constexpr size_t max_capacity_ = 4096;
std::unordered_map<std::wstring, std::wstring> cache_;
std::shared_mutex mutex_;
std::mt19937 rng_{std::random_device{}()};
static std::wstring resolve(const wchar_t* path) {
HANDLE h = CreateFileW(
path,
0,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
nullptr,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
nullptr);
if (h == INVALID_HANDLE_VALUE) {
return resolve_nonexistent(path);
}
std::wstring result = get_final_path(h);
CloseHandle(h);
if (result.empty()) return {};
return strip_prefix(result);
}
static std::wstring get_final_path(HANDLE h) {
wchar_t buf[MAX_PATH];
DWORD len = GetFinalPathNameByHandleW(h, buf, MAX_PATH, FILE_NAME_NORMALIZED);
if (len > 0 && len < MAX_PATH) {
return std::wstring(buf, len);
}
if (len >= MAX_PATH) {
std::wstring big(len + 1, L'\0');
len = GetFinalPathNameByHandleW(h, big.data(), len + 1, FILE_NAME_NORMALIZED);
if (len > 0) return std::wstring(big.data(), len);
}
return {};
}
static std::wstring strip_prefix(const std::wstring& path) {
constexpr std::wstring_view unc_prefix = L"\\\\?\\UNC\\";
constexpr std::wstring_view lp_prefix = L"\\\\?\\";
if (path.starts_with(unc_prefix)) {
return L"\\" + path.substr(unc_prefix.size() - 1);
}
if (path.starts_with(lp_prefix)) {
return std::wstring(path.substr(lp_prefix.size()));
}
return path;
}
// For non-existent files: canonicalize parent, append filename
// (matches C#'s ConstructCanonicalPath)
static std::wstring resolve_nonexistent(const wchar_t* path) {
std::wstring spath(path);
auto sep = spath.find_last_of(L"\\/");
if (sep == std::wstring::npos) return {};
std::wstring parent = spath.substr(0, sep);
std::wstring name = spath.substr(sep + 1);
std::wstring canonical_parent = resolve(parent.c_str());
if (canonical_parent.empty()) return {};
return canonical_parent + L"\\" + name;
}
};
} // namespace
extern "C" {
CODEQL_API const wchar_t* canonicalize_path_w(const wchar_t* path) {
if (!path || !*path) return nullptr;
return PathCache::instance().canonicalize(path);
}
CODEQL_API void canonicalize_free_w(const wchar_t* path) {
free(const_cast<wchar_t*>(path));
}
CODEQL_API const char* canonicalize_path_u8(const char* path) {
if (!path || !*path) return nullptr;
int wlen = MultiByteToWideChar(CP_UTF8, 0, path, -1, nullptr, 0);
if (wlen <= 0) return nullptr;
std::wstring wpath(wlen - 1, L'\0');
MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath.data(), wlen);
const wchar_t* wresult = PathCache::instance().canonicalize(wpath.c_str());
if (!wresult) return nullptr;
int ulen = WideCharToMultiByte(CP_UTF8, 0, wresult, -1, nullptr, 0, nullptr, nullptr);
if (ulen <= 0) { free(const_cast<wchar_t*>(wresult)); return nullptr; }
char* result = static_cast<char*>(malloc(ulen));
WideCharToMultiByte(CP_UTF8, 0, wresult, -1, result, ulen, nullptr, nullptr);
free(const_cast<wchar_t*>(wresult));
return result;
}
CODEQL_API void canonicalize_free_u8(const char* path) {
free(const_cast<char*>(path));
}
} // extern "C"
#endif

View File

@@ -0,0 +1,31 @@
#ifndef CODEQL_CANONICALIZE_H
#define CODEQL_CANONICALIZE_H
#ifdef _WIN32
#ifdef CODEQL_CANONICALIZE_EXPORTS
#define CODEQL_API __declspec(dllexport)
#else
#define CODEQL_API __declspec(dllimport)
#endif
#include <wchar.h>
#ifdef __cplusplus
extern "C" {
#endif
// UTF-16 interface (for JNI / Java / Kotlin)
CODEQL_API const wchar_t* canonicalize_path_w(const wchar_t* path);
CODEQL_API void canonicalize_free_w(const wchar_t* path);
// UTF-8 interface (for Go)
CODEQL_API const char* canonicalize_path_u8(const char* path);
CODEQL_API void canonicalize_free_u8(const char* path);
#ifdef __cplusplus
}
#endif
#endif // _WIN32
#endif // CODEQL_CANONICALIZE_H

View File

@@ -0,0 +1,25 @@
#ifdef _WIN32
#include <jni.h>
#include "canonicalize.h"
extern "C" {
JNIEXPORT jstring JNICALL
Java_com_semmle_util_files_NativeCanonicalizer_nativeCanonicalizePath(
JNIEnv *env, jclass cls, jstring jpath) {
const jchar* path = env->GetStringChars(jpath, nullptr);
const wchar_t* result = canonicalize_path_w(reinterpret_cast<const wchar_t*>(path));
env->ReleaseStringChars(jpath, path);
if (result == nullptr) return nullptr;
jstring jresult = env->NewString(
reinterpret_cast<const jchar*>(result),
static_cast<jsize>(wcslen(result)));
canonicalize_free_w(result);
return jresult;
}
} // extern "C"
#endif