From fe1baf7dc1884b25cb5adc128beae2d59165f2cb Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Wed, 30 Jul 2025 14:37:54 -0700 Subject: [PATCH] wip --- README.org | 78 +++++++++ codeql-bundling/README.org | 4 +- codeql-dataflow-sql-injection/README.org | 127 +++++++++++++- ...incoming.codeql-customizations-workshop.md | 164 ++++++++++++++++++ codeql-sqlite/README.org | 2 +- 5 files changed, 369 insertions(+), 6 deletions(-) create mode 100644 codeql-dataflow-sql-injection/incoming.codeql-customizations-workshop.md diff --git a/README.org b/README.org index 0476cd2..a1d9074 100644 --- a/README.org +++ b/README.org @@ -66,6 +66,84 @@ ** Additional Structure Notes - The original upstream README.md is preserved at [[./README-vscode-codeql-starter.md]] +* Possible Reading Orders + +** Data Flow +*** Debugging data flow config (instead of taint flow), Java + We can illustrate taint-flow debugging in the Java SQL injection sample + - [[./codeql-sqlite/TaintFlowDebugging.ql]] + - [[./codeql-sqlite/TaintFlowDebugging.md]] + +*** Debugging data flow config (instead of taint flow), C + +** Modeling +*** Review: SQLite Injection Workshop, Java + - Recap the Java-based injection example. + +*** Customizations via codeql, java + - codeql-dataflow-sql-injection/README.org, [[file:codeql-dataflow-sql-injection/README.org::*supplement codeql: Add to FlowSource or a subclass][supplement codeql: Add to FlowSource or a subclass]] + - TODO raw md from staging: codeql-dataflow-sql-injection/incoming.codeql-customizations-workshop.md + +*** Model Editor: Simplest Case, Java + - Extend the Java example using the model editor. + - Explain how "models-as-data" works under the hood. 
+ - customizations using models-as-data, via model editor + - editor as illustration tool + - customizations using models-as-data, via text + - continue with codeql-dataflow-sql-injection + - [[file:codeql-dataflow-sql-injection/README.org::*supplement codeql: Add to models-as-data][supplement codeql: Add to models-as-data]] + +*** Jedis Example: Scale Demonstration, Java + - Use Jedis (Java Redis client) to show modeling at scale. + - Emphasize quantity; CodeQL logic is unchanged from #2. + +*** TODO Review: SQLite Injection Workshop (C) + - C++ version of the workshop. + +*** TODO (Optional) Extending Queries with Customizations.qll + - Supported in most languages, but not C++ by default. + - Can be enabled by building a custom CodeQL bundle. + - Use this CLI tool: https://github.com/advanced-security/codeql-bundle + + - USE language in name + + - Demonstrate using `codeql-lab`. + + in [[./codeql-sqlite/README.org]] + + ql/cpp/ql/lib/semmle/code/cpp/security/FlowSources.qll + #+BEGIN_SRC text + abstract class FlowSource extends DataFlow::Node + #+END_SRC + + + The other languages include Customizations.qll via , e.g., + ql/python/ql/lib/python.qll + 1. Modify + : ql/python/ql/lib/python.qll + 2. Add + : ql/python/ql/lib/Customizations.qll + + + For C/C++, + 1. Modify + : ql/cpp/ql/lib/cpp.qll + 2. Add + : ql/cpp/ql/lib/Customizations.qll + +*** TODO Use models-as-data QL code directly (no graphical editor). + + + + - The model definition files exist + - Data files exist + - There is no editor + - Generate YAML manually. + - customizations using models-as-data, via text + - continue with codeql-dataflow-sql-injection + - The ./ql/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql query works out of + the box + - Add =char* get_user_info()= as extra source for illustration + + +** TODO codeql-bundling + * Tool Setup Some scripts are used here, found in [[./bin/]]. 
To ensure the ones written in Python have access to prerequites, set up a virtual environment via diff --git a/codeql-bundling/README.org b/codeql-bundling/README.org index 9113b8e..cd5e505 100644 --- a/codeql-bundling/README.org +++ b/codeql-bundling/README.org @@ -207,9 +207,7 @@ # {"name": "kind","type": "string"}, # {"name": "provenance","type": "string"} - - - - Check the Customizations.qll files, for extending /existing/ queries via + - <> Check the Customizations.qll files, for extending /existing/ queries via /custom/ codeql. Note that there isn't one for C++, but it can be added. #+BEGIN_SRC sh cd ~/codeql-lab/tmp.bundle/codeql/qlpacks/codeql && find * -name Customizations.qll diff --git a/codeql-dataflow-sql-injection/README.org b/codeql-dataflow-sql-injection/README.org index 7634fd5..192aeac 100644 --- a/codeql-dataflow-sql-injection/README.org +++ b/codeql-dataflow-sql-injection/README.org @@ -44,6 +44,7 @@ #+END_SRC and others. + XX: no java, use C In [[../ql/java/ql/lib/Customizations.qll]] notice the comments mentioning RemoteFlowSource. 
@@ -81,10 +82,132 @@ RemoteFlowSource, not ActiveThreatModelSource ** TODO supplement codeql: Add to models-as-data - - schema in codeql: [[../ql/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll]] + - schema in codeql: [[../ql/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll]] + #+BEGIN_SRC java + extensible predicate sourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId + ); + #+END_SRC + + - schema in json: ../tmp.bundle/codeql/qlpacks/codeql/cpp-queries/1.3.0/.codeql/libraries/codeql/cpp-all/3.0.0/.packinfo + #+BEGIN_SRC sh + ../bin/hovjson < ../tmp.bundle/codeql/qlpacks/codeql/cpp-queries/1.3.0/.codeql/libraries/codeql/cpp-all/3.0.0/.packinfo + { + "extensible_predicate_metadata": { + "extensible_predicates": [ + { + "name": "sourceModel", + "parameters": [ + {"name": "namespace","type": "string"}, + {"name": "type","type": "string"}, + {"name": "subtypes","type": "boolean"}, + {"name": "name","type": "string"}, + {"name": "signature","type": "string"}, + {"name": "ext","type": "string"}, + {"name": "output","type": "string"}, + {"name": "kind","type": "string"}, + {"name": "provenance","type": "string"} + ], + "has_origin": true, + "path": "semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll", + "start_line": 8, + "start_column": 1, + "end_line": 11, + "end_column": 3 + }, + .... + ] + } + } - - data sample: [[../.github/codeql/extensions/jedis-db-local-java/models/redis.clients.jedis.model.yml]] + #+END_SRC + - note: QlBuiltins::ExtensionId madId is only in ql, not json. 
+ + - file format sample: ../ql/cpp/ql/lib/ext/empty.model.yml + + - data sample: + #+begin_src javascript + # partial model of windows system calls + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + # processenv.h + - ["", "", False, "GetCommandLineA", "", "", "ReturnValue[*]", "local", "manual"] + #+end_src + + - add a =sourceModel= + #+BEGIN_SRC yaml + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: + - [ + "", + "", + False, + "get_user_info", + "", + "", + "ReturnValue[*]", + "remote", + "manual", + ] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: [] + #+END_SRC + #+BEGIN_SRC sh + 0:$ ls .github/codeql/extensions/ + jedis-db-local-java/ sqlite-db/ + (venv) + hohn@ghm3 ~/work-gh/codeql-lab + 0:$ cp -r .github/codeql/extensions/sqlite-db .github/codeql/extensions/sqlite-db-c + + pushd .github/codeql/extensions/sqlite-db-c + + sed -i -e 's/java-all/cpp-all/g;' codeql-pack.yml + # TODO also replace pack name + + 0:$ cat > models/sqlite.model.yml + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: + - [ + "", + "", + False, + "get_user_info", + "", + "", + "ReturnValue[*]", + "remote", + "manual", + ] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: [] + #+END_SRC + + - back to SqlTainted.ql + - + - In the model editor, we see a java.io.*Console.*readline' (using =show already modeled= option) #+BEGIN_SRC sh 1:$ rg -i 'java.io.*Console.*readline' ql/java diff --git a/codeql-dataflow-sql-injection/incoming.codeql-customizations-workshop.md b/codeql-dataflow-sql-injection/incoming.codeql-customizations-workshop.md new file mode 100644 index 0000000..df4255d --- /dev/null +++ 
b/codeql-dataflow-sql-injection/incoming.codeql-customizations-workshop.md @@ -0,0 +1,164 @@ +# Adding to Customizations example + +## Setup and sample run + +The **prerequisite** for this workshop is the `java/codeql-dataflow-sql-injection/` workshop, also located in this repository, which centers around detecting a potential SQL Injection vulnerability in a small Java database interaction application. + +Now that we have used the query developed in the previous workshop, let's see if there is a pre-existing query that can detect the same vulnerability. + +Navigate to the `SQLTainted.ql` query and run it. + +To find that file locally use one of the following: + + 1) If you are using a [CodeQL bundle](https://github.com/github/codeql-action/releases), this can be found via a search like: +`find . -iname "SqlTainted.ql"`. + + 2) If you are using the [installed packs](https://github.com/orgs/codeql/packages/container/package/java-all) (obtained via Install Pack Dependencies), then the location of the query will be under `~/.codeql/packages/codeql/java-all/` or `C:\Users\<username>\.codeql\packages\codeql\java-all\` + +## Identify the problem + +Determine if the query detects the following source and sink (again from the previous workshop) using the *Quick Evaluation* feature in the editor: + +source: +``` +System.console().readLine(); +``` +sink: +``` +conn.createStatement().executeUpdate(query); +``` + +## Investigate the Implementation + +It's time to look at the query file and libraries responsible for the implementation. Use the *Go to Definition* feature of the editor to investigate the `QueryInjectionSink` class used in the query and the `queryTaintedBy` predicate. + +Also look at the definition of the `RemoteFlowSource` class and take this time to discuss [*Abstract* classes](https://codeql.github.com/docs/ql-language-reference/types/#abstract-classes). 
+ +Take some time to investigate the differences between *abstract* and *nonabstract* classes using a generic example: +``` +abstract class A extends string { + A() { this = ["A", "B", "C"] } + } + + class B extends A { B() { this = "B" } } + + class C extends A { C() { this = "C" } } + +from A a +select a +``` +versus: +``` +class A extends string { + A() { this = ["A", "B", "C"] } + } + + class B extends A { B() { this = "B" } } + + class C extends A { C() { this = "C" } } + +from A a +select a +``` +(attribution: this example was created by @smowton) + +## Add to the Implementation + +Now that we understand the reason that `SQLTainted.ql` does not detect the potential SQL Injection vulnerability (it does not model the source), we will add to the `Customizations.qll` file which acts as a query extension interface. This will allow `SQLTainted.ql` to detect the vulnerability. + +First determine which import will be required to access the abstract class that we will need to extend: + +``` +private import semmle.code.java.dataflow.FlowSources +``` + +Then add a custom class that models the +`System.console().readLine()` call: + +``` +class ReadLineFlowSource extends RemoteFlowSource { + ReadLineFlowSource() { + exists(MethodAccess read | + read.getCallee().hasName("readLine") and + this.asExpr() = read + ) + } + + override string getSourceType() { result = "readLine source" } + } +``` + +Now when we run `SQLTainted.ql` we will detect the same vulnerability that is detected by the end of the `java/codeql-dataflow-sql-injection/` workshop. + +## Additional practice + +Now we can also see what it would be like to add an additional sink to the `Customizations.qll` file. While the following doesn't apply for the particular rule `SQLTainted.ql`, we can just use this as an exercise for practice. + +We will now take the time to add a model for the `System.err.printf("Sent: %s", query)` call, as a sink in the application. 
+ +``` +import semmle.code.java.security.QueryInjection + + class PrintfSink extends QueryInjectionSink { + PrintfSink(){ + exists(MethodAccess printf | + printf.getCallee().hasName("printf") + and this.asExpr() = printf.getAnArgument() + ) + } + } +``` + +We should now get 2 `path-problem` results when we run `SQLTainted.ql` and we should be familiar with adding custom sources and sinks to `Customizations.qll` to extend the pre-existing queries. + +# Model Editor Alternative Technique + +[CodeQL Model Editor](https://docs.github.com/en/code-security/codeql-for-vs-code/using-the-advanced-functionality-of-the-codeql-for-vs-code-extension/using-the-codeql-model-editor) can be used when an out of the box CodeQL query needs a customization. Currently (as of June 2024) supported customizations via the Model Editor are sources and sinks. The Model Editor will generate [CodeQL model packs](https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack) which can get added to any analysis at scan runtime. + +## Setup the example for `readLine` + +As a sample, we want to add the `Console.io.readLine` call to the `RemoteFlowSource` concept, like we did above, and get it to be picked up in the out of the box scans. To accomplish this, **clear any previous changes** in the `Customizations.qll` file to get a fresh start on no customization currently added to `RemoteFlowSource`. + +This should reset the results of the `SqlTainted.ql` query to return nothing. + +Then double check whether any out of the box models for `Console.io.readLine` already exist (as of June 2024 [there are](https://github.com/github/codeql/blob/main/java/ql/lib/ext/generated/java.io.model.yml)). We want to temporarily remove those **just for demonstration purposes**. 
+To do that check: + + 1) If you are using a [CodeQL bundle](https://github.com/github/codeql-action/releases), these models can be found locally via a search like: +`grep -R "readLine" . | grep "Console" | sort --unique | grep ".yml" | grep "java"` + + 2) If you are using the [installed packs](https://github.com/orgs/codeql/packages/container/package/java-all) (obtained via Install Pack Dependencies), then the location of the model will be under `~/.codeql/packages/codeql/java-queries/<version>/.codeql/libraries/codeql/java-all/<version>/ext/generated/java.io.model.yml`. + +Once that file is open, remove any lines containing the `java.io.Console.readLine` signature. + +## Open the Model Editor + +In the QL widget selection, there is a panel labelled "CODEQL METHOD MODELING". Select "Start Modeling". It should open a central panel that shows a display saying that some % of the Java Runtime is modelled (but not 100%). Expanding the Java Runtime panel should show `java.io.Console.readLine()` as a model-able option. + +## Model the API + +Select Model Type -> "Source" and Kind -> "remote" and then click "Save". This will generate the model pack in the `.github` folder. Take some time to explore that directory and the model pack. + +## Enable testing with the model + +To [test the model in the editor](https://docs.github.com/en/code-security/codeql-for-vs-code/using-the-advanced-functionality-of-the-codeql-for-vs-code-extension/using-the-codeql-model-editor#testing-codeql-model-packs-in-vs-code), a setting that enables extension packs must be added to the VSCode settings. Open the `.vscode/settings.json` file and add this line: `"codeQL.runningQueries.useExtensionPacks": "all"`. 
+ +## Utilize the model in a test + +Create the following sample query to perform a quick test that the model has been successfully configured and added to a scan: + +example.ql +``` +import java +import semmle.code.java.dataflow.FlowSources + +from RemoteFlowSource r +select r +``` + +The results of this should now show the `readLine` call! + +## Utilize the model in the out of the box query + +Open the `SqlTainted.ql` query again and then run it. The results should now show a path through our sample vulnerable application! + diff --git a/codeql-sqlite/README.org b/codeql-sqlite/README.org index cfd23cd..39e4bac 100644 --- a/codeql-sqlite/README.org +++ b/codeql-sqlite/README.org @@ -83,7 +83,7 @@ In [[../ql/java/ql/lib/Customizations.qll]] notice the comments mentioning RemoteFlowSource. - Use imports from [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] + Use imports from [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] but note that there are conflicts. you will use : private import semmle.code.java.dataflow.FlowSources Follow this to FlowSources, and find the mentioned RemoteFlowSource