WIP: add dgml/dot output/remove test code

This commit is contained in:
Nicolas Will
2025-01-29 19:45:04 +01:00
parent e027b0e9a0
commit 9af18bc100
7 changed files with 162 additions and 179 deletions

View File

@@ -51,7 +51,9 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
* This predicate is used by derived classes to construct the graph of cryptographic operations.
*/
predicate properties(string key, string value, Location location) {
key = "origin" and location = this.getOrigin(value).getLocation()
key = "origin" and
location = this.getOrigin(value).getLocation() and
not location = this.getLocation()
}
/**

View File

@@ -1,125 +0,0 @@
/**
* A language-independent library for reasoning about cryptography.
*/
import codeql.util.Location
import codeql.util.Option
/** Language-specific inputs needed to instantiate `CryptographyBase`. */
signature module InputSig<LocationSig Location> {
/** The location class that `KnownUnknown.getLocation()` draws its results from. */
class KnownUnknownLocation extends Location;
/** An element that exposes a location. */
class LocatableElement {
/** Gets the location of this element. */
Location getLocation();
}
}
// An operation = a specific loc in code
// An algorithm
// Properties
// Node -> Operation -> Algorithm -> Symmetric -> SpecificSymmetricAlgo
// -[Language-Specific]-> LibrarySymmetricAlgo -> Properties
// For example (nested newtypes):
/*
* newtype for each algo, and each one of those would have params for their properties
* implementation: optional/range for example
*
*
*
* /**
* Constructs an `Option` type that is a disjoint union of the given type and an
* additional singleton element.
*/
/**
 * A language-independent model of cryptographic assets, parameterised over a
 * location type and a set of language-specific inputs.
 */
module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
  /** The underlying algebraic type of graph nodes: one branch per node kind. */
  newtype TNode =
    TNodeUnknown() or
    TNodeAlgorithm() or
    TNodeOperation()

  /**
   * A cryptographic asset in code, i.e., an algorithm, operation, property, or known unknown.
   */
  abstract class Node extends TNode {
    // this would then extend LanguageNode
    /** Gets the location of this node. */
    abstract Location getLocation();

    /** Gets a textual representation of this node. */
    abstract string toString();

    /** Gets the child of this node at position `childIndex`. */
    abstract Node getChild(int childIndex);

    /** Gets a child of this node, at any position. */
    final Node getAChild() { exists(int index | result = this.getChild(index)) }

    /** Gets a node that has this node as one of its children. */
    final Node getAParent() { this = result.getAChild() }
  }

  /** A placeholder node for an asset that is known to exist but cannot be resolved. */
  final class KnownUnknown extends Node, TNodeUnknown {
    override string toString() { result = "unknown" }

    /** Holds for no child: a known unknown is always a leaf. */
    override Node getChild(int childIndex) { none() }

    override Location getLocation() { result instanceof Input::KnownUnknownLocation }
  }

  /** A cryptographic operation; its only child (index 0) is its algorithm. */
  abstract class Operation extends Node, TNodeOperation {
    /**
     * Gets the algorithm associated with this operation.
     */
    abstract Node getAlgorithm();

    /**
     * Gets the name of this operation, e.g., "hash" or "encrypt".
     */
    abstract string getOperationName();

    final override Node getChild(int childIndex) {
      result = this.getAlgorithm() and
      childIndex = 0
    }

    final override string toString() { result = this.getOperationName() }
  }

  /** A cryptographic algorithm. */
  abstract class Algorithm extends Node, TNodeAlgorithm {
    /**
     * Gets the name of this algorithm, e.g., "AES" or "SHA".
     */
    abstract string getAlgorithmName();
  }

  /**
   * A hashing operation that processes data to generate a hash value.
   * This operation takes an input message of arbitrary content and length and produces a fixed-size
   * hash value as the output using a specified hashing algorithm.
   */
  abstract class HashOperation extends Operation {
    abstract override HashAlgorithm getAlgorithm();

    override string getOperationName() { result = "hash" }
  }

  /**
   * A hashing algorithm that transforms variable-length input into a fixed-size hash value.
   */
  abstract class HashAlgorithm extends Algorithm { }

  /**
   * An operation that derives one or more keys from an input value.
   */
  abstract class KeyDerivationOperation extends Operation {
    override string getOperationName() { result = "key derivation" }
  }

  /**
   * An algorithm that derives one or more keys from an input value.
   */
  abstract class KeyDerivationAlgorithm extends Algorithm {
    abstract override string getAlgorithmName();
  }

  /**
   * HKDF Extract+Expand key derivation function.
   */
  abstract class HKDFAlgorithm extends KeyDerivationAlgorithm {
    final override string getAlgorithmName() { result = "HKDF" }

    /** Gets the digest algorithm node associated with this HKDF instance. */
    abstract Node getDigestAlgorithm();
  }
}

View File

@@ -84,16 +84,6 @@ module OpenSSLModel {
}
}
/**
 * A test-only key derivation operation: a `FunctionCall` whose enclosing function
 * also encloses some `HKDF` expression. That `HKDF` is reported as its algorithm.
 */
class TestKeyDerivationOperationHacky extends KeyDerivationOperation instanceof FunctionCall {
// The HKDF instance paired with this call by the characteristic predicate below.
HKDF hkdf;
TestKeyDerivationOperationHacky() {
// The pairing is purely "same enclosing function"; no data flow is consulted here.
this.getEnclosingFunction() = hkdf.(Expr).getEnclosingFunction()
}
override Crypto::KeyDerivationAlgorithm getAlgorithm() { result = hkdf }
}
class PKCS12KDF extends KeyDerivationAlgorithm, Crypto::PKCS12KDF instanceof Expr {
KDFAlgorithmStringLiteral origin;

View File

@@ -0,0 +1,47 @@
/**
* @name Outputs a graph representation of the cryptographic bill of materials.
* @kind graph
* @id cbomgraph
*/
import experimental.Quantum.Language
/**
 * Gets the comma-separated list of `(value,location)` pairs recorded on `node`
 * for the property `key`. Has no result if `node` has no such property.
 */
string getPropertyString(Crypto::NodeBase node, string key) {
  result =
    strictconcat(string pair |
      exists(string val, Location loc |
        pair = "(" + val + "," + loc.toString() + ")" and
        node.properties(key, val, loc)
      )
    |
      pair, ","
    )
}
/** Gets the display label of `node`, which is its string representation. */
string getLabel(Crypto::NodeBase node) { node.toString() = result }
/**
 * Holds if `node` is a graph node carrying the attribute `key` with value `value`.
 * Attributes are the label, the location, known-unknown markers and node properties.
 */
query predicate nodes(Crypto::NodeBase node, string key, string value) {
  // The label shown for the node
  value = getLabel(node) and
  key = "semmle.label"
  or
  // CodeQL's DGML output does not include a location
  value = node.getLocation().toString() and
  key = "Location"
  or
  // Known unknown edges should be reported as properties rather than edges
  value = "<unknown>" and
  node.getChild(key) = node
  or
  // Report properties
  getPropertyString(node, key) = value
}
/**
 * Holds if `target` is the child of `source` reached through the edge named `value`.
 * Self-edges are left out here.
 */
query predicate edges(Crypto::NodeBase source, Crypto::NodeBase target, string key, string value) {
  // Known unknowns are reported as properties rather than edges
  source != target and
  source.getChild(value) = target and
  key = "semmle.label"
}
/** Holds for the graph-level setting that marks this result set as a general graph. */
query predicate graphProperties(string key, string value) {
  value = "graph" and
  key = "semmle.graphKind"
}

View File

@@ -1,43 +0,0 @@
/**
* @name PQC Test
* @kind graph
*/
import experimental.Quantum.Language
/**
 * Gets a `(value,location)` string for one occurrence of the property `key` on `node`.
 */
string getValueAndLocationPairs(Crypto::NodeBase node, string key) {
  exists(Location loc, string val |
    result = "(" + val + "," + loc.toString() + ")" and
    node.properties(key, val, loc)
  )
}
/**
 * Gets a rendering of all properties of `node`, as space-separated
 * `key:(value,location),...` groups ordered by key.
 *
 * Has no result if `node` has no properties.
 *
 * The previous `forex` formulation required a single `result` to equal the
 * rendering of every key at once, so any node with two or more distinct
 * property keys had no result. Aggregating over the keys keeps the
 * single-key output unchanged and also handles several keys.
 */
string properties(Crypto::NodeBase node) {
  result =
    strictconcat(string key |
      node.properties(key, _, _)
    |
      key + ":" + strictconcat(getValueAndLocationPairs(node, key), ","), " " order by key
    )
}
/**
 * Gets the label of `node`: its string representation in square brackets,
 * followed by a space and its rendered properties when it has any.
 */
string getLabel(Crypto::NodeBase node) {
  exists(string suffix |
    (
      if exists(properties(node)) then suffix = " " + properties(node) else suffix = ""
    ) and
    result = "[" + node.toString() + "]" + suffix
  )
}
/** Holds if `node` is a graph node whose label attribute `key` has the value `value`. */
query predicate nodes(Crypto::NodeBase node, string key, string value) {
  value = getLabel(node) and
  key = "semmle.label"
}
/** Holds if `target` is the child of `source` reached through the edge named `value`. */
query predicate edges(Crypto::NodeBase source, Crypto::NodeBase target, string key, string value) {
  key = "semmle.label" and
  source.getChild(value) = target
}
/** Holds for the graph-level setting that marks this result set as a tree. */
query predicate graphProperties(string key, string value) {
  value = "tree" and
  key = "semmle.graphKind"
}

View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Runs the CBOM graph query against a CodeQL database and converts the
# resulting DGML to DOT by delegating to generate_cbom.py.
#
# Every setting below may be overridden from the environment, e.g.
#   CODEQL_PATH=/opt/codeql/codeql DATABASE_PATH=./my_db <this script>
# When a variable is unset or empty, the original hard-coded value is used.

# Stop at the first failing command and treat unset variables as errors.
set -euo pipefail

CODEQL_PATH="${CODEQL_PATH:-/Users/nicolaswill/Library/Application Support/Code/User/globalStorage/github.vscode-codeql/distribution5/codeql/codeql}"
DATABASE_PATH="${DATABASE_PATH:-/Users/nicolaswill/openssl_codeql/openssl/openssl_db}"
QUERY_FILE="${QUERY_FILE:-CBOMGraph.ql}"
OUTPUT_DIR="${OUTPUT_DIR:-graph_output}"

# The paths are quoted because the default CodeQL location contains a space.
python3 generate_cbom.py -c "$CODEQL_PATH" -d "$DATABASE_PATH" -q "$QUERY_FILE" -o "$OUTPUT_DIR"

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import subprocess
import xml.etree.ElementTree as ET
def run_codeql_analysis(codeql_path, database_path, query_path, output_dir):
    """Run ``codeql database analyze`` with DGML output written to ``output_dir``.

    Args:
        codeql_path: Path to the CodeQL CLI executable.
        database_path: Path to the CodeQL database to analyze.
        query_path: Path to the query to run.
        output_dir: Directory for the results; created if it does not exist.

    Returns:
        The process return code. Only ``0`` is ever returned, because every
        failure path exits the interpreter.

    Raises:
        SystemExit: With status 1 if CodeQL cannot be started or exits non-zero.
    """
    import shlex  # local import: only needed to render the command line below

    os.makedirs(output_dir, exist_ok=True)

    command = [
        codeql_path, "database", "analyze", database_path, query_path,
        "--rerun", "--format=dgml", "--output", output_dir
    ]

    # Quote each argument so that paths containing spaces are shown unambiguously
    # and the echoed command can be pasted into a shell as-is.
    printable = " ".join(shlex.quote(arg) for arg in command)
    print(f"Running CodeQL analysis: {printable}")

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Missing or non-executable CLI: report it like any other analysis failure
        # instead of surfacing a traceback.
        print("Analysis failed.", file=sys.stderr)
        print(f"Could not run {codeql_path!r}: {exc}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        # Diagnostics belong on stderr so they are not mixed into normal output.
        print("Analysis failed.", file=sys.stderr)
        print(result.stderr, file=sys.stderr)
        sys.exit(1)

    print("Analysis completed successfully.")
    return result.returncode
def _escape_dot_string(text):
    """Escape ``text`` for use inside a double-quoted DOT string."""
    return (
        text.replace("\\", "\\\\")  # must run first so later escapes are not doubled
        .replace('"', '\\"')
        .replace("\r\n", "\\n")
        .replace("\n", "\\n")
    )


def convert_dgml_to_dot(dgml_file, dot_file):
    """Convert a DGML graph file into a Graphviz DOT file.

    Each child of the DGML ``Nodes`` element becomes a DOT node named
    ``nd_<Id>``. Its label is the ``Label`` attribute, plus one ``key=value``
    line for every other attribute apart from ``Id``. Each child of ``Links``
    becomes an edge labelled with its ``Label`` attribute (empty if absent).

    Args:
        dgml_file: Path of the DGML file to read.
        dot_file: Path of the DOT file to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
        xml.etree.ElementTree.ParseError: If the DGML is not well-formed XML.
        KeyError: If a node lacks ``Id`` or a link lacks ``Source``/``Target``.
    """
    print(f"Processing DGML file: {dgml_file}")

    namespace = "{http://schemas.microsoft.com/vs/2009/dgml}"

    # Let the XML parser read the file so the declared encoding is honoured.
    root = ET.parse(dgml_file).getroot()

    # Form dot element sequence
    body_l = ["digraph cbom {",
              "node [shape=box];",
              "rankdir=LR;"
              ]

    # Process nodes. A graph without a Nodes element simply has no nodes.
    nodes = root.find(f"{namespace}Nodes")
    for node in (nodes if nodes is not None else ()):
        att = node.attrib
        node_id = att['Id']
        label_parts = []
        for key, value in att.items():
            if key == 'Id':
                continue
            # Escape before joining: the "\n" separators added below are DOT
            # escape sequences and must not themselves be escaped.
            if key == 'Label':
                label_parts.append(_escape_dot_string(value))
            else:
                label_parts.append(_escape_dot_string(f"{key}={value}"))
        label = "\\n".join(label_parts)
        body_l.append(f'nd_{node_id} [label="{label}"];')

    # Process edges. A graph without a Links element simply has no edges.
    links = root.find(f"{namespace}Links")
    for edge in (links if links is not None else ()):
        att = edge.attrib
        edge_s = 'nd_{} -> nd_{} [label="{}"];'.format(
            att["Source"], att["Target"], _escape_dot_string(att.get("Label", "")))
        body_l.append(edge_s)

    body_l.append("}")

    # Write DOT output
    with open(dot_file, "w", encoding="utf-8") as f:
        f.write("\n".join(body_l))

    print(f"DGML file successfully converted to DOT format: {dot_file}")
def main():
    """Parse the command line, run the CodeQL analysis and convert its DGML to DOT.

    Exits with status 1 if the analysis produced no ``cbomgraph.dgml`` file in
    the output directory.
    """
    parser = argparse.ArgumentParser(description="Run CodeQL analysis and convert DGML to DOT.")
    parser.add_argument("-c", "--codeql", required=True, help="Path to CodeQL CLI executable.")
    parser.add_argument("-d", "--database", required=True, help="Path to the CodeQL database.")
    parser.add_argument("-q", "--query", required=True, help="Path to the .ql query file.")
    parser.add_argument("-o", "--output", required=True, help="Output directory for analysis results.")

    args = parser.parse_args()

    # Run CodeQL analysis
    run_codeql_analysis(args.codeql, args.database, args.query, args.output)

    # Locate DGML file.
    # NOTE(review): the file name presumably follows the query's @id - confirm.
    dgml_file = os.path.join(args.output, "cbomgraph.dgml")
    if not os.path.exists(dgml_file):
        print(f"No DGML file found in {args.output}.")
        sys.exit(1)

    # Swap only the final extension. str.replace would also rewrite ".dgml"
    # inside a directory name and send the DOT file to a path that may not exist.
    dot_file = os.path.splitext(dgml_file)[0] + ".dot"

    # Convert DGML to DOT
    convert_dgml_to_dot(dgml_file, dot_file)


if __name__ == "__main__":
    main()