Move language-agnostic model to shared library

This commit is contained in:
Nicolas Will
2025-02-06 21:54:18 +01:00
parent cd70acde66
commit 3dc28c2d17
7 changed files with 34 additions and 26 deletions

View File

@@ -1,252 +0,0 @@
/**
* A language-independent library for reasoning about cryptography.
*/
import codeql.util.Location
import codeql.util.Option
/**
 * The language-specific input required to instantiate `CryptographyBase`.
 */
signature module InputSig<LocationSig Location> {
/** An element that has a location. */
class LocatableElement {
/** Gets the location of this element. */
Location getLocation();
}
/**
 * A `Location` used by `CryptographyBase` when no location is known
 * (it is paired with `UnknownPropertyValue` in `EllipticCurve.properties`).
 */
class UnknownLocation instanceof Location;
}
module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
/** An element with a location, as supplied by the language-specific `Input` module. */
final class LocatableElement = Input::LocatableElement;
/** The location supplied by the `Input` module for use when no location is known. */
final class UnknownLocation = Input::UnknownLocation;
/** The placeholder string `"<unknown>"`, used as a property value when the real value is not known. */
final class UnknownPropertyValue extends string {
UnknownPropertyValue() { this = "<unknown>" }
}
abstract class NodeBase instanceof LocatableElement {
  /**
   * Gets a textual representation of this node, usually the name of the
   * operation, algorithm, or property it stands for.
   */
  abstract string toString();

  /**
   * Gets the location of this node, as reported by the underlying `LocatableElement`.
   */
  Location getLocation() { result = super.getLocation() }

  /**
   * Gets an element this node originates from (e.g., a string literal in source
   * describing it), with `value` bound to a string associated with that origin.
   *
   * Has no results unless overridden.
   */
  LocatableElement getOrigin(string value) { none() }

  /**
   * Gets the child of this node reached via the edge named `edgeName`.
   *
   * Derived classes override this predicate to build the graph of cryptographic
   * operations. Has no results unless overridden.
   */
  NodeBase getChild(string edgeName) { none() }

  /**
   * Holds if this node has a property named `key` with value `value` at `location`.
   *
   * By default the only property is `"origin"`: one tuple per result of `getOrigin`,
   * omitted when the origin's location is this node's own location.
   * Derived classes override this predicate to add further properties.
   */
  predicate properties(string key, string value, Location location) {
    exists(LocatableElement origin |
      origin = this.getOrigin(value) and
      location = origin.getLocation()
    ) and
    not location = this.getLocation() and
    key = "origin"
  }

  /**
   * Gets a parent of this node, that is, a node that has this node as a child.
   */
  final NodeBase getAParent() { this = result.getChild(_) }
}
/** An alias for `NodeBase`; `Operation` and `Algorithm` are declared as extending it. */
class Asset = NodeBase;
/**
 * A cryptographic operation, for example hashing or encryption.
 *
 * The string representation of an operation is its operation name.
 */
abstract class Operation extends Asset {
  /**
   * Gets the algorithm this operation is performed with.
   */
  abstract Algorithm getAlgorithm();

  /**
   * Gets the name of this operation, e.g., "hash" or "encrypt".
   */
  abstract string getOperationName();

  final override string toString() { result = this.getOperationName() }

  /**
   * Gets the child reached via `edgeName`: any inherited child, plus a `"uses"`
   * edge to the algorithm of this operation. When no algorithm exists, the
   * `"uses"` edge points back at this operation itself.
   */
  override NodeBase getChild(string edgeName) {
    edgeName = "uses" and
    (
      result = this.getAlgorithm()
      or
      not exists(this.getAlgorithm()) and result = this
    )
    or
    result = super.getChild(edgeName)
  }
}
/**
 * A cryptographic algorithm.
 *
 * The string representation of an algorithm is its algorithm name.
 */
abstract class Algorithm extends Asset {
/**
* Gets the name of this algorithm, e.g., "AES" or "SHA".
*/
abstract string getAlgorithmName();
/**
* Gets the raw name of this algorithm from source (no parsing or formatting)
*/
abstract string getRawAlgorithmName();
// Nodes are rendered by their (formatted) algorithm name, not the raw name.
final override string toString() { result = this.getAlgorithmName() }
}
/**
* A hashing operation that processes data to generate a hash value.
* This operation takes an input message of arbitrary content and length and produces a fixed-size
* hash value as the output using a specified hashing algorithm.
*/
abstract class HashOperation extends Operation {
/** Gets the hash algorithm used by this operation (narrows the result type to `HashAlgorithm`). */
abstract override HashAlgorithm getAlgorithm();
/** Gets the operation name, which is "HASH" here. */
override string getOperationName() { result = "HASH" }
}
// Rule: a newtype that enumerates the types of an algorithm must only group algorithms
// that are modelled with one and the same interface.
//
// Example: HKDF and PKCS12KDF are both key derivation algorithms.
// However, PKCS12KDF also has a property: the iteration count.
//
// If we had HKDF and PKCS12KDF under a common TKeyDerivationType,
// someone modelling a library might try to make a generic identification of both of those algorithms.
//
// They would therefore not use the specialized type for PKCS12KDF,
// meaning "from PKCS12KDF algo select algo" would have no results.
//
/**
 * The hash algorithm types distinguished by this model.
 *
 * `OtherHashType` is the branch whose display name is taken from the raw algorithm
 * name (see `HashAlgorithm.hashTypeToNameMapping`).
 */
newtype THashType =
// We're saying by this that all of these have an identical interface / properties / edges
MD5() or
SHA1() or
SHA256() or
SHA512() or
OtherHashType()
/**
 * A hashing algorithm, which maps variable-length input to a fixed-size hash value.
 */
abstract class HashAlgorithm extends Algorithm {
  /**
   * Holds if `name` is the display name for the hash type `type`.
   *
   * The known types map to fixed names; `OtherHashType` maps to the raw
   * algorithm name of this algorithm.
   */
  final predicate hashTypeToNameMapping(THashType type, string name) {
    type = MD5() and name = "MD5"
    or
    type = SHA1() and name = "SHA-1"
    or
    type = SHA256() and name = "SHA-256"
    or
    type = SHA512() and name = "SHA-512"
    or
    type = OtherHashType() and name = this.getRawAlgorithmName()
  }

  /**
   * Gets the hash type of this algorithm.
   */
  abstract THashType getHashType();

  /**
   * Gets the display name of this algorithm, derived from its hash type.
   */
  override string getAlgorithmName() {
    exists(THashType type |
      type = this.getHashType() and
      this.hashTypeToNameMapping(type, result)
    )
  }
}
/**
* An operation that derives one or more keys from an input value.
*/
abstract class KeyDerivationOperation extends Operation {
/** Gets the operation name, which is "KEY_DERIVATION" here. */
override string getOperationName() { result = "KEY_DERIVATION" }
}
/**
* An algorithm that derives one or more keys from an input value.
*/
abstract class KeyDerivationAlgorithm extends Algorithm {
/** Gets the name of this key derivation algorithm; left abstract for the concrete algorithm classes. */
abstract override string getAlgorithmName();
}
/**
 * The HKDF key derivation function.
 */
abstract class HKDF extends KeyDerivationAlgorithm {
  final override string getAlgorithmName() { result = "HKDF" }

  /**
   * Gets the hash algorithm used by this HKDF instance.
   */
  abstract HashAlgorithm getHashAlgorithm();

  /**
   * Gets the child reached via `edgeName`: any inherited child, plus a
   * `"digest"` edge to the hash algorithm.
   */
  override NodeBase getChild(string edgeName) {
    edgeName = "digest" and result = this.getHashAlgorithm()
    or
    result = super.getChild(edgeName)
  }
}
/**
 * The PKCS #12 key derivation function.
 */
abstract class PKCS12KDF extends KeyDerivationAlgorithm {
  final override string getAlgorithmName() { result = "PKCS12KDF" }

  /**
   * Gets the hash algorithm used by this PKCS12KDF instance.
   */
  abstract HashAlgorithm getHashAlgorithm();

  /**
   * Gets the child reached via `edgeName`: any inherited child, plus a
   * `"digest"` edge to the hash algorithm.
   */
  override NodeBase getChild(string edgeName) {
    edgeName = "digest" and result = this.getHashAlgorithm()
    or
    result = super.getChild(edgeName)
  }
}
/**
 * The elliptic curve family types that `EllipticCurve.getCurveFamilyType` can report.
 */
newtype TEllipticCurveFamilyType =
// We're saying by this that all of these have an identical interface / properties / edges
NIST() or
SEC() or
NUMS() or
PRIME() or
BRAINPOOL() or
CURVE25519() or
CURVE448() or
C2() or
SM2() or
ES() or
OtherEllipticCurveFamilyType()
/**
 * An elliptic curve algorithm.
 */
abstract class EllipticCurve extends Algorithm {
  /**
   * Gets the key size of this curve as a string, with `location` bound to the
   * location associated with that value.
   */
  abstract string getKeySize(Location location);

  /**
   * Gets the family type of this curve.
   */
  abstract TEllipticCurveFamilyType getCurveFamilyType();

  /**
   * Holds if this curve has a property named `key` with value `value` at `location`.
   *
   * Adds a `"key_size"` property to the inherited ones. If no key size is known
   * at all, the property is reported once with the unknown placeholder value at
   * the unknown location.
   */
  override predicate properties(string key, string value, Location location) {
    super.properties(key, value, location)
    or
    key = "key_size" and
    // The condition deliberately uses `_` instead of `location`: testing
    // `exists(this.getKeySize(location))` is evaluated per location, so the
    // `else` branch would also hold for every `UnknownLocation` and report an
    // "<unknown>" key size next to the real one.
    if exists(this.getKeySize(_))
    then value = this.getKeySize(location)
    else (
      value instanceof UnknownPropertyValue and location instanceof UnknownLocation
    )
    // other properties, like field type are possible, but not modeled until considered necessary
  }

  /**
   * Gets the name of this curve: the raw algorithm name converted to upper case.
   */
  override string getAlgorithmName() { result = this.getRawAlgorithmName().toUpperCase() }

  /**
   * Gets the raw name of this curve.
   *
   * For elliptic curves specifically, users are responsible for providing the
   * official name of the algorithm as the 'raw' name. Casing doesn't matter;
   * further naming restrictions are enforced by `getAlgorithmName` by default.
   *
   * Rationale: elliptic curve names can vary a lot in their components
   * (e.g., "secp256r1" vs "P-256"). Producing a generalized set of properties
   * that captures all cases is possible, but such modeling is likely not
   * necessary. If all properties need to be captured, we can reassess how
   * names are generated.
   */
  abstract override string getRawAlgorithmName();
}
}

View File

@@ -1,4 +1,4 @@
private import Base
private import codeql.cryptography.Model
private import cpp as Lang
module CryptoInput implements InputSig<Lang::Location> {

View File

@@ -77,6 +77,8 @@ module OpenSSLModel {
HKDF() { algorithmStringToKDFFetchArgFlow("HKDF", origin, this) }
override string getRawAlgorithmName() { result = origin.getValue() }
override Crypto::HashAlgorithm getHashAlgorithm() { none() }
override Crypto::LocatableElement getOrigin(string name) {
@@ -89,6 +91,8 @@ module OpenSSLModel {
PKCS12KDF() { algorithmStringToKDFFetchArgFlow("PKCS12KDF", origin, this) }
override string getRawAlgorithmName() { result = origin.getValue() }
override Crypto::HashAlgorithm getHashAlgorithm() { none() }
override Crypto::NodeBase getOrigin(string name) {

View File

@@ -6,6 +6,7 @@ extractor: cpp
library: true
upgrades: upgrades
dependencies:
codeql/cryptography: ${workspace}
codeql/dataflow: ${workspace}
codeql/mad: ${workspace}
codeql/rangeanalysis: ${workspace}

View File

@@ -1,8 +0,0 @@
#!/bin/bash
# Runs generate_cbom.py (looked up in the current directory) against a CodeQL database.
#
# Every setting below can be overridden through an environment variable of the
# same name; the defaults are the original author's local paths and will not
# exist on other machines.
CODEQL_PATH="${CODEQL_PATH:-/Users/nicolaswill/Library/Application Support/Code/User/globalStorage/github.vscode-codeql/distribution5/codeql/codeql}"
DATABASE_PATH="${DATABASE_PATH:-/Users/nicolaswill/openssl_codeql/openssl/openssl_db}"
QUERY_FILE="${QUERY_FILE:-CBOMGraph.ql}"
OUTPUT_DIR="${OUTPUT_DIR:-graph_output}"
python3 generate_cbom.py -c "$CODEQL_PATH" -d "$DATABASE_PATH" -q "$QUERY_FILE" -o "$OUTPUT_DIR"

View File

@@ -1,104 +0,0 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import subprocess
import xml.etree.ElementTree as ET
def run_codeql_analysis(codeql_path, database_path, query_path, output_dir):
    """Run `codeql database analyze` with DGML output written to `output_dir`.

    Args:
        codeql_path: Path to the CodeQL CLI executable.
        database_path: Path to the CodeQL database to analyze.
        query_path: Path to the query to run.
        output_dir: Directory passed as `--output`; created if missing.

    Returns:
        The exit code of the CodeQL process (0, since any other code exits).

    Exits the interpreter with status 1 when the CodeQL process fails, after
    printing its captured stderr.
    """
    os.makedirs(output_dir, exist_ok=True)

    argv = [codeql_path, "database", "analyze", database_path, query_path]
    argv += ["--rerun", "--format=dgml", "--output", output_dir]
    print(f"Running CodeQL analysis: {' '.join(argv)}")

    completed = subprocess.run(argv, capture_output=True, text=True)
    if completed.returncode != 0:
        print("Analysis failed.")
        print(completed.stderr)
        sys.exit(1)

    print("Analysis completed successfully.")
    return completed.returncode
def convert_dgml_to_dot(dgml_file, dot_file):
    """Convert a DGML graph file into a Graphviz DOT file.

    Each child of the DGML `Nodes` section becomes a box node named
    `nd_<Id>`. Its label is the `Label` attribute followed by every other
    attribute as `key=value`, one per line. Each child of the `Links` section
    becomes an edge labelled with its `Label` attribute (empty if absent).

    Label text is escaped so that double quotes and backslashes cannot break
    the quoted DOT string. A missing `Nodes` or `Links` section is treated as
    empty.

    Args:
        dgml_file: Path of the DGML file to read (UTF-8).
        dot_file: Path of the DOT file to write (UTF-8).

    Raises:
        KeyError: If a node lacks `Id` or a link lacks `Source`/`Target`.
        xml.etree.ElementTree.ParseError: If the DGML file is not valid XML.
    """
    dgml_ns = "{http://schemas.microsoft.com/vs/2009/dgml}"

    def children(parent, tag):
        # find() returns None for an absent section; compare with None because
        # an element without children is falsy.
        section = parent.find(dgml_ns + tag)
        return [] if section is None else list(section)

    def dot_escape(text):
        # Backslashes first, so the backslashes added for quotes are not doubled.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    print(f"Processing DGML file: {dgml_file}")

    # Read source DGML
    with open(dgml_file, "r", encoding="utf-8") as f:
        root = ET.fromstring(f.read())

    # Form dot element sequence
    body_l = ["digraph cbom {", "node [shape=box];", "rankdir=LR;"]

    # Process nodes
    for node in children(root, "Nodes"):
        att = node.attrib
        node_id = att["Id"]
        label_parts = []
        for key, value in att.items():
            if key == "Id":
                continue
            label_parts.append(value if key == "Label" else f"{key}={value}")
        # "\\n" is the two-character DOT line break; it is added after escaping
        # so that it is not itself escaped.
        label = "\\n".join(dot_escape(part) for part in label_parts)
        body_l.append(f'nd_{node_id} [label="{label}"];')

    # Process edges
    for edge in children(root, "Links"):
        att = edge.attrib
        body_l.append('nd_{} -> nd_{} [label="{}"];'.format(
            att["Source"], att["Target"], dot_escape(att.get("Label", ""))))

    body_l.append("}")

    # Write DOT output
    with open(dot_file, "w", encoding="utf-8") as f:
        f.write("\n".join(body_l))

    print(f"DGML file successfully converted to DOT format: {dot_file}")
def main():
    """Command-line entry point: run the CodeQL analysis, then convert its DGML output to DOT.

    Expects the analysis to leave a file named `cbomgraph.dgml` in the output
    directory and writes `cbomgraph.dot` next to it. Exits with status 1 when
    that DGML file is not found.
    """
    parser = argparse.ArgumentParser(description="Run CodeQL analysis and convert DGML to DOT.")
    parser.add_argument("-c", "--codeql", required=True, help="Path to CodeQL CLI executable.")
    parser.add_argument("-d", "--database", required=True, help="Path to the CodeQL database.")
    parser.add_argument("-q", "--query", required=True, help="Path to the .ql query file.")
    parser.add_argument("-o", "--output", required=True, help="Output directory for analysis results.")
    args = parser.parse_args()

    # Run CodeQL analysis
    run_codeql_analysis(args.codeql, args.database, args.query, args.output)

    # Locate DGML file
    # NOTE(review): the file name is fixed; presumably it follows the query's name - confirm.
    dgml_file = os.path.join(args.output, "cbomgraph.dgml")
    # splitext only swaps the final extension; str.replace would also rewrite a
    # ".dgml" that happens to occur in the output directory's own path.
    dot_file = os.path.splitext(dgml_file)[0] + ".dot"

    if not os.path.exists(dgml_file):
        print(f"No DGML file found in {args.output}.")
        sys.exit(1)

    # Convert DGML to DOT
    convert_dgml_to_dot(dgml_file, dot_file)


if __name__ == "__main__":
    main()