mirror of
https://github.com/github/codeql.git
synced 2026-04-23 15:55:18 +02:00
Move language-agnostic model to shared library
This commit is contained in:
@@ -1,252 +0,0 @@
|
||||
/**
|
||||
* A language-independent library for reasoning about cryptography.
|
||||
*/
|
||||
|
||||
import codeql.util.Location
|
||||
import codeql.util.Option
|
||||
|
||||
/**
 * The language-specific inputs needed to instantiate the cryptography model.
 */
signature module InputSig<LocationSig Location> {
  /** A program element that has a location. */
  class LocatableElement {
    /** Gets the location of this element. */
    Location getLocation();
  }

  /** A `Location` to report when the real location of something is not known. */
  class UnknownLocation instanceof Location;
}
|
||||
|
||||
module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
|
||||
/** A locatable element, as supplied by the language-specific `Input` module. */
final class LocatableElement = Input::LocatableElement;

/** The unknown-location class, as supplied by the language-specific `Input` module. */
final class UnknownLocation = Input::UnknownLocation;

/** The string `"<unknown>"`, used as the value of a property that has no known value. */
final class UnknownPropertyValue extends string {
  UnknownPropertyValue() { "<unknown>" = this }
}
|
||||
|
||||
/**
 * A node in the graph of cryptographic assets.
 *
 * Derived classes add outgoing edges by overriding `getChild` and add
 * key/value annotations by overriding `properties`.
 */
abstract class NodeBase instanceof LocatableElement {
  /**
   * Gets a textual representation of this node, usually the name of the
   * operation, algorithm, or property.
   */
  abstract string toString();

  /** Gets the location of this node in the code. */
  Location getLocation() { result = super.getLocation() }

  /**
   * Gets the origin of this node, for example a string literal in source that
   * describes it, together with the associated `value`.
   *
   * This default implementation has no results.
   */
  LocatableElement getOrigin(string value) { none() }

  /**
   * Gets the child of this node that is reached through the edge named `edgeName`.
   *
   * Derived classes override this predicate to construct the graph of
   * cryptographic operations. This default implementation has no results.
   */
  NodeBase getChild(string edgeName) { none() }

  /**
   * Holds if this node has a property named `key` with the given `value`
   * at the given `location`.
   *
   * Derived classes override this predicate to attach further properties.
   * The base implementation reports the `"origin"` property for every origin
   * whose location differs from the location of the node itself.
   */
  predicate properties(string key, string value, Location location) {
    key = "origin" and
    exists(LocatableElement origin |
      origin = this.getOrigin(value) and
      location = origin.getLocation()
    ) and
    // An origin located exactly at this node is not reported.
    not location = this.getLocation()
  }

  /** Gets a parent of this node, that is, a node that has this node as a child. */
  final NodeBase getAParent() { this = result.getChild(_) }
}

/** An asset; currently a plain alias of `NodeBase`. */
class Asset = NodeBase;
|
||||
|
||||
/**
 * A cryptographic operation, such as hashing or encryption.
 */
abstract class Operation extends Asset {
  /** Gets the name of this operation, for example "hash" or "encrypt". */
  abstract string getOperationName();

  /** Gets the algorithm associated with this operation. */
  abstract Algorithm getAlgorithm();

  /** Gets the operation name as the textual representation of this node. */
  final override string toString() { result = this.getOperationName() }

  /**
   * Gets the child reached through `edgeName`.
   *
   * In addition to the inherited children, the `"uses"` edge leads to the
   * algorithm of this operation, or back to this operation itself when it has
   * no algorithm.
   */
  override NodeBase getChild(string edgeName) {
    edgeName = "uses" and
    (
      if exists(this.getAlgorithm()) then result = this.getAlgorithm() else result = this
    )
    or
    result = super.getChild(edgeName)
  }
}
|
||||
|
||||
/**
 * A cryptographic algorithm.
 */
abstract class Algorithm extends Asset {
  /** Gets the raw name of this algorithm as it appears in source (no parsing or formatting). */
  abstract string getRawAlgorithmName();

  /** Gets the name of this algorithm, for example "AES" or "SHA". */
  abstract string getAlgorithmName();

  /** Gets the algorithm name as the textual representation of this node. */
  final override string toString() { result = this.getAlgorithmName() }
}
|
||||
|
||||
/**
 * A hashing operation that processes data to generate a hash value.
 *
 * The operation takes an input message of arbitrary content and length and
 * produces a fixed-size hash value using a specified hashing algorithm.
 */
abstract class HashOperation extends Operation {
  /** Gets the operation name, which is always `"HASH"`. */
  override string getOperationName() { result = "HASH" }

  /** Gets the hashing algorithm used by this operation. */
  abstract override HashAlgorithm getAlgorithm();
}
|
||||
|
||||
// Rule: a newtype that represents a kind of algorithm must not group
// algorithms that are modelled with different interfaces.
//
// Example: HKDF and PKCS12KDF are both key derivation algorithms, but
// PKCS12KDF additionally has a property, the iteration count.
//
// If both were branches of a single TKeyDerivationType, someone modelling a
// library might identify both algorithms generically and never use the
// specialized type for PKCS12KDF, so "from PKCS12KDF algo select algo" would
// have no results.
newtype THashType =
  // Listing these together states that they all share an identical
  // interface, set of properties, and set of edges.
  MD5() or
  SHA1() or
  SHA256() or
  SHA512() or
  OtherHashType()
|
||||
|
||||
/**
 * A hashing algorithm that transforms variable-length input into a fixed-size hash value.
 */
abstract class HashAlgorithm extends Algorithm {
  /** Gets the kind of hash this algorithm implements. */
  abstract THashType getHashType();

  /**
   * Holds if `name` is the display name for hash type `type`.
   *
   * The known hash types map to fixed names; `OtherHashType` maps to the raw
   * algorithm name of this algorithm.
   */
  final predicate hashTypeToNameMapping(THashType type, string name) {
    type = MD5() and name = "MD5"
    or
    type = SHA1() and name = "SHA-1"
    or
    type = SHA256() and name = "SHA-256"
    or
    type = SHA512() and name = "SHA-512"
    or
    type = OtherHashType() and name = this.getRawAlgorithmName()
  }

  /** Gets the name obtained by mapping the hash type of this algorithm. */
  override string getAlgorithmName() {
    exists(THashType type |
      type = this.getHashType() and
      this.hashTypeToNameMapping(type, result)
    )
  }
}
|
||||
|
||||
/**
 * An operation that derives one or more keys from an input value.
 */
abstract class KeyDerivationOperation extends Operation {
  /** Gets the operation name, which is always `"KEY_DERIVATION"`. */
  override string getOperationName() { "KEY_DERIVATION" = result }
}
|
||||
|
||||
/**
 * An algorithm that derives one or more keys from an input value.
 */
abstract class KeyDerivationAlgorithm extends Algorithm {
  /** Gets the name of this key derivation algorithm; concrete subclasses must provide it. */
  abstract override string getAlgorithmName();
}
|
||||
|
||||
/**
 * The HKDF key derivation function.
 */
abstract class HKDF extends KeyDerivationAlgorithm {
  /** Gets the hash algorithm this key derivation function is used with. */
  abstract HashAlgorithm getHashAlgorithm();

  /** Gets the algorithm name, which is always `"HKDF"`. */
  final override string getAlgorithmName() { result = "HKDF" }

  /**
   * Gets the child reached through `edgeName`: the inherited children, plus
   * the hash algorithm through the `"digest"` edge.
   */
  override NodeBase getChild(string edgeName) {
    edgeName = "digest" and result = this.getHashAlgorithm()
    or
    result = super.getChild(edgeName)
  }
}
|
||||
|
||||
/**
 * The PKCS #12 key derivation function.
 */
abstract class PKCS12KDF extends KeyDerivationAlgorithm {
  /** Gets the hash algorithm this key derivation function is used with. */
  abstract HashAlgorithm getHashAlgorithm();

  /** Gets the algorithm name, which is always `"PKCS12KDF"`. */
  final override string getAlgorithmName() { result = "PKCS12KDF" }

  /**
   * Gets the child reached through `edgeName`: the inherited children, plus
   * the hash algorithm through the `"digest"` edge.
   */
  override NodeBase getChild(string edgeName) {
    edgeName = "digest" and result = this.getHashAlgorithm()
    or
    result = super.getChild(edgeName)
  }
}
|
||||
|
||||
newtype TEllipticCurveFamilyType =
  // Listing these together states that they all share an identical
  // interface, set of properties, and set of edges.
  NIST() or
  SEC() or
  NUMS() or
  PRIME() or
  BRAINPOOL() or
  CURVE25519() or
  CURVE448() or
  C2() or
  SM2() or
  ES() or
  OtherEllipticCurveFamilyType()
|
||||
|
||||
|
||||
/**
 * An elliptic curve algorithm.
 */
abstract class EllipticCurve extends Algorithm {
  /** Gets the key size of this curve, as a string, together with the `location` it was found at. */
  abstract string getKeySize(Location location);

  /** Gets the family this curve belongs to. */
  abstract TEllipticCurveFamilyType getCurveFamilyType();

  /**
   * Holds if this node has a property named `key` with the given `value`
   * at the given `location`.
   *
   * In addition to the inherited properties, this reports `"key_size"`: once
   * per known key size, or exactly once with the unknown value at an unknown
   * location when no key size is known at all.
   */
  override predicate properties(string key, string value, Location location) {
    super.properties(key, value, location)
    or
    key = "key_size" and
    (
      value = this.getKeySize(location)
      or
      // The fallback must be guarded by the absence of a key size at *any*
      // location. Testing `exists(this.getKeySize(location))` with the
      // parameter `location` would also report "<unknown>" at the unknown
      // location for curves whose key size is known somewhere else.
      not exists(this.getKeySize(_)) and
      value instanceof UnknownPropertyValue and
      location instanceof UnknownLocation
    )
    // Other properties, such as the field type, are possible but are not
    // modelled until considered necessary.
  }

  /** Gets the raw algorithm name converted to upper case. */
  override string getAlgorithmName() { result = this.getRawAlgorithmName().toUpperCase() }

  /**
   * Gets the raw name of this curve.
   *
   * For elliptic curves specifically, implementers are responsible for
   * providing the official name of the algorithm as the raw name. Casing does
   * not matter; `getAlgorithmName` normalizes it by default.
   *
   * Rationale: elliptic curve names vary a lot in their components (for
   * example "secp256r1" vs "P-256"). Producing a generalized set of properties
   * that captures all cases is possible, but such modelling is likely not
   * necessary. If all properties need to be captured, how names are generated
   * can be reassessed.
   */
  abstract override string getRawAlgorithmName();
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
private import Base
|
||||
private import codeql.cryptography.Model
|
||||
private import cpp as Lang
|
||||
|
||||
module CryptoInput implements InputSig<Lang::Location> {
|
||||
|
||||
@@ -77,6 +77,8 @@ module OpenSSLModel {
|
||||
|
||||
HKDF() { algorithmStringToKDFFetchArgFlow("HKDF", origin, this) }
|
||||
|
||||
override string getRawAlgorithmName() { result = origin.getValue() }
|
||||
|
||||
override Crypto::HashAlgorithm getHashAlgorithm() { none() }
|
||||
|
||||
override Crypto::LocatableElement getOrigin(string name) {
|
||||
@@ -89,6 +91,8 @@ module OpenSSLModel {
|
||||
|
||||
PKCS12KDF() { algorithmStringToKDFFetchArgFlow("PKCS12KDF", origin, this) }
|
||||
|
||||
override string getRawAlgorithmName() { result = origin.getValue() }
|
||||
|
||||
override Crypto::HashAlgorithm getHashAlgorithm() { none() }
|
||||
|
||||
override Crypto::NodeBase getOrigin(string name) {
|
||||
|
||||
@@ -6,6 +6,7 @@ extractor: cpp
|
||||
library: true
|
||||
upgrades: upgrades
|
||||
dependencies:
|
||||
codeql/cryptography: ${workspace}
|
||||
codeql/dataflow: ${workspace}
|
||||
codeql/mad: ${workspace}
|
||||
codeql/rangeanalysis: ${workspace}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
#
# Runs generate_cbom.py against a CodeQL database.
#
# Every setting below can be overridden from the environment, e.g.
#   CODEQL_PATH=/opt/codeql/codeql DATABASE_PATH=./db ./this-script
# When a variable is unset or empty, the original hard-coded value is used.

CODEQL_PATH="${CODEQL_PATH:-/Users/nicolaswill/Library/Application Support/Code/User/globalStorage/github.vscode-codeql/distribution5/codeql/codeql}"
DATABASE_PATH="${DATABASE_PATH:-/Users/nicolaswill/openssl_codeql/openssl/openssl_db}"
QUERY_FILE="${QUERY_FILE:-CBOMGraph.ql}"
OUTPUT_DIR="${OUTPUT_DIR:-graph_output}"

# Quote every expansion: the default CodeQL path contains a space.
python3 generate_cbom.py -c "$CODEQL_PATH" -d "$DATABASE_PATH" -q "$QUERY_FILE" -o "$OUTPUT_DIR"
|
||||
@@ -1,104 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
def run_codeql_analysis(codeql_path, database_path, query_path, output_dir):
    """Run ``codeql database analyze`` and write DGML results to *output_dir*.

    Args:
        codeql_path: Path to the CodeQL CLI executable.
        database_path: Path to the CodeQL database to analyze.
        query_path: Path to the query to run.
        output_dir: Directory for the results; created if it does not exist.

    Returns:
        The exit status of the CodeQL process, which is ``0`` whenever this
        function returns.

    Raises:
        SystemExit: With code 1 if the executable cannot be started or the
            analysis exits with a non-zero status.
    """
    os.makedirs(output_dir, exist_ok=True)
    command = [
        codeql_path, "database", "analyze", database_path, query_path,
        "--rerun", "--format=dgml", "--output", output_dir,
    ]

    print(f"Running CodeQL analysis: {' '.join(command)}")
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # A missing or non-executable binary should take the same failure path
        # as a failed analysis instead of surfacing as a raw traceback.
        print(f"Analysis failed: could not run {codeql_path!r}: {exc}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        # Diagnostics belong on stderr so they are not mixed into normal output.
        print("Analysis failed.", file=sys.stderr)
        print(result.stderr, file=sys.stderr)
        sys.exit(1)

    print("Analysis completed successfully.")
    return result.returncode
|
||||
|
||||
|
||||
def _dot_escape(text):
    """Escape *text* for use inside a double-quoted DOT string."""
    return text.replace("\\", "\\\\").replace('"', '\\"')


def _dgml_section(root, name):
    """Return the child elements of the DGML section *name*, or [] if absent."""
    section = root.find(f"{{http://schemas.microsoft.com/vs/2009/dgml}}{name}")
    # Compare against None explicitly: an element without children is falsy.
    return [] if section is None else list(section)


def convert_dgml_to_dot(dgml_file, dot_file):
    """Convert the DGML graph in *dgml_file* into a DOT graph in *dot_file*.

    Every child of the ``Nodes`` section becomes a box node ``nd_<Id>``. Its
    label is built from the remaining attributes in document order, joined by
    a DOT line break: the ``Label`` value as is, every other attribute as
    ``key=value``. Every child of the ``Links`` section becomes an edge from
    ``Source`` to ``Target`` labelled with its optional ``Label``.

    A missing ``Nodes`` or ``Links`` section is treated as empty, and
    backslashes and double quotes in label text are escaped so the output
    stays valid DOT.

    Args:
        dgml_file: Path of the UTF-8 encoded DGML file to read.
        dot_file: Path of the DOT file to write (overwritten if present).

    Raises:
        KeyError: If a node has no ``Id`` or a link has no ``Source``/``Target``.
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
    """
    print(f"Processing DGML file: {dgml_file}")

    with open(dgml_file, "r", encoding="utf-8") as f:
        root = ET.fromstring(f.read())

    body_l = ["digraph cbom {", "node [shape=box];", "rankdir=LR;"]

    for node in _dgml_section(root, "Nodes"):
        att = node.attrib
        node_id = att["Id"]
        label_parts = [
            _dot_escape(value if key == "Label" else f"{key}={value}")
            for key, value in att.items()
            if key != "Id"
        ]
        # "\\n" is the two-character DOT escape for a line break, so the parts
        # are escaped individually before joining.
        label = "\\n".join(label_parts)
        body_l.append(f'nd_{node_id} [label="{label}"];')

    for edge in _dgml_section(root, "Links"):
        att = edge.attrib
        body_l.append(
            'nd_{} -> nd_{} [label="{}"];'.format(
                att["Source"], att["Target"], _dot_escape(att.get("Label", ""))
            )
        )

    body_l.append("}")

    with open(dot_file, "w", encoding="utf-8") as f:
        f.write("\n".join(body_l))

    print(f"DGML file successfully converted to DOT format: {dot_file}")
|
||||
|
||||
|
||||
def main():
    """Parse the command line, run the analysis, and convert its DGML to DOT.

    The DGML file is expected at ``<output>/cbomgraph.dgml``; the DOT file is
    written next to it with the extension swapped to ``.dot``.

    Raises:
        SystemExit: With code 1 if the expected DGML file does not exist
            after the analysis.
    """
    parser = argparse.ArgumentParser(description="Run CodeQL analysis and convert DGML to DOT.")
    parser.add_argument("-c", "--codeql", required=True, help="Path to CodeQL CLI executable.")
    parser.add_argument("-d", "--database", required=True, help="Path to the CodeQL database.")
    parser.add_argument("-q", "--query", required=True, help="Path to the .ql query file.")
    parser.add_argument("-o", "--output", required=True, help="Output directory for analysis results.")

    args = parser.parse_args()

    run_codeql_analysis(args.codeql, args.database, args.query, args.output)

    dgml_file = os.path.join(args.output, "cbomgraph.dgml")
    # Swap only the file extension. str.replace would also rewrite ".dgml"
    # inside the output directory name and point at a different directory.
    dot_file = os.path.splitext(dgml_file)[0] + ".dot"

    if not os.path.exists(dgml_file):
        print(f"No DGML file found in {args.output}.")
        sys.exit(1)

    convert_dgml_to_dot(dgml_file, dot_file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user