Compare commits

..

19 Commits

Author SHA1 Message Date
Nick Rolfe
71ef2931a5 Ruby: update crate versions 2022-01-13 11:43:31 +00:00
Stephan Brandauer
40ad88ba53 Merge pull request #7474 from kaeluka/db-reads-as-taint-sources
JS: DB reads as taint sources
2022-01-13 12:06:48 +01:00
Michael Nebel
8583a4ffea Merge pull request #7583 from michaelnebel/csharp/fix-broken-test
C#: Narrow string interpolation expressions to a specific single file in testcase.
2022-01-13 11:37:52 +01:00
Erik Krogh Kristensen
89bab6ae12 Merge pull request #7097 from erik-krogh/railsReDoS
JS/PY/RB: support a limited number of ranges for ReDoS analysis
2022-01-13 11:04:36 +01:00
Stephan Brandauer
93507a2d71 combine two implementations for database-accesses as remote flow sources 2022-01-13 10:53:58 +01:00
Michael Nebel
aacb03a74b C#: Narrow string interpolation expressions to a specific single file in testcase. 2022-01-13 10:25:33 +01:00
Stephan Brandauer
63aaf24063 base implementation of Sequelize model on models-as-data 2022-01-13 09:41:25 +01:00
Anders Schack-Mulligen
da69886777 Merge pull request #7580 from github/workflow/coverage/update
Update CSV framework coverage reports
2022-01-13 09:26:00 +01:00
github-actions[bot]
625836a3be Add changed framework coverage reports 2022-01-13 00:11:30 +00:00
Henry Mercer
1c3c9216f5 Merge pull request #7576 from github/henrymercer/js-bump-atm-versions
JS: Bump ATM pack versions to 0.0.4
2022-01-12 16:53:10 +00:00
Stephan Brandauer
09a28c428c base implementation of Spanner model on models-as-data 2022-01-12 17:07:16 +01:00
Henry Mercer
9abc3411a4 JS: Bump ATM pack versions to 0.0.4 2022-01-12 15:19:13 +00:00
Robert Marsh
5031d6c4a3 Merge pull request #7566 from MathiasVP/smaller-join-in-reachesRefParameter
C++: Smaller join in `reachesRefParameter`
2022-01-12 10:04:35 -05:00
Mathias Vorreiter Pedersen
b3a7090068 C++: Fix join in reachesRefParameter by joining with 'getEnd' instead
of 'getANode'.

Before:

Tuple counts for FlowVar::FlowVar::reachesRefParameter_dispred#ff/2@956ac39i after 229ms:
  24806   ~1%     {2} r1 = JOIN FlowVar::FlowVar_internal::parameterIsNonConstReference#f WITH Parameter::Parameter::getFunction_dispred#ff ON FIRST 1 OUTPUT Lhs.0 'p', Rhs.1
  56985   ~3%     {3} r2 = JOIN r1 WITH num#FlowVar::FlowVar_internal::TBlockVar#fff_12#join_rhs ON FIRST 1 OUTPUT Rhs.1 'this', Lhs.0 'p', Lhs.1
  2384489 ~4%     {4} r3 = JOIN r2 WITH FlowVar::FlowVar_internal::getAReachedBlockVarSBB#ff ON FIRST 1 OUTPUT Rhs.1, Lhs.2, Lhs.1 'p', Lhs.0 'this'
  49457   ~0%     {2} r4 = JOIN r3 WITH SubBasicBlocks::SubBasicBlock::getANode_dispred#fb ON FIRST 2 OUTPUT Lhs.3 'this', Lhs.2 'p'
                  return r4

After:

Tuple counts for FlowVar::FlowVar::reachesRefParameter_dispred#ff/2@46f8bfn7 after 32ms:
  24806 ~1%     {2} r1 = JOIN FlowVar::FlowVar_internal::parameterIsNonConstReference#f WITH Parameter::Parameter::getFunction_dispred#ff ON FIRST 1 OUTPUT Lhs.0 'p', Rhs.1
  56985 ~1%     {3} r2 = JOIN r1 WITH num#FlowVar::FlowVar_internal::TBlockVar#fff_12#join_rhs ON FIRST 1 OUTPUT Lhs.1, Lhs.0 'p', Rhs.1 'this'
  56985 ~1%     {3} r3 = JOIN r2 WITH SubBasicBlocks::SubBasicBlock::getEnd_dispred#fb_10#join_rhs ON FIRST 1 OUTPUT Lhs.2 'this', Rhs.1, Lhs.1 'p'
  49457 ~0%     {2} r4 = JOIN r3 WITH FlowVar::FlowVar_internal::getAReachedBlockVarSBB#ff ON FIRST 2 OUTPUT Lhs.0 'this', Lhs.2 'p'
                return r4
2022-01-11 13:48:20 +00:00
Stephan Brandauer
132e0bf4b7 add database accesses as additional (heuristic) remote flow sources 2022-01-11 11:38:41 +01:00
Erik Krogh Kristensen
f7a63d5ea0 remove duplicated line 2022-01-07 18:38:02 +01:00
Erik Krogh Kristensen
c8d29a9cf1 sync files 2022-01-07 18:38:02 +01:00
Erik Krogh Kristensen
1a8b6d7414 recognize ranges without upper bounds 2022-01-07 18:38:01 +01:00
Erik Krogh Kristensen
acaf294bee support a limited number of regexp ranges 2022-01-07 18:36:30 +01:00
41 changed files with 1312 additions and 378 deletions

View File

@@ -435,7 +435,7 @@ module FlowVar_internal {
parameterIsNonConstReference(p) and
p = v and
// This definition reaches the exit node of the function CFG
getAReachedBlockVarSBB(this).getANode() = p.getFunction()
getAReachedBlockVarSBB(this).getEnd() = p.getFunction()
}
override predicate definedByInitialValue(StackVariable lsv) {

View File

@@ -1,7 +1,13 @@
import csharp
query predicate inserts(InterpolatedStringExpr expr, Expr e) { expr.getAnInsert() = e }
/** Holds if `expr` is located in a file whose base name is `ConstInterpolatedString.cs`. */
private predicate inSpecificSource(Expr expr) {
  exists(string baseName | baseName = expr.getFile().getBaseName() |
    baseName = "ConstInterpolatedString.cs"
  )
}
query predicate inserts(InterpolatedStringExpr expr, Expr e) {
expr.getAnInsert() = e and inSpecificSource(expr)
}
query predicate texts(InterpolatedStringExpr expr, StringLiteral literal) {
expr.getAText() = literal
expr.getAText() = literal and inSpecificSource(expr)
}

View File

@@ -29,7 +29,7 @@ jakarta.ws.rs.container,,9,,,,,,,,,,,,,,,,,,,,,,,9,,
jakarta.ws.rs.core,2,,149,,,,,,,,,,,,,,,,,2,,,,,,94,55
java.beans,,,1,,,,,,,,,,,,,,,,,,,,,,,1,
java.io,3,,31,,3,,,,,,,,,,,,,,,,,,,,,30,1
java.lang,8,,53,,,,,,,,,,8,,,,,,,,,,,,,42,11
java.lang,8,,56,,,,,,,,,,8,,,,,,,,,,,,,45,11
java.net,10,3,7,,,,,,,,,,,,,10,,,,,,,,,3,7,
java.nio,10,,4,,10,,,,,,,,,,,,,,,,,,,,,4,
java.sql,7,,,,,,,,,,,,,,,,,7,,,,,,,,,
1 package sink source summary sink:bean-validation sink:create-file sink:groovy sink:header-splitting sink:information-leak sink:intent-start sink:jexl sink:jndi-injection sink:ldap sink:logging sink:mvel sink:ognl-injection sink:open-url sink:set-hostname-verifier sink:sql sink:url-open-stream sink:url-redirect sink:xpath sink:xslt sink:xss source:contentprovider source:remote summary:taint summary:value
29 jakarta.ws.rs.core 2 149 2 94 55
30 java.beans 1 1
31 java.io 3 31 3 30 1
32 java.lang 8 53 56 8 42 45 11
33 java.net 10 3 7 10 3 7
34 java.nio 10 4 10 4
35 java.sql 7 7

View File

@@ -15,9 +15,9 @@ Java framework & library support
`Apache HttpComponents <https://hc.apache.org/>`_,"``org.apache.hc.core5.*``, ``org.apache.http``",5,136,28,,,3,,,,25
`Google Guava <https://guava.dev/>`_,``com.google.common.*``,,728,35,,6,,,,,
`JSON-java <https://github.com/stleary/JSON-java>`_,``org.json``,,236,,,,,,,,
Java Standard Library,``java.*``,3,526,72,13,,,7,,,10
Java Standard Library,``java.*``,3,529,72,13,,,7,,,10
Java extensions,"``javax.*``, ``jakarta.*``",54,552,32,,,4,,1,1,2
`Spring <https://spring.io/>`_,``org.springframework.*``,29,469,91,,,,19,14,,29
Others,"``androidx.slice``, ``cn.hutool.core.codec``, ``com.esotericsoftware.kryo.io``, ``com.esotericsoftware.kryo5.io``, ``com.fasterxml.jackson.core``, ``com.fasterxml.jackson.databind``, ``com.opensymphony.xwork2.ognl``, ``com.unboundid.ldap.sdk``, ``flexjson``, ``groovy.lang``, ``groovy.util``, ``jodd.json``, ``net.sf.saxon.s9api``, ``ognl``, ``org.apache.commons.codec``, ``org.apache.commons.jexl2``, ``org.apache.commons.jexl3``, ``org.apache.commons.logging``, ``org.apache.commons.ognl``, ``org.apache.directory.ldap.client.api``, ``org.apache.ibatis.jdbc``, ``org.apache.log4j``, ``org.apache.logging.log4j``, ``org.apache.shiro.codec``, ``org.apache.shiro.jndi``, ``org.codehaus.groovy.control``, ``org.dom4j``, ``org.hibernate``, ``org.jboss.logging``, ``org.jooq``, ``org.mvel2``, ``org.scijava.log``, ``org.slf4j``, ``org.xml.sax``, ``org.xmlpull.v1``, ``play.mvc``, ``ratpack.core.form``, ``ratpack.core.handling``, ``ratpack.core.http``, ``ratpack.exec``, ``ratpack.form``, ``ratpack.func``, ``ratpack.handling``, ``ratpack.http``, ``ratpack.util``",44,283,919,,,,14,18,,
Totals,,180,5639,1276,13,6,10,107,33,1,66
Totals,,180,5642,1276,13,6,10,107,33,1,66

View File

@@ -14,6 +14,73 @@ external predicate availableMlModels(
/** Get the ATM configuration. */
ATMConfig getCfg() { any() }
/**
 * Maps an endpoint to the entities that enclose it, so that the endpoint can be passed to
 * similarity analysis together with its surrounding code.
 */
module EndpointToEntity {
  private import CodeToFeatures

  /**
   * Gets an entity whose defined function is the container of `endpoint`, or one of the
   * containers that transitively enclose that container.
   *
   * An endpoint nested inside several functions can therefore be associated with several
   * entities.
   */
  DatabaseFeatures::Entity getAnEntityForEndpoint(DataFlow::Node endpoint) {
    endpoint.getContainer().getEnclosingContainer*() = result.getDefinedFunction() and
    DatabaseFeatures::entities(result, _, _, _, _, _, _, _, _)
  }
}
/**
* This module provides functionality that takes an entity and provides effective endpoints within
* that entity.
*
* We use the following terminology to describe endpoints:
*
* - The *candidate* endpoints are the set of data flow nodes that should be passed to the
* appropriate endpoint filter to produce the set of effective endpoints.
* When we have a model that beats the performance of the baseline, we will likely define the
* candidate endpoints based on the most confident predictions of the model.
* - An *effective* endpoint is a candidate endpoint which passes through the endpoint filter.
* In other words, it is a candidate endpoint for which the `isEffectiveSink` (or
* `isEffectiveSource`) predicate defined in the `ATMConfig` instance in scope holds.
*/
module EntityToEffectiveEndpoint {
  private import CodeToFeatures

  /**
   * Gets a candidate endpoint for `entity`.
   *
   * Baseline definition: any data flow node whose container is the function defined by
   * `entity`, or is transitively enclosed by that function.
   */
  private DataFlow::Node getABaselineEndpointCandidate(DatabaseFeatures::Entity entity) {
    entity.getDefinedFunction() = result.getContainer().getEnclosingContainer*()
  }

  /**
   * Gets a candidate endpoint of `entity` for which `isEffectiveSource` of the ATM
   * configuration holds.
   *
   * N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
   * source may occur in a function defined within the specified entity.
   */
  DataFlow::Node getAnEffectiveSource(DatabaseFeatures::Entity entity) {
    getCfg().isEffectiveSource(result) and
    result = getABaselineEndpointCandidate(entity)
  }

  /**
   * Gets a candidate endpoint of `entity` for which `isEffectiveSink` of the ATM
   * configuration holds.
   *
   * N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
   * sink may occur in a function defined within the specified entity.
   */
  DataFlow::Node getAnEffectiveSink(DatabaseFeatures::Entity entity) {
    getCfg().isEffectiveSink(result) and
    result = getABaselineEndpointCandidate(entity)
  }
}
/**
* Scoring information produced by a scoring model.
*

View File

@@ -0,0 +1,444 @@
/*
* For internal use only.
*
* Extracts data about the functions in the database for use in adaptive threat modeling (ATM).
*/
module Raw {
private import javascript as raw
class RawAstNode = raw::ASTNode;
class Entity = raw::Function;
class Location = raw::Location;
/**
* Exposed as a tool for defining anchors for semantic search.
*/
class UnderlyingFunction = raw::Function;
/**
 * Holds if `entity` should be left out of ATM: it either has no body at all, or its body
 * contains no statements and it has no returned expression.
 */
predicate isEntityIgnored(Entity entity) {
  // No definition at all, for example an entity in a TypeScript declaration file.
  not exists(entity.getBody())
  or
  // An empty body, for example the JavaScript function () => {}.
  not exists(entity.getAReturnedExpr()) and
  entity.getNumBodyStmt() = 0
}
newtype WrappedAstNode = TAstNode(RawAstNode rawNode)
/**
* This class represents nodes in the AST.
*
* Each instance wraps exactly one `RawAstNode` through the `TAstNode` newtype branch.
*/
class AstNode extends TAstNode {
RawAstNode rawNode;
AstNode() { this = TAstNode(rawNode) }
/** Gets the wrapper of a child of the underlying raw node. */
AstNode getAChildNode() { result = TAstNode(rawNode.getAChild()) }
/** Gets the wrapper of the parent of the underlying raw node, if any. */
AstNode getParentNode() { result = TAstNode(rawNode.getParent()) }
/**
* Holds if the AST node has `result` as its `index`th attribute.
*
* The index is not intended to mean anything, and is only here for disambiguation.
* There are no guarantees about any particular index being used (or not being used).
*
* In this implementation every attribute is reported at `index = 0`.
*/
string astNodeAttribute(int index) {
(
// NB: Unary and binary operator expressions e.g. -a, a + b and compound
// assignments e.g. a += b can be identified by the expression type.
result = rawNode.(raw::Identifier).getName()
or
// Computed property accesses for which we can predetermine the property being accessed.
// NB: May alias with operators e.g. could have '+' as a property name.
result = rawNode.(raw::IndexExpr).getPropertyName()
or
// We use `getRawValue` to give us distinct representations for `0xa`, `0xA`, and `10`.
result = rawNode.(raw::NumberLiteral).getRawValue()
or
// We use `getValue` rather than `getRawValue` so we assign `"a"` and `'a'` the same representation.
not rawNode instanceof raw::NumberLiteral and
result = rawNode.(raw::Literal).getValue()
or
result = rawNode.(raw::TemplateElement).getRawValue()
) and
index = 0
}
/**
* Returns a string indicating the "type" of the AST node.
*
* The string is a table-name prefix (`js_exprs.`, `js_properties.`, `js_stmts.`,
* `js_typeexprs.`) followed by the integer kind from that table, or the fixed string
* `js_toplevel` for top-levels.
*/
string astNodeType() {
// The definition of this method should correspond with that of the `@ast_node` entry in the
// dbscheme.
result = "js_exprs." + any(int kind | exprs(rawNode, kind, _, _, _))
or
result = "js_properties." + any(int kind | properties(rawNode, _, _, kind, _))
or
result = "js_stmts." + any(int kind | stmts(rawNode, kind, _, _, _))
or
result = "js_toplevel" and rawNode instanceof raw::TopLevel
or
result = "js_typeexprs." + any(int kind | typeexprs(rawNode, kind, _, _, _))
}
/**
* Holds if `result` is the `index`'th child of the AST node, for some arbitrary indexing.
* A root of the AST should be its own child, with an arbitrary (though conventionally
* 0) index.
*
* Notably, the order in which child nodes are visited is not required to be meaningful,
* and no particular index is required to be meaningful. However, `(parent, index)`
* should be a keyset.
*
* Children are ranked by start line, start column, end line, end column and then by
* `astNodeType()`.
*
* NOTE(review): QL `rank` positions start at 1, so with `rank[index - 1]` the first ranked
* child is reported at index 2 and index 1 is never used; index 0 is produced only by the
* root self-child case. This is consistent with "arbitrary indexing", but confirm that the
* offset is intended before relying on concrete index values.
*/
pragma[nomagic]
AstNode astNodeChild(int index) {
result =
rank[index - 1](AstNode child, raw::Location l |
child = this.getAChildNode() and l = child.getLocation()
|
child
order by
l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn(),
child.astNodeType()
)
or
// A node without a parent is reported as its own child at index 0.
not exists(result.getParentNode()) and this = result and index = 0
}
/** Gets the location of the underlying raw node. */
raw::Location getLocation() { result = rawNode.getLocation() }
/** Gets the textual representation of the underlying raw node. */
string toString() { result = rawNode.toString() }
/**
* Holds if this node is the name node of `entity`: its parent is `entity`, it is the child
* of that parent with the smallest child index, and it is a `VarDecl` whose name is a name
* of `entity`.
*/
predicate isEntityNameNode(Entity entity) {
exists(int index |
TAstNode(entity) = this.getParentNode() and
this = this.getParentNode().astNodeChild(index) and
// An entity name node must be the first child of the entity.
index = min(int otherIndex | exists(this.getParentNode().astNodeChild(otherIndex))) and
entity.getName() = rawNode.(raw::VarDecl).getName()
)
}
}
/**
 * Gets the child of the `parent` entity at position `index`. Such a node is a root of an AST
 * associated with that entity.
 */
AstNode entityChild(AstNode parent, int index) {
  // In JavaScript, entities appear in the AST parent/child relationship, so the ordinary
  // child relation is reused unchanged.
  parent.astNodeChild(index) = result
}
/**
 * Holds if `node` is `entity` itself or a transitive AST descendant of it, and `node` is not
 * the name node of `entity`.
 *
 * A node nested in several entities is contained in each of them.
 */
predicate entityContains(Entity entity, AstNode node) {
  not node.isEntityNameNode(entity) and
  TAstNode(entity) = node.getParentNode*()
}
/**
 * Gets the name to use for `entity`. Exactly one of the following cases applies:
 *
 * 1. The entity has at least one name: the lexically smallest of its names is used.
 * 2. The entity is unnamed but an approximate name can be derived from a call it is passed
 *    to: that approximate name is used.
 * 3. Otherwise the name is `""`, so that these entities are still included in
 *    `AdaptiveThreatModeling.qll`.
 */
string getEntityName(Entity entity) {
  // https://github.com/github/ml-ql-adaptive-threat-modeling/issues/244 discusses making use
  // of all the names during training.
  exists(entity.getName()) and
  result = min(entity.getName())
  or
  not exists(entity.getName()) and
  (
    exists(getApproximateNameForEntity(entity)) and
    result = getApproximateNameForEntity(entity)
    or
    not exists(getApproximateNameForEntity(entity)) and
    result = ""
  )
}
/**
 * Holds if the call `call` has `entity` as its `argumentIndex`th argument, up to zero or more
 * local flow steps between the argument node and the data flow node of `entity`.
 *
 * NOTE(review): the closure is taken from the argument node to `entity.flow()`. If the first
 * parameter of `localFlowStep` is the predecessor, this relates arguments that flow *to* the
 * function node rather than from it; confirm the direction is intended.
 */
private predicate entityUsedAsArgumentToCall(
  Entity entity, raw::DataFlow::CallNode call, int argumentIndex
) {
  exists(raw::DataFlow::Node argument |
    argument = call.getArgument(argumentIndex) and
    raw::DataFlow::localFlowStep*(argument, entity.flow())
  )
}
/**
 * Gets a generated name for `entity`, built so that entities with the same name have similar
 * behavior.
 *
 * A name is only produced when `entity` is used as an argument in exactly one
 * (call, argument index) pair. The name is the callee name followed by `#functionalargument`,
 * prefixed with `<receiver>.` when the call has exactly one receiver name.
 */
private string getApproximateNameForEntity(Entity entity) {
  1 =
    count(raw::DataFlow::CallNode call, int index | entityUsedAsArgumentToCall(entity, call, index)) and
  exists(raw::DataFlow::CallNode call, string basePart |
    entityUsedAsArgumentToCall(entity, call, _) and
    (
      count(getReceiverName(call)) = 1 and
      basePart = getReceiverName(call) + "."
      or
      not count(getReceiverName(call)) = 1 and
      basePart = ""
    ) and
    result = basePart + call.getCalleeName() + "#functionalargument"
  )
}
/** Gets the variable name of the receiver of `call`, if the receiver is a variable access. */
private string getReceiverName(raw::DataFlow::CallNode call) {
  exists(raw::VarAccess receiver |
    receiver = call.getReceiver().asExpr() and
    result = receiver.getName()
  )
}
/** Consistency checks: these predicates should each have no results */
module Consistency {
  /** Reports every name of an entity to which `getEntityName` assigns more than one name. */
  query predicate entityWithManyNames(Entity entity, string name) {
    count(getEntityName(entity)) > 1 and
    name = getEntityName(entity)
  }

  /** Reports AST nodes for which `astNodeType` has no result. */
  query predicate nodeWithNoType(AstNode node) {
    not exists(string type | type = node.astNodeType())
  }

  /** Reports every type of an AST node for which `astNodeType` has more than one result. */
  query predicate nodeWithManyTypes(AstNode node, string type) {
    count(node.astNodeType()) > 1 and
    type = node.astNodeType()
  }

  /**
   * Reports AST nodes, other than those wrapping a `Module`, that are not the
   * `astNodeChild` of any node.
   */
  query predicate nodeWithNoParent(AstNode node, string type) {
    type = node.astNodeType() and
    not exists(AstNode parent | node = parent.astNodeChild(_)) and
    not exists(RawAstNode rawNode | rawNode instanceof raw::Module and node = TAstNode(rawNode))
  }

  /** Reports children that share a child index with another child of the same parent. */
  query predicate duplicateChildIndex(AstNode parent, int index, AstNode child) {
    count(parent.astNodeChild(index)) > 1 and
    child = parent.astNodeChild(index)
  }

  /** Reports attribute indices at which a node has more than one attribute value. */
  query predicate duplicateAttributeIndex(AstNode node, int index) {
    count(node.astNodeAttribute(index)) > 1 and
    exists(node.astNodeAttribute(index))
  }
}
}
module Wrapped {
  /*
   * We require any node with attributes to be a leaf. Where a non-leaf node
   * has an attribute, we instead create a synthetic leaf node that has that
   * attribute.
   */

  /**
   * Holds if the AST node `e` is a leaf node, that is, `e.astNodeChild(_)` has no result.
   */
  private predicate isLeaf(Raw::AstNode e) { not exists(e.astNodeChild(_)) }

  /**
   * The raw entities that can be wrapped: those located in a file that has a relative path
   * and that contain at least one AST node.
   */
  newtype WrappedEntity =
    TEntity(Raw::Entity entity) {
      exists(entity.getLocation().getFile().getRelativePath()) and
      Raw::entityContains(entity, _)
    }

  /**
   * A type ranging over the kinds of entities for which we want to consider embeddings.
   *
   * Entities for which `Raw::isEntityIgnored` holds are excluded.
   */
  class Entity extends WrappedEntity {
    Raw::Entity rawEntity;

    Entity() { this = TEntity(rawEntity) and not Raw::isEntityIgnored(rawEntity) }

    /** Gets the name assigned to this entity by `Raw::getEntityName`. */
    string getName() { result = Raw::getEntityName(rawEntity) }

    /** Gets the wrapped AST node that is the `index`th child of this entity. */
    AstNode getAstRoot(int index) {
      result = TAstNode(rawEntity, Raw::entityChild(Raw::TAstNode(rawEntity), index))
    }

    /** Gets the textual representation of the underlying raw entity. */
    string toString() { result = rawEntity.toString() }

    /** Gets the location of the underlying raw entity. */
    Raw::Location getLocation() { result = rawEntity.getLocation() }

    /** Gets the function underlying this entity. */
    Raw::UnderlyingFunction getDefinedFunction() { result = rawEntity }
  }

  /**
   * AST nodes paired with an entity that contains them, plus synthetic leaf nodes that carry
   * the attributes of non-leaf nodes.
   */
  newtype WrappedAstNode =
    TAstNode(Raw::Entity enclosingEntity, Raw::AstNode node) {
      Raw::entityContains(enclosingEntity, node)
    } or
    TSyntheticNode(
      Raw::Entity enclosingEntity, Raw::AstNode node, int syntheticChildIndex, int attrIndex
    ) {
      Raw::entityContains(enclosingEntity, node) and
      exists(node.astNodeAttribute(attrIndex)) and
      not isLeaf(node) and
      // `node` is not a leaf here, so it has at least one child index. Synthetic children are
      // therefore always placed after the largest real child index, offset by the position of
      // the attribute relative to the smallest attribute index.
      syntheticChildIndex =
        attrIndex - min(int other | exists(node.astNodeAttribute(other))) +
          max(int other | exists(node.astNodeChild(other))) + 1
    }

  /**
   * Gets the child of `parent` at `index` within `enclosingEntity`: either the wrapper of a
   * real child, or a synthetic node created for `parent` at that child index.
   */
  pragma[nomagic]
  private AstNode injectedChild(Raw::Entity enclosingEntity, Raw::AstNode parent, int index) {
    result = TAstNode(enclosingEntity, parent.astNodeChild(index)) or
    result = TSyntheticNode(enclosingEntity, parent, index, _)
  }

  /**
   * A type ranging over AST nodes. Ultimately, only nodes contained in entities will
   * be considered.
   *
   * Nodes whose enclosing entity satisfies `Raw::isEntityIgnored` are excluded.
   */
  class AstNode extends WrappedAstNode {
    Raw::Entity enclosingEntity;
    Raw::AstNode rawNode;

    AstNode() {
      (
        this = TAstNode(enclosingEntity, rawNode) or
        this = TSyntheticNode(enclosingEntity, rawNode, _, _)
      ) and
      not Raw::isEntityIgnored(enclosingEntity)
    }

    /**
     * Gets the `index`th attribute of the raw node, unless a synthetic node exists that
     * carries that attribute instead.
     */
    string getAttribute(int index) {
      result = rawNode.astNodeAttribute(index) and
      not exists(TSyntheticNode(enclosingEntity, rawNode, _, index))
    }

    /** Gets the type string of the underlying raw node. */
    string getType() { result = rawNode.astNodeType() }

    /** Gets the `index`th child of this node, including synthetic children. */
    AstNode getChild(int index) { result = injectedChild(enclosingEntity, rawNode, index) }

    /** Gets a textual representation of this node, which is its type string. */
    string toString() { result = this.getType() }

    /** Gets the location of the underlying raw node. */
    Raw::Location getLocation() { result = rawNode.getLocation() }
  }

  /**
   * A synthetic AST node, created to be a leaf for an otherwise non-leaf attribute.
   */
  class SyntheticAstNode extends AstNode, TSyntheticNode {
    int childIndex;
    int attributeIndex;

    SyntheticAstNode() {
      this = TSyntheticNode(enclosingEntity, rawNode, childIndex, attributeIndex)
    }

    /** Gets the single attribute carried by this synthetic node, at its original index. */
    override string getAttribute(int index) {
      result = rawNode.astNodeAttribute(attributeIndex) and index = attributeIndex
    }

    /** Gets the type of the raw node, suffixed with a marker naming the synthetic child index. */
    override string getType() {
      result = rawNode.astNodeType() + "::<synthetic " + childIndex + ">"
    }

    /** Synthetic nodes are leaves and have no children. */
    override AstNode getChild(int index) { none() }
  }
}
module DatabaseFeatures {
  /**
   * Exposed as a tool for defining anchors for semantic search.
   */
  class UnderlyingFunction = Raw::UnderlyingFunction;

  private class Location = Raw::Location;

  private newtype TEntityOrAstNode =
    TEntity(Wrapped::Entity entity) or
    TAstNode(Wrapped::AstNode astNode)

  /** Either an entity or an AST node; both expose a type, a string form and a location. */
  class EntityOrAstNode extends TEntityOrAstNode {
    abstract string getType();

    abstract string toString();

    abstract Location getLocation();
  }

  /** An entity, backed by a `Wrapped::Entity`. */
  class Entity extends EntityOrAstNode, TEntity {
    Wrapped::Entity entity;

    Entity() { this = TEntity(entity) }

    /** Gets the name of the wrapped entity. */
    string getName() { result = entity.getName() }

    /** Gets the AST node that is the `index`th AST root of the wrapped entity. */
    AstNode getAstRoot(int index) { result = TAstNode(entity.getAstRoot(index)) }

    override string getType() { result = "javascript function" }

    override string toString() { result = "Entity: " + this.getName() }

    override Location getLocation() { result = entity.getLocation() }

    /** Gets the function underlying the wrapped entity. */
    UnderlyingFunction getDefinedFunction() { result = entity.getDefinedFunction() }
  }

  /** An AST node, backed by a `Wrapped::AstNode`. */
  class AstNode extends EntityOrAstNode, TAstNode {
    Wrapped::AstNode rawNode;

    AstNode() { this = TAstNode(rawNode) }

    /** Gets the `index`th child of this node. */
    AstNode getChild(int index) { result = TAstNode(rawNode.getChild(index)) }

    /** Gets the `index`th attribute of this node. */
    string getAttribute(int index) { result = rawNode.getAttribute(index) }

    override string getType() { result = rawNode.getType() }

    override string toString() { result = this.getType() }

    override Location getLocation() { result = rawNode.getLocation() }
  }

  /** Consistency checks: these predicates should each have no results */
  module Consistency {
    /** Reports attributes of nodes that also have a child. */
    query predicate nonLeafAttribute(AstNode node, int index, string attribute) {
      exists(node.getChild(_)) and
      attribute = node.getAttribute(index)
    }
  }

  /**
   * Relates each entity to its name, type, relative and absolute file path, and the
   * coordinates of its location.
   */
  query predicate entities(
    Entity entity, string entity_name, string entity_type, string path, int startLine,
    int startColumn, int endLine, int endColumn, string absolutePath
  ) {
    exists(Location l |
      l = entity.getLocation() and
      l.hasLocationInfo(_, startLine, startColumn, endLine, endColumn) and
      absolutePath = l.getFile().getAbsolutePath() and
      path = l.getFile().getRelativePath()
    ) and
    entity_type = entity.getType() and
    entity_name = entity.getName()
  }

  /**
   * Relates each AST node reachable from an entity's AST roots to that entity, its parent,
   * its child index under that parent, and its type.
   */
  query predicate astNodes(
    Entity enclosingEntity, EntityOrAstNode parent, int index, AstNode node, string node_type
  ) {
    // Recursive case: `parent` is an already extracted node and `node` is one of its children.
    astNodes(enclosingEntity, _, _, parent, _) and
    node = parent.(AstNode).getChild(index) and
    node_type = node.getType()
    or
    // Base case: `node` is an AST root of the entity, and the entity itself is the parent.
    parent = enclosingEntity and
    node = enclosingEntity.getAstRoot(index) and
    node_type = node.getType()
  }

  /** Relates each extracted AST node to each of its attributes. */
  query predicate nodeAttributes(AstNode node, string attr) {
    // Only get attributes of AST nodes we extract.
    // This excludes nodes in standard libraries since the standard library files
    // are located outside the source root.
    attr = node.getAttribute(_) and
    astNodes(_, _, _, node, _)
  }
}

View File

@@ -5,8 +5,21 @@
*/
import javascript
private import FeaturizationConfig
private import FunctionBodyFeatures as FunctionBodyFeatures
import CodeToFeatures
private import EndpointScoring
/**
* A configuration that defines which endpoints should be featurized.
*
* This is used as a performance optimization to ensure that we only featurize the endpoints we need
* to featurize.
*
* Concrete configurations extend this class and implement `getAnEndpointToFeaturize`.
*/
abstract class FeaturizationConfig extends string {
// The characteristic predicate accepts any string, so `this` is declared as a binding set:
// the value has to be bound elsewhere (presumably by the concrete subclass; confirm).
bindingset[this]
FeaturizationConfig() { any() }
/** Gets an endpoint that this configuration asks to have featurized. */
abstract DataFlow::Node getAnEndpointToFeaturize();
}
/**
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
@@ -18,16 +31,14 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
(
// Features for endpoints that are contained within a function.
exists(Function function |
function = FunctionBodyFeatures::getRepresentativeFunctionForEndpoint(endpoint)
|
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
// The name of the function that encloses the endpoint.
featureName = "enclosingFunctionName" and result = FunctionNames::getNameToFeaturize(function)
featureName = "enclosingFunctionName" and result = entity.getName()
or
// A feature containing natural language tokens from the function that encloses the endpoint in
// the order that they appear in the source code.
featureName = "enclosingFunctionBody" and
result = FunctionBodyFeatures::getBodyTokensFeature(function)
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
)
or
result =
@@ -74,10 +85,11 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
*
* This may in general report multiple strings, each containing a space-separated list of tokens.
*
* **Technical details:** This predicate can have multiple values per endpoint and feature name. As
* a result, the results from this predicate must be concatenated together. However concatenating
* other features like the function body tokens is expensive, so for performance reasons we separate
* out this predicate from those other features.
* **Technical details:** This predicate can have multiple values per endpoint and feature name. As a
* result, the results from this predicate must be concatenated together. However concatenating
* other features like the function body tokens is expensive, so we separate out this predicate
* from others like `FunctionBodies::getBodyTokenFeatureForEntity` to avoid having to perform this
* concatenation operation on other features like the function body tokens.
*/
private string getACallBasedTokenFeatureComponent(
DataFlow::Node endpoint, DataFlow::CallNode call, string featureName
@@ -114,6 +126,47 @@ private string getACallBasedTokenFeatureComponent(
)
}
/** This module provides functionality for getting the function body feature associated with a particular entity. */
module FunctionBodies {
/**
* Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node.
*
* Only entities that represent an endpoint to featurize, and that have between 1 and 256
* attribute-carrying AST nodes, produce results. A node contributes a token only when it has
* exactly one attribute value.
*/
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
// Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
entity =
getRepresentativeEntityForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
// Performance optimization: If a function has more than 256 body tokens, then featurize it as
// absent. This approximates the behavior of the classifier on non-generic body features where
// large body features are replaced by the absent token.
//
// We count nodes instead of tokens because tokens are often not unique.
strictcount(DatabaseFeatures::AstNode node |
DatabaseFeatures::astNodes(entity, _, _, node, _) and
exists(string t | DatabaseFeatures::nodeAttributes(node, t))
) <= 256 and
exists(DatabaseFeatures::AstNode node |
DatabaseFeatures::astNodes(entity, _, _, node, _) and
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
location = node.getLocation()
)
}
/**
* Gets the body token feature for the specified entity.
*
* This is a string containing natural language tokens in the order that they appear in the source code for the entity.
*
* Tokens are joined with a single space and ordered by absolute file path, start line, start
* column, end line, end column and finally by the token text. There is no result when the
* entity has no body tokens.
*/
string getBodyTokenFeatureForEntity(DatabaseFeatures::Entity entity) {
result =
strictconcat(string token, Location l |
bodyTokens(entity, l, token)
|
token, " "
order by
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
l.getEndColumn(), token
)
}
}
/**
* This module provides functionality for getting a representation of the access path of nodes
* within the program.
@@ -232,59 +285,8 @@ private module AccessPaths {
}
}
private module FunctionNames {
/**
* Get the name of the function.
*
* We attempt to assign unnamed entities approximate names if they are passed to a likely
* external library function. If we can't assign them an approximate name, we give them the name
* `""`, so that these entities are included in `AdaptiveThreatModeling.qll`.
*
* For entities which have multiple names, we choose the lexically smallest name.
*/
string getNameToFeaturize(Function function) {
if exists(function.getName())
then result = min(function.getName())
else
if exists(getApproximateNameForFunction(function))
then result = getApproximateNameForFunction(function)
else result = ""
}
/**
* Holds if the call `call` has `function` as its `argumentIndex`th argument, up to zero or
* more local flow steps between the argument node and the data flow node of `function`.
*/
private predicate functionUsedAsArgumentToCall(
Function function, DataFlow::CallNode call, int argumentIndex
) {
DataFlow::localFlowStep*(call.getArgument(argumentIndex), function.flow())
}
/**
* Returns a generated name for the function. This name is generated such that
* entities with the same names have similar behavior.
*/
private string getApproximateNameForFunction(Function function) {
count(DataFlow::CallNode call, int index | functionUsedAsArgumentToCall(function, call, index)) =
1 and
exists(DataFlow::CallNode call, int index, string basePart |
functionUsedAsArgumentToCall(function, call, index) and
(
if count(getReceiverName(call)) = 1
then basePart = getReceiverName(call) + "."
else basePart = ""
) and
result = basePart + call.getCalleeName() + "#functionalargument"
)
}
private string getReceiverName(DataFlow::CallNode call) {
result = call.getReceiver().asExpr().(VarAccess).getName()
}
}
/** Get a name of a supported generic token-based feature. */
string getASupportedFeatureName() {
private string getASupportedFeatureName() {
result =
[
"enclosingFunctionName", "calleeName", "receiverName", "argumentIndex", "calleeApiName",
@@ -301,5 +303,12 @@ string getASupportedFeatureName() {
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
featureValue = getTokenFeature(endpoint, featureName)
(
if strictcount(getTokenFeature(endpoint, featureName)) = 1
then featureValue = getTokenFeature(endpoint, featureName)
else (
// Performance note: this is a Cartesian product between all endpoints and feature names.
featureValue = "" and featureName = getASupportedFeatureName()
)
)
}

View File

@@ -5,21 +5,86 @@
*/
private import javascript
private import BaseScoring
private import EndpointFeatures as EndpointFeatures
private import FeaturizationConfig
private import EndpointTypes
import BaseScoring
import CodeToFeatures
import EndpointFeatures as EndpointFeatures
import EndpointTypes
/**
* Gets a value in the first column of `availableMlModels` for a row whose second column is
* `"javascript"` and whose fourth column is `"atm-endpoint-scoring"`.
*
* NOTE(review): presumably this is the checksum of a compatible model, judging by the
* predicate name; confirm against the declaration of `availableMlModels`.
*/
private string getACompatibleModelChecksum() {
availableMlModels(result, "javascript", _, "atm-endpoint-scoring")
}
/**
* The maximum number of AST nodes an entity containing an endpoint should have before we should
* choose a smaller entity to represent the endpoint.
*
* This is intended to represent a balance in terms of the amount of context we provide to the
* model: we don't want the function to be too small, because then it doesn't contain very much
* context and misses useful information, but we also don't want it to be too large, because
* then there's likely to be a lot of irrelevant or very loosely related context.
*/
private int getMaxNumAstNodes() { result = 1024 }
/**
 * Gets the number of AST nodes that `DatabaseFeatures::astNodes` associates with `entity`.
 *
 * Only defined for entities that contain at least one endpoint.
 */
private int getNumAstNodesInEntity(DatabaseFeatures::Entity entity) {
  // Keep the domain of `entity` small: only entities reachable from some endpoint are counted.
  exists(DataFlow::Node anyEndpoint |
    entity = EndpointToEntity::getAnEntityForEndpoint(anyEndpoint)
  ) and
  result =
    count(DatabaseFeatures::AstNode node |
      DatabaseFeatures::astNodes(entity, _, _, node, _)
    |
      node
    )
}
/**
* Gets a single entity to use as the representative entity for the endpoint.
*
* We try to use the largest entity containing the endpoint whose AST node count is at most the
* limit defined in `getMaxNumAstNodes`. In the event of a tie, we use the entity that appears
* first within the source archive (ordered by start line, start column, end line, end column).
*
* If no containing entity is within the AST node limit, then we use the smallest entity containing
* the endpoint, with the same tie-breaking rule.
*/
DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpoint) {
// Check whether there's at least one entity containing the endpoint whose size is within the
// AST node limit (the comparison is inclusive).
if
getNumAstNodesInEntity(EndpointToEntity::getAnEntityForEndpoint(endpoint)) <=
getMaxNumAstNodes()
then
// Use the largest entity within the AST node limit, resolving ties using the entity that
// appears first in the source archive.
// Note: `min` combined with `numAstNodes desc` as the primary sort key picks the entity with
// the *largest* node count.
result =
min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInEntity(entity) and
numAstNodes <= getMaxNumAstNodes() and
l = entity.getLocation()
|
entity
order by
numAstNodes desc, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
)
else
// Use the smallest entity, resolving ties using the entity that
// appears first in the source archive.
result =
min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInEntity(entity) and
l = entity.getLocation()
|
entity
order by
numAstNodes, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
)
}
module ModelScoring {
/**
* A featurization config that only featurizes new candidate endpoints that are part of a flow
* path.
*/
class RelevantFeaturizationConfig extends FeaturizationConfig {
class RelevantFeaturizationConfig extends EndpointFeatures::FeaturizationConfig {
RelevantFeaturizationConfig() { this = "RelevantFeaturization" }
override DataFlow::Node getAnEndpointToFeaturize() {
@@ -30,15 +95,15 @@ module ModelScoring {
}
DataFlow::Node getARequestedEndpoint() {
result = any(FeaturizationConfig cfg).getAnEndpointToFeaturize()
result = any(EndpointFeatures::FeaturizationConfig cfg).getAnEndpointToFeaturize()
}
private int getARequestedEndpointType() { result = any(EndpointType type).getEncoding() }
predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
scoreEndpoints(getARequestedEndpoint/0, EndpointFeatures::tokenFeatures/3,
EndpointFeatures::getASupportedFeatureName/0, getARequestedEndpointType/0,
getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
scoreEndpoints(getARequestedEndpoint/0, getARequestedEndpointType/0,
EndpointFeatures::tokenFeatures/3, getACompatibleModelChecksum/0)(endpoint,
encodedEndpointType, score)
}
/**

View File

@@ -1,14 +0,0 @@
import javascript
/**
* A configuration that defines which endpoints should be featurized.
*
* This is used as a performance optimization to ensure that we only featurize the endpoints we need
* to featurize.
*
* Each concrete configuration is identified by the string value it binds `this` to in its own
* characteristic predicate.
*/
abstract class FeaturizationConfig extends string {
// The abstract base class places no restriction on the string value; `bindingset[this]` tells
// the compiler that `this` is bound by the caller (i.e. by the concrete subclass).
bindingset[this]
FeaturizationConfig() { any() }
/** Gets an endpoint that should be featurized under this configuration. */
abstract DataFlow::Node getAnEndpointToFeaturize();
}

View File

@@ -1,146 +0,0 @@
/*
* FunctionBodyFeatures.qll
*
* Contains logic relating to the `enclosingFunctionBody` and `enclosingFunctionName` features.
*/
import javascript
private import FeaturizationConfig
/**
* Gets the token used to represent the AST node `node`, if it has one.
*
* A token is produced for identifiers (their name), index expressions with a statically known
* property name, number literals (raw source text), other literals (their value), and template
* elements (raw source text). Other kinds of node have no result.
*/
string getTokenizedAstNode(ASTNode node) {
// NB: Unary and binary operator expressions e.g. -a, a + b and compound
// assignments e.g. a += b can be identified by the expression type.
result = node.(Identifier).getName()
or
// Computed property accesses for which we can predetermine the property being accessed.
// NB: May alias with operators e.g. could have '+' as a property name.
result = node.(IndexExpr).getPropertyName()
or
// We use `getRawValue` to give us distinct representations for `0xa`, `0xA`, and `10`.
result = node.(NumberLiteral).getRawValue()
or
// We use `getValue` rather than `getRawValue` so we assign `"a"` and `'a'` the same representation.
// Number literals are excluded here because they are already handled by the case above.
not node instanceof NumberLiteral and
result = node.(Literal).getValue()
or
result = node.(TemplateElement).getRawValue()
}
/** Returns an AST node within the function `f` that we should featurize. */
pragma[inline]
ASTNode getAnASTNodeToFeaturize(Function f) {
result.getParent*() = f and
// Don't featurize the function name as part of the function body tokens
not result = f.getIdentifier()
}
/**
* Get a function containing the endpoint that is suitable for featurization. In general, this
* can associate an endpoint to multiple functions, since functions can be nested in JavaScript.
*/
Function getAFunctionForEndpoint(DataFlow::Node endpoint) {
// Performance optimization: Restrict the set of endpoints to the endpoints to featurize.
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
result = endpoint.getContainer().getEnclosingContainer*()
}
/**
* The maximum number of AST nodes a function containing an endpoint should have before we should
* choose a smaller function to represent the endpoint.
*
* This is intended to represent a balance in terms of the amount of context we provide to the
* model: we don't want the function to be too small, because then it doesn't contain very much
* context and misses useful information, but we also don't want it to be too large, because then
* there's likely to be a lot of irrelevant or very loosely related context.
*/
private int getMaxNumAstNodes() { result = 1024 }
/**
* Returns the number of AST nodes contained within the specified function.
*/
private int getNumAstNodesInFunction(Function function) {
// Restrict the values `function` can take on
function = getAFunctionForEndpoint(_) and
result = count(getAnASTNodeToFeaturize(function))
}
/**
* Get the enclosing function for an endpoint.
*
* This is used to compute the `enclosingFunctionBody` and `enclosingFunctionName` features.
*
* We try to use the largest function containing the endpoint that's below the AST node limit
* defined in `getMaxNumAstNodes`. In the event of a tie, we use the function that appears first
* within the source code.
*
* If no functions are smaller than the AST node limit, then we use the smallest function containing
* the endpoint.
*/
Function getRepresentativeFunctionForEndpoint(DataFlow::Node endpoint) {
// Check whether there's a function containing the endpoint that's smaller than the AST node
// limit.
if getNumAstNodesInFunction(getAFunctionForEndpoint(endpoint)) <= getMaxNumAstNodes()
then
// Use the largest function smaller than the AST node limit, resolving ties using the function
// that appears first in the source code.
result =
min(Function function, int numAstNodes, Location l |
function = getAFunctionForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInFunction(function) and
numAstNodes <= getMaxNumAstNodes() and
l = function.getLocation()
|
function
order by
numAstNodes desc, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
)
else
// Use the smallest function, resolving ties using the function that appears first in the source
// code.
result =
min(Function function, int numAstNodes, Location l |
function = getAFunctionForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInFunction(function) and
l = function.getLocation()
|
function
order by
numAstNodes, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
)
}
/**
* Gets an AST node to featurize within the function `f`, where `f` is the representative
* function of some endpoint that a featurization config has asked to featurize.
*
* NOTE(review): despite the name, this predicate does not itself check that the node has a token
* (`getTokenizedAstNode`); it only restricts `f` — confirm that callers apply that filter.
*/
ASTNode getAnASTNodeWithAFeature(Function f) {
// Performance optimization: Restrict the set of functions to those containing an endpoint to featurize.
f = getRepresentativeFunctionForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
result = getAnASTNodeToFeaturize(f)
}
/**
* Gets the body tokens feature for `function`: the tokens of the AST nodes to featurize within
* `function`, joined by single spaces and ordered by file path, source location, and then token.
*
* Has no result if `function` contains no tokenized AST nodes, or more than 256 of them.
*/
string getBodyTokensFeature(Function function) {
// Performance optimization: If a function has more than 256 body subtokens, then featurize it as absent. This
// approximates the behavior of the classifier on non-generic body features where large body
// features are replaced by the absent token.
//
// We count nodes instead of tokens because tokens are often not unique.
strictcount(ASTNode node |
node = getAnASTNodeToFeaturize(function) and
exists(getTokenizedAstNode(node))
) <= 256 and
result =
strictconcat(Location l, string token |
// The use of a nested exists here allows us to avoid duplicates due to two AST nodes in the
// same location featurizing to the same token. By using a nested exists, we take only unique
// (location, token) pairs.
exists(ASTNode node |
node = getAnASTNodeToFeaturize(function) and
token = getTokenizedAstNode(node) and
l = node.getLocation()
)
|
token, " "
order by
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
l.getEndColumn(), token
)
}

View File

@@ -53,11 +53,7 @@ predicate isSomeModeledArgument(DataFlow::Node n) {
/**
* Holds if `n` appears to be a numeric value.
*/
// Performance optimisation: This predicate operates on a large set of
// starting nodes, so use binding hints to suggest computing that set last.
predicate isNumeric(DataFlow::Node n) {
getAnAccessedName(pragma[only_bind_into](n)).regexpMatch(".*index.*")
}
predicate isNumeric(DataFlow::Node n) { isReadFrom(n, ".*index.*") }
/**
* Holds if `n` is an argument to a library without sinks.

View File

@@ -1,5 +1,5 @@
name: codeql/javascript-experimental-atm-lib
version: 0.0.2
version: 0.0.4
extractor: javascript
library: true
groups:

View File

@@ -1,6 +1,6 @@
name: codeql/javascript-experimental-atm-queries
language: javascript
version: 0.0.2
version: 0.0.4
suites: codeql-suites
defaultSuiteFile: codeql-suites/javascript-atm-code-scanning.qls
groups:

View File

@@ -84,6 +84,11 @@ abstract class FileNameSource extends DataFlow::Node { }
abstract class DatabaseAccess extends DataFlow::Node {
/** Gets an argument to this database access that is interpreted as a query. */
abstract DataFlow::Node getAQueryArgument();
/** Gets a node to which a result of the access may flow. */
DataFlow::Node getAResult() {
none() // Overridden in subclass
}
}
/**

View File

@@ -462,15 +462,15 @@ module AccessPath {
ReachableBasicBlock bb, Root root, string path, int ranking, AccessPathKind type
) {
result =
rank[ranking](ControlFlowNode ref, int i |
rank[ranking](ControlFlowNode ref |
ref = getAccessTo(root, path, _) and
ref = bb.getNode(i) and
ref.getBasicBlock() = bb and
// Prunes the accesses where there does not exist a read and write within the same basicblock.
// This could be more precise, but doing it like this avoids massive joins.
hasRead(bb) and
hasWrite(bb)
|
ref order by i
ref order by any(int i | ref = bb.getNode(i))
) and
result = getAccessTo(root, path, type)
}
@@ -492,7 +492,7 @@ module AccessPath {
*/
pragma[noinline]
private predicate hasWrite(ReachableBasicBlock bb) {
bb = getAccessTo(_, _, AccessPathWrite()).getBasicBlock()
bb = getAccessTo(_, _, AccessPathRead()).getBasicBlock()
}
/**
@@ -565,12 +565,9 @@ module AccessPath {
)
or
// across basic blocks.
exists(Root root, string path, ReachableBasicBlock readBlock |
exists(Root root, string path |
read.asExpr() = getAccessTo(root, path, AccessPathRead()) and
readBlock = read.getBasicBlock() and
// Performance optimisation: check that `read` is in a *reachable* basic block
// before looking for a dominating write block.
getAWriteBlock(root, path).strictlyDominates(pragma[only_bind_out](readBlock))
getAWriteBlock(root, path).strictlyDominates(read.getBasicBlock())
)
}
}

View File

@@ -217,7 +217,7 @@ private class AnalyzedImplicitInit extends AnalyzedSsaDefinition, SsaImplicitIni
*/
private class AnalyzedVariableCapture extends AnalyzedSsaDefinition, SsaVariableCapture {
override AbstractValue getAnRhsValue() {
exists(LocalVariable v | v = this.getSourceVariable() |
exists(LocalVariable v | v = getSourceVariable() |
result = v.(AnalyzedCapturedVariable).getALocalValue()
or
result = any(AnalyzedExplicitDefinition def | def.getSourceVariable() = v).getAnRhsValue()

View File

@@ -56,7 +56,9 @@ predicate isGeneratedCodeFile(File f) { isGenerated(f.getATopLevel()) }
predicate isTestFile(File f) {
exists(Test t | t.getFile() = f)
or
f = getATestFile(_)
exists(string stemExt | stemExt = "test" or stemExt = "spec" |
f = getTestFile(any(File orig), stemExt)
)
or
f.getAbsolutePath().regexpMatch(".*/__(mocks|tests)__/.*")
}

View File

@@ -46,17 +46,35 @@ module Knex {
RawKnexSqlString() { this = any(RawKnexCall call).getArgument(0).asExpr() }
}
/** A call that triggers a SQL query submission. */
private class KnexDatabaseAccess extends DatabaseAccess {
KnexDatabaseAccess() {
this = knexObject().getMember(["then", "stream", "asCallback"]).getACall()
/** A call that triggers a SQL query submission by calling then/stream/asCallback. */
private class KnexDatabaseCallback extends DatabaseAccess, DataFlow::CallNode {
string member;
KnexDatabaseCallback() {
member = ["then", "stream", "asCallback"] and
this = knexObject().getMember(member).getACall()
}
override DataFlow::Node getAResult() {
member = "then" and
result = this.getCallback(0).getParameter(0)
or
exists(AwaitExpr await |
this = await.flow() and
await.getOperand() = knexObject().getAUse().asExpr()
)
member = "asCallback" and
result = this.getCallback(0).getParameter(1)
}
override DataFlow::Node getAQueryArgument() { none() }
}
/**
* An `await` expression whose operand is a use of `knexObject()`, viewed as a database access.
*
* The value of the `await` expression itself is the result of the access.
*/
private class KnexDatabaseAwait extends DatabaseAccess, DataFlow::ValueNode {
KnexDatabaseAwait() {
exists(AwaitExpr enclosingAwait | this = enclosingAwait.flow() |
enclosingAwait.getOperand() = knexObject().getAUse().asExpr()
)
}
// The awaited value is the query result, so the node is its own result.
override DataFlow::Node getAResult() { result = this }
// No query argument is modeled for the `await` form.
override DataFlow::Node getAQueryArgument() { none() }
}
}

View File

@@ -3,6 +3,7 @@
*/
import javascript
import semmle.javascript.Promises
module NoSQL {
/** An expression that is interpreted as a NoSQL query. */
@@ -65,6 +66,10 @@ private module MongoDB {
override DataFlow::Node getAQueryArgument() { result = this.getArgument(queryArgIdx) }
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
DataFlow::Node getACodeOperator() {
result = getADollarWhereProperty(this.getParameter(queryArgIdx))
}
@@ -537,12 +542,29 @@ private module Mongoose {
// NB: the complete information is not easily accessible for deeply chained calls
f.getQueryArgument().getARhs() = result
}
override DataFlow::Node getAResult() {
result = this.getCallback(this.getNumArgument() - 1).getParameter(1)
}
}
class ExplicitQueryEvaluation extends DatabaseAccess {
class ExplicitQueryEvaluation extends DatabaseAccess, DataFlow::CallNode {
string member;
ExplicitQueryEvaluation() {
// explicit execution using a Query method call
Query::getAMongooseQuery().getMember(["exec", "then", "catch"]).getACall() = this
member = ["exec", "then", "catch"] and
Query::getAMongooseQuery().getMember(member).getACall() = this
}
private int resultParamIndex() {
member = "then" and result = 0
or
member = "exec" and result = 1
}
override DataFlow::Node getAResult() {
result = this.getCallback(_).getParameter(this.resultParamIndex())
}
override DataFlow::Node getAQueryArgument() {
@@ -588,6 +610,10 @@ private module Minimongo {
override DataFlow::Node getAQueryArgument() { result = this.getArgument(queryArgIdx) }
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
DataFlow::Node getACodeOperator() {
result = getADollarWhereProperty(this.getParameter(queryArgIdx))
}
@@ -609,7 +635,7 @@ private module Minimongo {
* Provides classes modeling the MarsDB library.
*/
private module MarsDB {
private class MarsDBAccess extends DatabaseAccess {
private class MarsDBAccess extends DatabaseAccess, DataFlow::CallNode {
string method;
MarsDBAccess() {
@@ -623,21 +649,29 @@ private module MarsDB {
string getMethod() { result = method }
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { none() }
}
/** A call to a MarsDB query method. */
private class QueryCall extends DatabaseAccess, API::CallNode {
private class QueryCall extends MarsDBAccess, API::CallNode {
int queryArgIdx;
QueryCall() {
exists(string m |
this.(MarsDBAccess).getMethod() = m and
this.getMethod() = m and
// implements parts of the Minimongo interface
Minimongo::CollectionMethodSignatures::interpretsArgumentAsQuery(m, queryArgIdx)
)
}
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { result = this.getArgument(queryArgIdx) }
DataFlow::Node getACodeOperator() {
@@ -744,9 +778,13 @@ private module Redis {
/**
* An access to a database through redis
*/
class RedisDatabaseAccess extends DatabaseAccess {
class RedisDatabaseAccess extends DatabaseAccess, DataFlow::CallNode {
RedisDatabaseAccess() { this = redis().getMember(_).getACall() }
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { none() }
}
}
@@ -768,9 +806,13 @@ private module IoRedis {
/**
* An access to a database through ioredis
*/
class IoRedisDatabaseAccess extends DatabaseAccess {
class IoRedisDatabaseAccess extends DatabaseAccess, DataFlow::CallNode {
IoRedisDatabaseAccess() { this = ioredis().getMember(_).getACall() }
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { none() }
}
}

View File

@@ -3,6 +3,7 @@
*/
import javascript
import semmle.javascript.Promises
module SQL {
/** A string-valued expression that is interpreted as a SQL command. */
@@ -81,6 +82,8 @@ private module MySql {
)
}
override DataFlow::Node getAResult() { result = this.getCallback(_).getParameter(1) }
override DataFlow::Node getAQueryArgument() { result = this.getArgument(0) }
}
@@ -178,6 +181,16 @@ private module Postgres {
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() { this = [client(), pool()].getMember("query").getACall() }
override DataFlow::Node getAResult() {
this.getNumArgument() = 2 and
result = this.getCallback(1).getParameter(1)
or
this.getNumArgument() = 1 and
result = this.getAMethodCall("then").getCallback(0).getParameter(0)
or
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { result = this.getArgument(0) }
}
@@ -322,6 +335,10 @@ private module Postgres {
)
}
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() {
result = this.getADirectQueryArgument()
or
@@ -370,6 +387,11 @@ private module Sqlite {
this = database().getMember("prepare").getACall()
}
override DataFlow::Node getAResult() {
result = this.getCallback(1).getParameter(1) or
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { result = this.getArgument(0) }
}
@@ -413,13 +435,17 @@ private module MsSql {
API::Node pool() { result = mssqlClass("ConnectionPool") }
/** A tagged template evaluated as a query. */
private class QueryTemplateExpr extends DatabaseAccess, DataFlow::ValueNode {
private class QueryTemplateExpr extends DatabaseAccess, DataFlow::ValueNode, DataFlow::SourceNode {
override TaggedTemplateExpr astNode;
QueryTemplateExpr() {
mssql().getMember("query").getAUse() = DataFlow::valueNode(astNode.getTag())
}
override DataFlow::Node getAResult() {
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() {
result = DataFlow::valueNode(astNode.getTemplate().getAnElement())
}
@@ -429,6 +455,12 @@ private module MsSql {
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() { this = [mssql(), request()].getMember(["query", "batch"]).getACall() }
override DataFlow::Node getAResult() {
result = this.getCallback(1).getParameter(1)
or
PromiseFlow::loadStep(this.getALocalUse(), result, Promises::valueProp())
}
override DataFlow::Node getAQueryArgument() { result = this.getArgument(0) }
}
@@ -505,6 +537,12 @@ private module Sequelize {
]
}
}
/**
* A models-as-data source row marking the awaited return value of the `query` member of the
* `Sequelize` type (package `sequelize`) as a `database-access-result` source.
*/
class SequelizeSource extends ModelInput::SourceModelCsv {
override predicate row(string row) {
// Row format: `package;type;path;kind`.
row = "sequelize;Sequelize;Member[query].ReturnValue.Awaited;database-access-result"
}
}
}
private module SpannerCsv {
@@ -516,7 +554,10 @@ private module SpannerCsv {
"@google-cloud/spanner;;@google-cloud/spanner;;Member[Spanner]",
"@google-cloud/spanner;Database;@google-cloud/spanner;;ReturnValue.Member[instance].ReturnValue.Member[database].ReturnValue",
"@google-cloud/spanner;v1.SpannerClient;@google-cloud/spanner;;Member[v1].Member[SpannerClient].Instance",
"@google-cloud/spanner;Transaction;@google-cloud/spanner;Database;Member[runTransaction,runTransactionAsync].Argument[0..1].Parameter[1]",
"@google-cloud/spanner;Transaction;@google-cloud/spanner;Database;Member[runTransaction,runTransactionAsync,getTransaction].Argument[0..1].Parameter[1]",
"@google-cloud/spanner;Transaction;@google-cloud/spanner;Database;Member[getTransaction].ReturnValue.Awaited",
"@google-cloud/spanner;Snapshot;@google-cloud/spanner;Database;Member[getSnapshot].Argument[0..1].Parameter[1]",
"@google-cloud/spanner;Snapshot;@google-cloud/spanner;Database;Member[getSnapshot].ReturnValue.Awaited",
"@google-cloud/spanner;BatchTransaction;@google-cloud/spanner;Database;Member[batchTransaction].ReturnValue",
"@google-cloud/spanner;BatchTransaction;@google-cloud/spanner;Database;Member[createBatchTransaction].ReturnValue.Awaited",
"@google-cloud/spanner;~SqlExecutorDirect;@google-cloud/spanner;Database;Member[run,runPartitionedUpdate,runStream]",
@@ -539,4 +580,23 @@ private module SpannerCsv {
]
}
}
/**
* Models-as-data source rows marking results of `executeSql` and `run` calls on the Spanner
* types listed in `spannerClass` as `database-access-result` sources.
*
* One row is generated for every combination of `spannerClass()` and `resultPath()`.
*/
class SpannerSources extends ModelInput::SourceModelCsv {
/** Gets the name of a Spanner type (in package `@google-cloud/spanner`) to generate rows for. */
string spannerClass() { result = ["v1.SpannerClient", "Database", "Transaction", "Snapshot",] }
/** Gets an access path, relative to a Spanner type, that leads to a query result. */
string resultPath() {
result =
[
"Member[executeSql].Argument[0..].Parameter[1]",
"Member[executeSql].ReturnValue.Awaited.Member[0]", "Member[run].ReturnValue.Awaited",
"Member[run].Argument[0..].Parameter[1]",
]
}
override predicate row(string row) {
// Row format: `package;type;path;kind`. Both helper predicates are multi-valued, so this
// yields the full cross product of types and paths.
row =
"@google-cloud/spanner;" + this.spannerClass() + ";" + this.resultPath() +
";database-access-result"
}
}
}

View File

@@ -40,7 +40,7 @@ class BDDTest extends Test, @call_expr {
/**
* Gets the test file for `f` with stem extension `stemExt`.
* That is, a file named `<base>.<stemExt>.<ext>` in the
* That is, a file named file named `<base>.<stemExt>.<ext>` in the
* same directory as `f` which is named `<base>.<ext>`.
*/
bindingset[stemExt]
@@ -48,33 +48,6 @@ File getTestFile(File f, string stemExt) {
result = f.getParentContainer().getFile(f.getStem() + "." + stemExt + "." + f.getExtension())
}
/**
* Gets a test file for `f`.
* That is, a file named `<base>.<stemExt>.<ext>` in the
* same directory as `f`, where `f` is named `<base>.<ext>` and
* `<stemExt>` is a well-known test file identifier, such as `test` or `spec`.
*/
File getATestFile(File f) {
result = f.getParentContainer().getFile(getATestFileName(f))
}
/**
* Gets a name of a test file for `f`.
* That is, `<base>.<stemExt>.<ext>` where
* `f` is named `<base>.<ext>` and `<stemExt>` is
* a well-known test file identifier, such as `test` or `spec`.
*/
// Helper predicate factored out for performance.
// This predicate is linear in the size of f, and forces
// callers to join only once against f rather than two separate joins
// when computing the stem and the extension.
// This loses some flexibility because callers cannot specify
// an arbitrary stemExt.
pragma[nomagic]
private string getATestFileName(File f) {
result = f.getStem() + "." + ["test", "spec"] + "." + f.getExtension()
}
/**
* A Jest test, that is, an invocation of a global function named
* `test` where the first argument is a string and the second

View File

@@ -31,7 +31,7 @@
* - Instance: the value returned by a constructor call
* - Awaited: the value from a resolved promise/future-like object
* - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* - Other langauge-specific tokens mentioned in `ModelsAsData.qll`.
* - Other language-specific tokens mentioned in `ModelsAsData.qll`.
* 4. The `input` and `output` columns specify how data enters and leaves the element selected by the
* first `(package, type, path)` tuple. Both strings are `.`-separated access paths
* of the same syntax as the `path` column.

View File

@@ -14,7 +14,7 @@ private import semmle.javascript.security.dataflow.CommandInjectionCustomization
abstract class HeuristicSource extends DataFlow::Node { }
/**
* An access to a password, viewed a source of remote flow.
* An access to a password, viewed as a source of remote flow.
*/
private class RemoteFlowPassword extends HeuristicSource, RemoteFlowSource {
RemoteFlowPassword() { isReadFrom(this, "(?is).*(password|passwd).*") }
@@ -52,3 +52,20 @@ class RemoteServerResponse extends HeuristicSource, RemoteFlowSource {
override string getSourceType() { result = "a response from a remote server" }
}
/**
* A remote flow source originating from a database access.
*/
private class RemoteFlowSourceFromDBAccess extends RemoteFlowSource, HeuristicSource {
RemoteFlowSourceFromDBAccess() {
this = ModelOutput::getASourceNode("database-access-result").getAUse() or
exists(DatabaseAccess dba | this = dba.getAResult())
}
override string getSourceType() { result = "Database access" }
override predicate isUserControlledObject() {
// NB. supported databases all might return JSON.
any()
}
}

View File

@@ -16,23 +16,15 @@ import javascript
*/
bindingset[regexp]
predicate isReadFrom(DataFlow::Node read, string regexp) {
getAnAccessedName(read).regexpMatch(regexp)
}
/**
* Gets the "name" accessed by `read`. The "name" is one of:
* - the name of the read variable, if `read` is a variable read
* - the name of the read property, if `read` is a property read
* - the suffix of the getter-method name, if `read` is a getter invocation, for example "Number" in "getNumber"
*/
string getAnAccessedName(DataFlow::Node read) {
exists(DataFlow::Node actualRead |
actualRead = read.asExpr().getUnderlyingValue().(LogOrExpr).getAnOperand().flow() or // unfold `x || y` once
actualRead = read
|
actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = result or
actualRead.(DataFlow::PropRead).getPropertyName() = result or
actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + result
exists(string name | name.regexpMatch(regexp) |
actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = name or
actualRead.(DataFlow::PropRead).getPropertyName() = name or
actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + name
)
)
}

View File

@@ -50,12 +50,8 @@ module CorsMisconfigurationForCredentials {
|
routeHandler.getAResponseHeader(_) = origin and
routeHandler.getAResponseHeader(_) = credentials and
// Performance optimisation: start with the set of all route handlers
// rather than the set of all exprs.
pragma[only_bind_into](origin)
.definesExplicitly("access-control-allow-origin", this.asExpr()) and
pragma[only_bind_into](credentials)
.definesExplicitly("access-control-allow-credentials", credentialsValue)
origin.definesExplicitly("access-control-allow-origin", this.asExpr()) and
credentials.definesExplicitly("access-control-allow-credentials", credentialsValue)
|
credentialsValue.mayHaveBooleanValue(true) or
credentialsValue.mayHaveStringValue("true")

View File

@@ -437,27 +437,8 @@ module DomBasedXss {
b = phi.getAnInput().getDefinition() and
count(phi.getAnInput()) = 2 and
not a = b and
/*
* Performance optimisation:
*
* When join-ordering and evaluating this conjunction,
* it is preferable to start with the relatively small set of
* `sanitizer` calls, then compute the set of SSA variables accessed
* as the arguments of those sanitizer calls, then reason about how
* those variables are used in phi nodes.
*
* Use directional binding pragmas to encourage this join order,
* starting with `sanitizer`.
*
* Without these pragmas, the join orderer may choose the opposite order:
* start with all `phi` nodes, then compute the set of SSA variables involved,
* then the (potentially large) set of accesses to those variables,
* then the set of accesses used as the argument of a sanitizer call.
*/
pragma[only_bind_out](sanitizer) = DataFlow::valueNode(a.getDef().getSource()) and
pragma[only_bind_out](sanitizer.getAnArgument().asExpr()) =
b.getSourceVariable().getAnAccess()
sanitizer = DataFlow::valueNode(a.getDef().getSource()) and
sanitizer.getAnArgument().asExpr().(VarAccess).getVariable() = b.getSourceVariable()
|
pred = DataFlow::ssaDefinitionNode(b) and
succ = DataFlow::ssaDefinitionNode(phi)

View File

@@ -539,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
}
}
/**
 * A RegExp term that behaves like a plus quantifier for the purposes of this analysis.
 *
 * This is either a `RegExpPlus`, or a `RegExpRange` whose lower bound is 1 and whose upper bound
 * is either absent or at least 30.
 * The threshold of 30 was chosen because an exponential blowup of 2^30 is already enough for a
 * decent DoS attack.
 */
private class EffectivelyPlus extends RegExpTerm {
  EffectivelyPlus() {
    this instanceof RegExpPlus
    or
    (
      this.(RegExpRange).getLowerBound() = 1 and
      (
        not exists(this.(RegExpRange).getUpperBound())
        or
        this.(RegExpRange).getUpperBound() >= 30
      )
    )
  }
}
/**
 * A RegExp term that behaves like a star quantifier for the purposes of this analysis.
 *
 * This is either a `RegExpStar`, or a `RegExpRange` whose lower bound is 0 and whose upper bound
 * is either absent or at least 30.
 */
private class EffectivelyStar extends RegExpTerm {
  EffectivelyStar() {
    this instanceof RegExpStar
    or
    (
      this.(RegExpRange).getLowerBound() = 0 and
      (
        not exists(this.(RegExpRange).getUpperBound())
        or
        this.(RegExpRange).getUpperBound() >= 30
      )
    )
  }
}
/**
 * A RegExp term that behaves like a question mark (optional) quantifier.
 *
 * This is either a `RegExpOpt`, or a `RegExpRange` with lower bound 0 and upper bound 1.
 */
private class EffectivelyQuestion extends RegExpTerm {
  EffectivelyQuestion() {
    this instanceof RegExpOpt
    or
    (
      this.(RegExpRange).getLowerBound() = 0 and
      this.(RegExpRange).getUpperBound() = 1
    )
  }
}
/**
* Gets the state before matching `t`.
*/
@@ -559,14 +608,14 @@ State after(RegExpTerm t) {
or
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
or
exists(RegExpStar star | t = star.getAChild() | result = before(star))
exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
or
exists(RegExpPlus plus | t = plus.getAChild() |
exists(EffectivelyPlus plus | t = plus.getAChild() |
result = before(plus) or
result = after(plus)
)
or
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
or
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
}
@@ -617,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
or
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
or
exists(RegExpStar star | lbl = Epsilon() |
exists(EffectivelyStar star | lbl = Epsilon() |
q1 = before(star) and q2 = before(star.getChild(0))
or
q1 = before(star) and q2 = after(star)
)
or
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
exists(EffectivelyPlus plus | lbl = Epsilon() |
q1 = before(plus) and q2 = before(plus.getChild(0))
)
or
exists(RegExpOpt opt | lbl = Epsilon() |
exists(EffectivelyQuestion opt | lbl = Epsilon() |
q1 = before(opt) and q2 = before(opt.getChild(0))
or
q1 = before(opt) and q2 = after(opt)

View File

@@ -1,4 +1,4 @@
import javascript
private import semmle.javascript.heuristics.AdditionalSinks
select any(HeuristicSink s)
select any(HeuristicSink s | s.getFile().getBaseName() = "sinks.js")

View File

@@ -1,2 +0,0 @@
| sources.js:2:5:2:12 | password |
| sources.js:3:5:3:20 | JSON.stringify() |

View File

@@ -1,4 +1,13 @@
import javascript
private import semmle.javascript.heuristics.AdditionalSources
import testUtilities.ConsistencyChecking
select any(HeuristicSource s)
/**
 * A taint-tracking configuration whose sources are the `HeuristicSource` nodes
 * and whose sinks are the arguments of calls to a callee named `sink`.
 */
class Taint extends TaintTracking::Configuration {
  Taint() { this = "Taint" }

  override predicate isSource(DataFlow::Node node) { node instanceof HeuristicSource }

  override predicate isSink(DataFlow::Node node) {
    exists(DataFlow::CallNode sinkCall |
      sinkCall.getCalleeName() = "sink" and
      node = sinkCall.getAnArgument()
    )
  }
}

View File

@@ -1,4 +1,236 @@
(function() {
password;
JSON.stringify();
const password = '1234';
sink(password); // NOT OK
const s = JSON.stringify();
sink(s); // NOT OK
})();
(async function() {
const knex = require('knex');
const users = knex().select('*').from('users');
users.then(function (users) {
sink(users); // NOT OK
});
users.asCallback(function (err, users) {
sink(users); // NOT OK
});
sink(await users); // NOT OK
})();
(function() {
const pg = require('pg');
const pool = new pg.Pool({});
pool.connect(async function (err, client, done) {
client.query('SELECT * FROM users', function (err, users) {
sink(users);
});
const thenable = client.query('SELECT * FROM users')
thenable.then(function(users) {
sink(users); // NOT OK
});
const pgpromise = client.query('SELECT * FROM users');
sink(await pgpromise); // NOT OK
});
})();
(async function () {
const pgpromise = require('pg-promise')();
const db = pgpromise('postgres://username:password@localhost:1234/database');
const pgppromise = db.any('SELECT * FROM users');
pgppromise.then(function (users) {
sink(users);
});
sink(await pgppromise);
})();
(function () {
const mysql = require('mysql2');
const conn = mysql.createConnection({});
conn.query(
'SELECT * FROM `users`',
function(err, users, fields) {
sink(users); // NOT OK
}
);
conn.execute(
'SELECT * FROM `users` WHERE name = ?',
['Alice'],
function(err, users) {
sink(users);
}
);
})();
(async function () {
const sqlite = require('sqlite3');
const db = new sqlite.Database(':memory:');
db.all('SELECT * FROM users', function (err, users) {
sink(users); // NOT OK
});
const sqlitepromise = db.all('SELECT * FROM users');
sink(await sqlitepromise); // NOT OK
})();
(async function () {
const { Sequelize } = require('sequelize');
const sequelize = new Sequelize('sqlite::memory:');
class User extends sequelize.Model {}
User.init({ name: sequelize.DataTypes.String }, { sequelize, modelName: 'user' });
sequelize.query('SELECT * FROM users').then(function (users) {
sink(users); // NOT OK
});
})();
(async function () {
const sql = require('mssql');
await sql.connect('...');
sql.query('SELECT * FROM users', function (err, users) {
sink(users); // NOT OK
});
const mssqlthenable = sql.query('SELECT * FROM users');
mssqlthenable.then(function (users) {
sink(users); // NOT OK
});
const mssqlpromise = sql.query('SELECT * FROM users');
sink(await mssqlpromise); // NOT OK
const uname = 'Alice';
const mssqltaggedquery = sql.query`SELECT * FROM users where name=${uname}`
sink(await mssqltaggedquery); // NOT OK
})();
(async function () {
const {Spanner} = require('@google-cloud/spanner');
const db = new Spanner({projectId: 'test'})
.instance('instanceid')
.database('databaseid');
db.executeSql('SELECT * FROM users', {}, function (err, users) {
sink(users); // NOT OK
});
const [users] = (await db.executeSql('SELECT * FROM users', {}));
sink(users); // NOT OK
const spannerpromise = db.run({
sql: 'SELECT * FROM users'
});
sink(await spannerpromise); // NOT OK
db.run({
sql: 'SELECT * FROM users'
}, function (err, rows, stats, meta) {
sink(rows); // NOT OK
});
const client = new Spanner.v1.SpannerClient({});
client.executeSql('SELECT * FROM users', {}, function (err, users) {
sink(users); // NOT OK
});
db.runTransaction(function(err, txn) {
txn.run('SELECT * FROM users', function (err, users) {
sink(users); // NOT OK
});
txn.commit(function () {});
});
db.getSnapshot(function (err, txn) {
txn.run('SELECT * FROM users', function (err, users) {
sink(users); // NOT OK
});
txn.end();
});
})();
(function () {
const { MongoClient } = require('mongodb');
MongoClient.connect('mongodb://localhost:1234', async function (err, db) {
const collection = db.collection('users');
const users = await collection.find({});
sink(users); // NOT OK
});
})();
(async function () {
const mongoose = require('mongoose');
await mongoose.connect('mongodb://localhost:1234');
const User = mongoose.model('User', {
name: {
type: String,
unique: true
}
});
User.find({ name: 'Alice' }, function (err, alice) {
sink(alice); // NOT OK
});
User.find({ name: 'Bob' }).exec(function (err, bob) {
sink(bob); // NOT OK
});
const promise = User.find({ name: 'Claire' });
promise.then(c => sink(c)); // NOT OK
})();
(async function () {
const minimongo = require('minimongo');
const LocalDb = minimongo.MemoryDb;
const db = new LocalDb();
const doc = db.users;
const users = await doc.find({});
sink(users); // NOT OK
})();
(async function () {
const { Collection } = require('marsdb');
const doc = new Collection('users');
const users = await doc.find({});
sink(users); // NOT OK
})();
(async function () {
const redis = require("redis");
const client = redis.createClient();
const alice = await client.get('alice');
sink(alice); // NOT OK
})();
(async function () {
const Redis = require('ioredis');
const redis = new Redis();
const bob = await redis.get('bob');
sink(bob); // NOT OK
})();

View File

@@ -201,6 +201,7 @@
| regexplib/markup.js:13:14:13:16 | .+? | Strings starting with '<' and with many repetitions of '!' can start matching anywhere after the start of the preceeding .*? |
| regexplib/markup.js:14:13:14:14 | .* | Strings starting with '<' and with many repetitions of 'a' can start matching anywhere after the start of the preceeding .* |
| regexplib/markup.js:14:24:14:25 | .* | Strings starting with '<>' and with many repetitions of '>a' can start matching anywhere after the start of the preceeding .* |
| regexplib/markup.js:15:16:15:18 | .*? | Strings starting with '<img' and with many repetitions of '<imga' can start matching anywhere after the start of the preceeding <(\\/{0,1})img(.*?)(\\/{0,1})\\> |
| regexplib/markup.js:16:5:16:9 | [^>]* | Strings starting with 'src' and with many repetitions of 'src' can start matching anywhere after the start of the preceeding src[^>]*[^/].(?:jpg\|bmp\|gif)(?:\\"\|\\') |
| regexplib/markup.js:17:8:17:24 | (\\s(\\w*=".*?")?)* | Strings starting with '<a' and with many repetitions of ' =""' can start matching anywhere after the start of the preceeding .*? |
| regexplib/markup.js:17:12:17:14 | \\w* | Strings starting with '<a ' and with many repetitions of '="" a' can start matching anywhere after the start of the preceeding .*? |
@@ -213,6 +214,10 @@
| regexplib/markup.js:20:197:20:198 | "+ | Strings with many repetitions of '""' can start matching anywhere after the start of the preceeding "+ |
| regexplib/markup.js:20:245:20:247 | .*? | Strings with many repetitions of 'color: # IF found THEN move ahead "" # single or double # or no quotes\\t' can start matching anywhere after the start of the preceeding .*? |
| regexplib/markup.js:20:274:20:276 | .*? | Strings starting with '<font # Match start of Font Tag ' and with many repetitions of '<font # Match start of Font Tag a' can start matching anywhere after the start of the preceeding <\\*?font # Match start of Font Tag (?(?=[^>]+color.*>) #IF\\/THEN lookahead color in tag (.*?color\\s*?[=\|:]\\s*?) # IF found THEN move ahead ('+\\#*?[\\w\\s]*'+ # CAPTURE ColorName\\/Hex \|"+\\#*?[\\w\\s]*"+ # single or double \|\\#*\\w*\\b) # or no quotes\t.*?> # & move to end of tag \|.*?> # ELSE move to end of Tag ) # Close the If\\/Then lookahead # Use Multiline and IgnoreCase # Replace the matches from RE with MatchEvaluator below: # if m.Groups(1).Value<>"" then # Return "<font color=" & m.Groups(1).Value & ">" # else # Return "<font>" # end if |
| regexplib/markup.js:24:39:24:41 | \\s+ | Strings starting with '&lt;A' and with many repetitions of ' - != ' can start matching anywhere after the start of the preceeding \\s* |
| regexplib/markup.js:24:43:24:45 | \\S+ | Strings starting with '&lt;A ' and with many repetitions of '- !=' can start matching anywhere after the start of the preceeding \\s* |
| regexplib/markup.js:24:48:24:50 | \\s* | Strings starting with '&lt;A !' and with many repetitions of ' =- ! ' can start matching anywhere after the start of the preceeding \\s+ |
| regexplib/markup.js:24:52:24:54 | \\s* | Strings starting with '&lt;A !=' and with many repetitions of '- !=' can start matching anywhere after the start of the preceeding \\S+ |
| regexplib/markup.js:25:11:25:15 | [^>]* | Strings starting with '<A' and with many repetitions of '<A' can start matching anywhere after the start of the preceeding <[a-zA-Z][^>]*\\son\\w+=(\\w+\|'[^']*'\|"[^"]*")[^>]*> |
| regexplib/markup.js:25:45:25:49 | [^>]* | Strings starting with '<A ona=a' and with many repetitions of '0' can start matching anywhere after the start of the preceeding \\w+ |
| regexplib/markup.js:27:3:27:7 | [^>]* | Strings starting with '<' and with many repetitions of '<' can start matching anywhere after the start of the preceeding <[^>]*name[\\s]*=[\\s]*"?[^\\w_]*"?[^>]*> |
@@ -228,6 +233,10 @@
| regexplib/markup.js:44:3:44:7 | [^>]* | Strings starting with '<' and with many repetitions of '<' can start matching anywhere after the start of the preceeding <[^>]*name[\\s]*=[\\s]*"?[^\\w_]*"?[^>]*> |
| regexplib/markup.js:44:34:44:38 | [^>]* | Strings starting with '<name=' and with many repetitions of '\\t' can start matching anywhere after the start of the preceeding [\\s]* |
| regexplib/markup.js:45:6:45:13 | [\\d\\D]*? | Strings starting with '/*' and with many repetitions of 'a/*' can start matching anywhere after the start of the preceeding \\/\\*[\\d\\D]*?\\*\\/ |
| regexplib/markup.js:47:39:47:41 | \\s+ | Strings starting with '&lt;A' and with many repetitions of ' - != ' can start matching anywhere after the start of the preceeding \\s* |
| regexplib/markup.js:47:43:47:45 | \\S+ | Strings starting with '&lt;A ' and with many repetitions of '- !=' can start matching anywhere after the start of the preceeding \\s* |
| regexplib/markup.js:47:48:47:50 | \\s* | Strings starting with '&lt;A !' and with many repetitions of ' =- ! ' can start matching anywhere after the start of the preceeding \\s+ |
| regexplib/markup.js:47:52:47:54 | \\s* | Strings starting with '&lt;A !=' and with many repetitions of '- !=' can start matching anywhere after the start of the preceeding \\S+ |
| regexplib/markup.js:48:6:48:13 | [\\s\\S]*? | Strings starting with '<!--' and with many repetitions of '<!--' can start matching anywhere after the start of the preceeding <!--[\\s\\S]*?--> |
| regexplib/markup.js:53:15:53:19 | [\\w]* | Strings starting with '[a' and with many repetitions of '0' can start matching anywhere after the start of the preceeding \\w+ |
| regexplib/markup.js:56:23:56:25 | \\w+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (\\/?(?<step>\\w+))+ |
@@ -300,6 +309,7 @@
| regexplib/strings.js:14:61:14:63 | \\w* | Strings starting with 'AA' and with many repetitions of 'A' can start matching anywhere after the start of the preceeding \\w* |
| regexplib/strings.js:14:107:14:109 | \\w* | Strings starting with 'AAA' and with many repetitions of 'A' can start matching anywhere after the start of the preceeding \\w* |
| regexplib/strings.js:19:31:19:57 | [a-z&#230;&#248;&#229;0-9]+ | Strings starting with '#@' and with many repetitions of '##' can start matching anywhere after the start of the preceeding [a-z&#230;&#248;&#229;0-9]+ |
| regexplib/strings.js:19:69:19:95 | [a-z&#230;&#248;&#229;0-9]+ | Strings starting with '#@#' and with many repetitions of '##' can start matching anywhere after the start of the preceeding [a-z&#230;&#248;&#229;0-9]+ |
| regexplib/strings.js:20:3:20:20 | ((\\\\")\|[^"(\\\\")])+ | Strings starting with '"' and with many repetitions of '\\\\"' can start matching anywhere after the start of the preceeding "((\\\\")\|[^"(\\\\")])+" |
| regexplib/strings.js:21:3:21:7 | [^>]+ | Strings starting with '<' and with many repetitions of '<' can start matching anywhere after the start of the preceeding <[^>]+> |
| regexplib/strings.js:23:3:23:20 | ((\\\\")\|[^"(\\\\")])+ | Strings starting with '"' and with many repetitions of '\\\\"' can start matching anywhere after the start of the preceeding "((\\\\")\|[^"(\\\\")])+" |
@@ -313,8 +323,10 @@
| regexplib/strings.js:40:3:40:5 | \\w+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (\\w+)\\s+\\1 |
| regexplib/strings.js:48:3:48:12 | [^\\.\\?\\!]* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding ([^\\.\\?\\!]*)[\\.\\?\\!] |
| regexplib/strings.js:49:3:49:5 | \\S+ | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding (\\S+)\\x20{2,}(?=\\S+) |
| regexplib/strings.js:53:25:53:33 | [a-z0-9]+ | Strings with many repetitions of '0' can start matching anywhere after the start of the preceeding [a-z0-9]+ |
| regexplib/strings.js:53:65:53:73 | [a-z0-9]+ | Strings with many repetitions of '0' can start matching anywhere after the start of the preceeding [a-z0-9]+ |
| regexplib/strings.js:53:4:53:12 | [a-z0-9]+ | Strings with many repetitions of '0.00' can start matching anywhere after the start of the preceeding [a-z0-9]+ |
| regexplib/strings.js:53:25:53:33 | [a-z0-9]+ | Strings starting with '0' and with many repetitions of '0' can start matching anywhere after the start of the preceeding [a-z0-9]+ |
| regexplib/strings.js:53:44:53:52 | [a-z0-9]+ | Strings with many repetitions of '00' can start matching anywhere after the start of the preceeding [a-z0-9]+ |
| regexplib/strings.js:53:65:53:73 | [a-z0-9]+ | Strings starting with '0' and with many repetitions of '0' can start matching anywhere after the start of the preceeding [a-z0-9]+ |
| regexplib/strings.js:54:20:54:22 | \\w+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (NOT)?(\\s*\\(*)\\s*(\\w+)\\s*(=\|<>\|<\|>\|LIKE\|IN)\\s*(\\(([^\\)]*)\\)\|'([^']*)'\|(-?\\d*\\.?\\d+))(\\s*\\)*\\s*)(AND\|OR)? |
| regexplib/strings.js:56:52:56:53 | .+ | Strings starting with 'PRN.' and with many repetitions of '.' can start matching anywhere after the start of the preceeding .* |
| regexplib/strings.js:57:36:57:38 | .*? | Strings starting with '?se[A' and with many repetitions of '?se[Aa' can start matching anywhere after the start of the preceeding (?s)(?:\\e\\[(?:(\\d+);?)*([A-Za-z])(.*?))(?=\\e\\[\|\\z) |
@@ -519,3 +531,6 @@
| tst.js:399:6:399:12 | (d\|dd)* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
| tst.js:400:6:401:1 | (e\|ee)* | Strings with many repetitions of 'e' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
| tst.js:404:6:405:7 | (g\|gg)* | Strings with many repetitions of 'g' can start matching anywhere after the start of the preceeding (g\|gg)*h$ |
| tst.js:407:128:407:129 | * | Strings starting with '0/*' and with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s* |
| tst.js:409:23:409:29 | [\\w.-]* | Strings starting with '//' and with many repetitions of '//' can start matching anywhere after the start of the preceeding (\\/(?:\\/[\\w.-]*)*){0,1}:([\\w.-]+) |
| tst.js:411:15:411:19 | a{1,} | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a{1,})* |

View File

@@ -29,12 +29,14 @@
| regexplib/email.js:5:24:5:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '0'. |
| regexplib/email.js:5:63:5:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0@0' and containing many repetitions of '0'. |
| regexplib/email.js:6:10:6:35 | (?:[a-zA-Z0-9][\\.\\-\\+_]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:6:60:6:88 | (?:[a-zA-Z0-9][\\.\\-_]?){0,62} | This part of the regular expression may cause exponential backtracking on strings starting with '0@' and containing many repetitions of '0'. |
| regexplib/email.js:13:36:13:44 | [a-zA-Z]* | This part of the regular expression may cause exponential backtracking on strings starting with 'A' and containing many repetitions of 'A'. |
| regexplib/email.js:25:67:25:78 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '0'. |
| regexplib/email.js:25:106:25:117 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0@0' and containing many repetitions of '0'. |
| regexplib/email.js:25:212:25:223 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:25:251:25:262 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| regexplib/email.js:32:10:32:25 | (?:\\w[\\.\\-\\+]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| regexplib/email.js:32:41:32:61 | (?:\\w[\\.\\-\\+]?){0,62} | This part of the regular expression may cause exponential backtracking on strings starting with 'a@' and containing many repetitions of 'a'. |
| regexplib/email.js:33:16:33:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '0'. |
| regexplib/email.js:33:38:33:51 | ([0-9a-zA-Z])+ | This part of the regular expression may cause exponential backtracking on strings starting with '0@' and containing many repetitions of '00.'. |
| regexplib/email.js:33:53:33:58 | [-\\w]* | This part of the regular expression may cause exponential backtracking on strings starting with '0@0' and containing many repetitions of '0'. |
@@ -45,9 +47,13 @@
| regexplib/markup.js:13:6:13:12 | [^"']+? | This part of the regular expression may cause exponential backtracking on strings starting with '<' and containing many repetitions of '!'. |
| regexplib/markup.js:13:14:13:16 | .+? | This part of the regular expression may cause exponential backtracking on strings starting with '<' and containing many repetitions of 'a"'. |
| regexplib/markup.js:17:17:17:19 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '<a ="' and containing many repetitions of '" ="'. |
| regexplib/markup.js:24:43:24:45 | \\S+ | This part of the regular expression may cause exponential backtracking on strings starting with '&lt;A ' and containing many repetitions of '!=- '. |
| regexplib/markup.js:24:47:24:118 | (\\s*=\\s*([-\\w\\.]{1,1024}\|&quot;[^&quot;]{0,1024}&quot;\|'[^']{0,1024}'))? | This part of the regular expression may cause exponential backtracking on strings starting with '&lt;A !' and containing many repetitions of ' =- !'. |
| regexplib/markup.js:37:29:37:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings starting with '[a=' and containing many repetitions of '='. |
| regexplib/markup.js:40:23:40:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| regexplib/markup.js:40:132:40:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with 'a[@a=''' and containing many repetitions of ' @a<""'. |
| regexplib/markup.js:47:43:47:45 | \\S+ | This part of the regular expression may cause exponential backtracking on strings starting with '&lt;A ' and containing many repetitions of '!=- '. |
| regexplib/markup.js:47:47:47:118 | (\\s*=\\s*([-\\w\\.]{1,1024}\|&quot;[^&quot;]{0,1024}&quot;\|'[^']{0,1024}'))? | This part of the regular expression may cause exponential backtracking on strings starting with '&lt;A !' and containing many repetitions of ' =- !'. |
| regexplib/markup.js:53:29:53:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings starting with '[a=' and containing many repetitions of '='. |
| regexplib/markup.js:56:23:56:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| regexplib/markup.js:56:132:56:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with 'a[@a=''' and containing many repetitions of ' @a<""'. |
@@ -62,7 +68,10 @@
| regexplib/misc.js:148:23:148:29 | [^"'=]+ | This part of the regular expression may cause exponential backtracking on strings starting with '<! ' and containing many repetitions of '! '. |
| regexplib/misc.js:173:4:173:11 | ([a-z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| regexplib/strings.js:19:31:19:57 | [a-z&#230;&#248;&#229;0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '#@' and containing many repetitions of '#'. |
| regexplib/strings.js:19:69:19:95 | [a-z&#230;&#248;&#229;0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '#@#' and containing many repetitions of '##'. |
| regexplib/strings.js:47:3:47:5 | \\S* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
| regexplib/strings.js:53:4:53:12 | [a-z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00.'. |
| regexplib/strings.js:53:14:53:24 | [\\-a-z0-9]* | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '00.0'. |
| regexplib/strings.js:57:17:57:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings starting with '?se[' and containing many repetitions of '9'. |
| regexplib/strings.js:81:17:81:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings starting with '?se[' and containing many repetitions of '9'. |
| regexplib/strings.js:91:3:91:5 | \\S* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
@@ -188,3 +197,5 @@
| tst.js:399:6:399:12 | (d\|dd)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'dd'. |
| tst.js:400:6:401:1 | (e\|ee)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ee'. |
| tst.js:404:6:405:7 | (g\|gg)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'gg'. |
| tst.js:407:125:407:127 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '0/*' and containing many repetitions of ' ;0'. |
| tst.js:411:15:411:19 | a{1,} | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

View File

@@ -362,8 +362,8 @@ var bad84 = /^((?:a{0|-)|\w\{\d)+X$/;
var bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/;
var bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/;
// GOOD:
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
// NOT GOOD - but not flagged
var bad86AndAHalf = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
// GOOD
var good43 = /("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)/g;
@@ -403,3 +403,9 @@ var bad96 = new RegExp("(" +
var bad97 = new RegExp(
"(g|gg" +
")*h$");
var bad98 = /^(?:\*\/\*|[a-zA-Z0-9][a-zA-Z0-9!\#\$&\-\^_\.\+]{0,126}\/(?:\*|[a-zA-Z0-9][a-zA-Z0-9!\#\$&\-\^_\.\+]{0,126})(?:\s* *; *[a-zA-Z0-9][a-zA-Z0-9!\#\$&\-\^_\.\+]{0,126}(?:="?[a-zA-Z0-9][a-zA-Z0-9!\#\$&\-\^_\.\+]{0,126}"?)?\s*)*)$/;
var good48 = /(\/(?:\/[\w.-]*)*){0,1}:([\w.-]+)/;
var bad99 = /(a{1,})*b/;

View File

@@ -539,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
}
}
/**
 * A RegExp term that behaves like a plus.
 *
 * This is either a `RegExpPlus`, or a `RegExpRange` whose lower bound is 1 and
 * whose upper bound is either absent (as in `{1,}`) or at least 30.
 * The threshold of 30 was chosen because an exponential blowup of 2^30 is
 * enough to get a decent DOS attack.
 */
private class EffectivelyPlus extends RegExpTerm {
  EffectivelyPlus() {
    this instanceof RegExpPlus
    or
    exists(RegExpRange rng | rng = this and rng.getLowerBound() = 1 |
      not exists(rng.getUpperBound())
      or
      rng.getUpperBound() >= 30
    )
  }
}
/**
 * A RegExp term that behaves like a star.
 *
 * This is either a `RegExpStar`, or a `RegExpRange` whose lower bound is 0 and
 * whose upper bound is either absent or at least 30.
 */
private class EffectivelyStar extends RegExpTerm {
  EffectivelyStar() {
    this instanceof RegExpStar
    or
    exists(RegExpRange rng | rng = this and rng.getLowerBound() = 0 |
      not exists(rng.getUpperBound())
      or
      rng.getUpperBound() >= 30
    )
  }
}
/**
 * A RegExp term that behaves like a question mark.
 *
 * This is either a `RegExpOpt`, or a `RegExpRange` whose lower bound is 0
 * and whose upper bound is 1, i.e. a range of the form `{0,1}`.
 */
private class EffectivelyQuestion extends RegExpTerm {
  EffectivelyQuestion() {
    this instanceof RegExpOpt
    or
    this.(RegExpRange).getLowerBound() = 0 and
    this.(RegExpRange).getUpperBound() = 1
  }
}
/**
* Gets the state before matching `t`.
*/
@@ -559,14 +608,14 @@ State after(RegExpTerm t) {
or
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
or
exists(RegExpStar star | t = star.getAChild() | result = before(star))
exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
or
exists(RegExpPlus plus | t = plus.getAChild() |
exists(EffectivelyPlus plus | t = plus.getAChild() |
result = before(plus) or
result = after(plus)
)
or
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
or
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
}
@@ -617,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
or
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
or
exists(RegExpStar star | lbl = Epsilon() |
exists(EffectivelyStar star | lbl = Epsilon() |
q1 = before(star) and q2 = before(star.getChild(0))
or
q1 = before(star) and q2 = after(star)
)
or
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
exists(EffectivelyPlus plus | lbl = Epsilon() |
q1 = before(plus) and q2 = before(plus.getChild(0))
)
or
exists(RegExpOpt opt | lbl = Epsilon() |
exists(EffectivelyQuestion opt | lbl = Epsilon() |
q1 = before(opt) and q2 = before(opt.getChild(0))
or
q1 = before(opt) and q2 = after(opt)

BIN
ruby/Cargo.lock generated

Binary file not shown.

View File

@@ -12,7 +12,7 @@ node-types = { path = "../node-types" }
tree-sitter = "0.19"
tree-sitter-embedded-template = "0.19"
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "888e2e563ed3b43c417f17e57f7e29c39ce9aeea" }
clap = "2.33"
clap = "3.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
rayon = "1.5.0"

View File

@@ -7,7 +7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = "2.33"
clap = "3.0"
node-types = { path = "../node-types" }
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }

View File

@@ -8,4 +8,4 @@ edition = "2018"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_json = "1.0"

View File

@@ -539,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
}
}
/**
 * A RegExp term that behaves like a plus.
 *
 * This is either a `RegExpPlus`, or a `RegExpRange` whose lower bound is 1 and
 * whose upper bound is either absent (as in `{1,}`) or at least 30.
 * The threshold of 30 was chosen because an exponential blowup of 2^30 is
 * enough to get a decent DOS attack.
 */
private class EffectivelyPlus extends RegExpTerm {
  EffectivelyPlus() {
    this instanceof RegExpPlus
    or
    exists(RegExpRange rng | rng = this and rng.getLowerBound() = 1 |
      not exists(rng.getUpperBound())
      or
      rng.getUpperBound() >= 30
    )
  }
}
/**
 * A RegExp term that behaves like a star.
 *
 * This is either a `RegExpStar`, or a `RegExpRange` whose lower bound is 0 and
 * whose upper bound is either absent or at least 30.
 */
private class EffectivelyStar extends RegExpTerm {
  EffectivelyStar() {
    this instanceof RegExpStar
    or
    exists(RegExpRange rng | rng = this and rng.getLowerBound() = 0 |
      not exists(rng.getUpperBound())
      or
      rng.getUpperBound() >= 30
    )
  }
}
/**
 * A RegExp term that behaves like a question mark.
 *
 * This is either a `RegExpOpt`, or a `RegExpRange` whose lower bound is 0
 * and whose upper bound is 1, i.e. a range of the form `{0,1}`.
 */
private class EffectivelyQuestion extends RegExpTerm {
  EffectivelyQuestion() {
    this instanceof RegExpOpt
    or
    this.(RegExpRange).getLowerBound() = 0 and
    this.(RegExpRange).getUpperBound() = 1
  }
}
/**
* Gets the state before matching `t`.
*/
@@ -559,14 +608,14 @@ State after(RegExpTerm t) {
or
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
or
exists(RegExpStar star | t = star.getAChild() | result = before(star))
exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
or
exists(RegExpPlus plus | t = plus.getAChild() |
exists(EffectivelyPlus plus | t = plus.getAChild() |
result = before(plus) or
result = after(plus)
)
or
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
or
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
}
@@ -617,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
or
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
or
exists(RegExpStar star | lbl = Epsilon() |
exists(EffectivelyStar star | lbl = Epsilon() |
q1 = before(star) and q2 = before(star.getChild(0))
or
q1 = before(star) and q2 = after(star)
)
or
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
exists(EffectivelyPlus plus | lbl = Epsilon() |
q1 = before(plus) and q2 = before(plus.getChild(0))
)
or
exists(RegExpOpt opt | lbl = Epsilon() |
exists(EffectivelyQuestion opt | lbl = Epsilon() |
q1 = before(opt) and q2 = before(opt.getChild(0))
or
q1 = before(opt) and q2 = after(opt)

View File

@@ -362,11 +362,11 @@ bad84 = /^((?:a{0|-)|\w\{\d)+X$/
bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/
bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/
# GOOD:
good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
# NOT GOOD
bad87 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
# NOT GOOD
bad87 = /^X(\u0061|a)*Y$/
bad88 = /^X(\u0061|a)*Y$/
# GOOD
good43 = /^X(\u0061|b)+Y$/