diff --git a/csharp/extractor/Semmle.Extraction/CommentProcessing.cs b/csharp/extractor/Semmle.Extraction/CommentProcessing.cs index 2067f869471..ed3d800ed94 100644 --- a/csharp/extractor/Semmle.Extraction/CommentProcessing.cs +++ b/csharp/extractor/Semmle.Extraction/CommentProcessing.cs @@ -35,7 +35,7 @@ namespace Semmle.Extraction.CommentProcessing class LocationComparer : IComparer { - public int Compare(Location l1, Location l2) => CommentProcessor.Compare(l1, l2); + public int Compare(Location? l1, Location? l2) => CommentProcessor.Compare(l1, l2); } /// @@ -44,8 +44,12 @@ namespace Semmle.Extraction.CommentProcessing /// First location /// Second location /// <0 if l1 before l2, >0 if l1 after l2, else 0. - static int Compare(Location l1, Location l2) + static int Compare(Location? l1, Location? l2) { + if (object.ReferenceEquals(l1, l2)) return 0; + if (l1 == null) return -1; + if (l2 == null) return 1; + int diff = l1.SourceTree == l2.SourceTree ? 0 : l1.SourceTree.FilePath.CompareTo(l2.SourceTree.FilePath); if (diff != 0) return diff; diff = l1.SourceSpan.Start - l2.SourceSpan.Start; @@ -243,7 +247,7 @@ namespace Semmle.Extraction.CommentProcessing /// Process comments up until nextElement. /// Group comments into blocks, and associate blocks with elements. /// - /// + /// /// Enumerator for all comments in the program. /// The next element in the list. /// A stack of nested program elements. @@ -261,7 +265,7 @@ namespace Semmle.Extraction.CommentProcessing // Iterate comments until the commentEnumerator has gone past nextElement while (nextElement == null || Compare(commentEnumerator.Current.Value.Location, nextElement.Value.Key) < 0) { - if(block is null) + if (block is null) block = new CommentBlock(commentEnumerator.Current.Value); if (!block.CombinesWith(commentEnumerator.Current.Value)) @@ -284,7 +288,7 @@ namespace Semmle.Extraction.CommentProcessing } } - if(!(block is null)) + if (!(block is null)) GenerateBindings(block, elementStack, nextElement, cb); return true; diff --git a/csharp/ql/src/semmle/code/csharp/PrintAst.qll b/csharp/ql/src/semmle/code/csharp/PrintAst.qll index 36eca6699dc..f4bbeaf57cf 100644 --- a/csharp/ql/src/semmle/code/csharp/PrintAst.qll +++ b/csharp/ql/src/semmle/code/csharp/PrintAst.qll @@ -151,13 +151,13 @@ class PrintAstNode extends TPrintAstNode { /** * Gets a textual representation of this node in the PrintAst output tree. */ - abstract string toString(); + string toString() { none() } /** * Gets the child node at index `childIndex`. Child indices must be unique, * but need not be contiguous. */ - abstract PrintAstNode getChild(int childIndex); + PrintAstNode getChild(int childIndex) { none() } /** * Gets a child of this node. @@ -172,7 +172,7 @@ class PrintAstNode extends TPrintAstNode { /** * Gets the location of this node in the source code. */ - abstract Location getLocation(); + Location getLocation() { none() } /** * Gets the value of the property of this node, where the name of the property @@ -194,6 +194,30 @@ class PrintAstNode extends TPrintAstNode { } } +/** A top-level AST node. */ +class TopLevelPrintAstNode extends PrintAstNode { + TopLevelPrintAstNode() { not exists(this.getParent()) } + + private int getOrder() { + this = + rank[result](TopLevelPrintAstNode n, Location l | + l = n.getLocation() + | + n + order by + l.getFile().getRelativePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(), + l.getEndColumn() + ) + } + + override string getProperty(string key) { + result = super.getProperty(key) + or + key = "semmle.order" and + result = this.getOrder().toString() + } +} + /** * A node representing an AST node with an underlying `Element`. */ diff --git a/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index 9813c0299ef..5702fce60e4 100644 --- a/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -26,7 +26,7 @@ abstract class NodeImpl extends Node { /** Gets the type of this node used for type pruning. */ cached - DataFlowType getDataFlowType() { + Gvn::GvnType getDataFlowType() { Stages::DataFlowStage::forceCachingInSameStage() and exists(Type t0 | result = Gvn::getGlobalValueNumber(t0) | t0 = getCSharpType(this.getType()) @@ -490,14 +490,23 @@ private Type getCSharpType(DotNet::Type t) { result.matchesHandle(t) } +/** A GVN type that is either a `DataFlowType` or unifiable with a `DataFlowType`. */ +private class DataFlowTypeOrUnifiable extends Gvn::GvnType { + pragma[nomagic] + DataFlowTypeOrUnifiable() { + this instanceof DataFlowType or + Gvn::unifiable(any(DataFlowType t), this) + } +} + pragma[noinline] -private TypeParameter getATypeParameterSubType(DataFlowType t) { +private TypeParameter getATypeParameterSubType(DataFlowTypeOrUnifiable t) { not t instanceof Gvn::TypeParameterGvnType and exists(Type t0 | t = Gvn::getGlobalValueNumber(t0) | implicitConversionRestricted(result, t0)) } pragma[noinline] -private DataFlowType getANonTypeParameterSubType(DataFlowType t) { +private Gvn::GvnType getANonTypeParameterSubType(DataFlowTypeOrUnifiable t) { not t instanceof Gvn::TypeParameterGvnType and not result instanceof Gvn::TypeParameterGvnType and exists(Type t1, Type t2 | @@ -728,12 +737,8 @@ private module Cached { ) } - /** - * Holds if GVNs `t1` and `t2` may have a common sub type. Neither `t1` nor - * `t2` are allowed to be type parameters. - */ - cached - predicate commonSubType(DataFlowType t1, DataFlowType t2) { + pragma[nomagic] + private predicate commonSubTypeGeneral(DataFlowTypeOrUnifiable t1, DataFlowType t2) { not t1 instanceof Gvn::TypeParameterGvnType and t1 = t2 or @@ -742,11 +747,18 @@ private module Cached { getANonTypeParameterSubType(t1) = getANonTypeParameterSubType(t2) } + /** + * Holds if GVNs `t1` and `t2` may have a common sub type. Neither `t1` nor + * `t2` are allowed to be type parameters. + */ + cached + predicate commonSubType(DataFlowType t1, DataFlowType t2) { commonSubTypeGeneral(t1, t2) } + cached predicate commonSubTypeUnifiableLeft(DataFlowType t1, DataFlowType t2) { - exists(DataFlowType t | + exists(Gvn::GvnType t | Gvn::unifiable(t1, t) and - commonSubType(t, t2) + commonSubTypeGeneral(t, t2) ) } @@ -2004,7 +2016,7 @@ module LibraryFlow { /** Gets the type of content `c`. */ pragma[noinline] -private DataFlowType getContentType(Content c) { +private Gvn::GvnType getContentType(Content c) { exists(Type t | result = Gvn::getGlobalValueNumber(t) | t = c.(FieldContent).getField().getType() or @@ -2031,7 +2043,7 @@ class LibraryCodeNode extends NodeImpl, TLibraryCodeNode { override Callable getEnclosingCallableImpl() { result = callCfn.getEnclosingCallable() } - override DataFlowType getDataFlowType() { + override Gvn::GvnType getDataFlowType() { exists(LibraryFlow::AdjustedAccessPath ap | state = LibraryFlow::TLibraryCodeNodeAfterReadState(ap) and if sinkAp.length() = 0 and state.isLastReadState() and preservesValue = true @@ -2194,6 +2206,20 @@ private class ReadStepConfiguration extends ControlFlowReachabilityConfiguration predicate readStep = readStepImpl/3; +/** + * An entity used to represent the type of data-flow node. Two nodes will have + * the same `DataFlowType` when the underlying `Type`s are structurally equal + * modulo type parameters and identity conversions. + * + * For example, `Func` and `Func` are mapped to the same + * `DataFlowType`, while `Func` and `Func` are not, because + * `string` is not a type parameter. + */ +class DataFlowType extends Gvn::GvnType { + pragma[nomagic] + DataFlowType() { this = any(NodeImpl n).getDataFlowType() } +} + /** Gets the type of `n` used for type pruning. */ DataFlowType getNodeType(NodeImpl n) { result = n.getDataFlowType() } @@ -2323,8 +2349,6 @@ class CastNode extends Node { class DataFlowExpr = DotNet::Expr; -class DataFlowType = Gvn::GvnType; - /** Holds if `e` is an expression that always has the same Boolean value `val`. */ private predicate constantBooleanExpr(Expr e, boolean val) { e = any(AbstractValues::BooleanValue bv | val = bv.getValue()).getAnExpr() diff --git a/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll b/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll index 2b5661c2001..759921cf5be 100644 --- a/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll +++ b/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll @@ -222,10 +222,10 @@ class Content extends TContent { Location getLocation() { none() } /** Gets the type of the object containing this content. */ - deprecated DataFlowType getContainerType() { none() } + deprecated Gvn::GvnType getContainerType() { none() } /** Gets the type of this content. */ - deprecated DataFlowType getType() { none() } + deprecated Gvn::GvnType getType() { none() } } /** A reference to a field. */ @@ -241,11 +241,11 @@ class FieldContent extends Content, TFieldContent { override Location getLocation() { result = f.getLocation() } - deprecated override DataFlowType getContainerType() { + deprecated override Gvn::GvnType getContainerType() { result = Gvn::getGlobalValueNumber(f.getDeclaringType()) } - deprecated override DataFlowType getType() { result = Gvn::getGlobalValueNumber(f.getType()) } + deprecated override Gvn::GvnType getType() { result = Gvn::getGlobalValueNumber(f.getType()) } } /** A reference to a property. */ @@ -261,11 +261,11 @@ class PropertyContent extends Content, TPropertyContent { override Location getLocation() { result = p.getLocation() } - deprecated override DataFlowType getContainerType() { + deprecated override Gvn::GvnType getContainerType() { result = Gvn::getGlobalValueNumber(p.getDeclaringType()) } - deprecated override DataFlowType getType() { result = Gvn::getGlobalValueNumber(p.getType()) } + deprecated override Gvn::GvnType getType() { result = Gvn::getGlobalValueNumber(p.getType()) } } /** A reference to an element in a collection. */ diff --git a/csharp/ql/test/experimental/ir/ir/PrintAst.expected b/csharp/ql/test/experimental/ir/ir/PrintAst.expected index fddb231db2a..db6cea8c393 100644 --- a/csharp/ql/test/experimental/ir/ir/PrintAst.expected +++ b/csharp/ql/test/experimental/ir/ir/PrintAst.expected @@ -148,6 +148,10 @@ assignop.cs: # 17| 0: [IntLiteral] 2 # 17| 1: [LocalVariableAccess] access to local variable c casts.cs: +# 1| [Class] Casts_A +# 5| [Class] Casts_B +#-----| 3: (Base types) +# 5| 0: [Class] Casts_A # 9| [Class] Casts # 11| 5: [Method] Main # 12| 4: [BlockStmt] {...} @@ -167,10 +171,6 @@ casts.cs: # 15| 0: [LocalVariableAccess] access to local variable Aobj # 15| 1: [TypeAccess] access to type Casts_B # 15| 1: [LocalVariableAccess] access to local variable bobjAS -# 1| [Class] Casts_A -# 5| [Class] Casts_B -#-----| 3: (Base types) -# 5| 0: [Class] Casts_A collections.cs: # 3| [Class] Collections # 5| 5: [Class] MyClass @@ -469,6 +469,9 @@ inheritance_polymorphism.cs: # 4| 4: [BlockStmt] {...} # 5| 0: [ReturnStmt] return ...; # 5| 0: [IntLiteral] 0 +# 9| [Class] B +#-----| 3: (Base types) +# 9| 0: [Class] A # 13| [Class] C #-----| 3: (Base types) # 13| 0: [Class] B @@ -502,9 +505,6 @@ inheritance_polymorphism.cs: # 34| 6: [ExprStmt] ...; # 34| 0: [MethodCall] call to method function # 34| -1: [LocalVariableAccess] access to local variable objC -# 9| [Class] B -#-----| 3: (Base types) -# 9| 0: [Class] A inoutref.cs: # 1| [Class] MyClass # 2| 5: [Field] fld diff --git a/csharp/ql/test/library-tests/csharp7.2/PrintAst.expected b/csharp/ql/test/library-tests/csharp7.2/PrintAst.expected index 03f61e18065..d6c9866cc08 100644 --- a/csharp/ql/test/library-tests/csharp7.2/PrintAst.expected +++ b/csharp/ql/test/library-tests/csharp7.2/PrintAst.expected @@ -22,6 +22,9 @@ csharp72.cs: # 28| 0: [RefExpr] ref ... # 28| 0: [FieldAccess] access to field s # 31| 7: [DelegateType] Del +# 34| [Struct] ReadonlyStruct +# 38| [Struct] RefStruct +# 42| [Struct] ReadonlyRefStruct # 46| [Class] NumericLiterals # 48| 5: [Field] binaryValue # 48| 1: [AssignExpr] ... = ... @@ -34,6 +37,3 @@ csharp72.cs: # 53| 1: [FieldAccess] access to field X # 55| 6: [Method] F # 55| 4: [BlockStmt] {...} -# 34| [Struct] ReadonlyStruct -# 38| [Struct] RefStruct -# 42| [Struct] ReadonlyRefStruct diff --git a/csharp/ql/test/library-tests/csharp7.3/PrintAst.expected b/csharp/ql/test/library-tests/csharp7.3/PrintAst.expected index 81e247bfb4f..4574aa10590 100644 --- a/csharp/ql/test/library-tests/csharp7.3/PrintAst.expected +++ b/csharp/ql/test/library-tests/csharp7.3/PrintAst.expected @@ -50,6 +50,15 @@ csharp73.cs: # 22| 0: [IntLiteral] 10 # 22| 1: [LocalVariableAccess] access to local variable t # 25| 1: [BlockStmt] {...} +# 30| [Class] UnmanagedConstraint<> +#-----| 1: (Type parameters) +# 30| 0: [TypeParameter] T +# 34| [Class] EnumConstraint<> +#-----| 1: (Type parameters) +# 34| 0: [TypeParameter] T +# 38| [Class] DelegateConstraint<> +#-----| 1: (Type parameters) +# 38| 0: [TypeParameter] T # 42| [Class] ExpressionVariables # 44| 4: [InstanceConstructor] ExpressionVariables #-----| 2: (Parameters) @@ -69,12 +78,3 @@ csharp73.cs: # 51| 0: [InterpolatedStringExpr] $"..." # 51| 0: [StringLiteral] "x is " # 51| 1: [LocalVariableAccess] access to local variable x -# 30| [Class] UnmanagedConstraint<> -#-----| 1: (Type parameters) -# 30| 0: [TypeParameter] T -# 34| [Class] EnumConstraint<> -#-----| 1: (Type parameters) -# 34| 0: [TypeParameter] T -# 38| [Class] DelegateConstraint<> -#-----| 1: (Type parameters) -# 38| 0: [TypeParameter] T diff --git a/csharp/ql/test/library-tests/csharp8/PrintAst.expected b/csharp/ql/test/library-tests/csharp8/PrintAst.expected index d0c199b74bd..714099744c3 100644 --- a/csharp/ql/test/library-tests/csharp8/PrintAst.expected +++ b/csharp/ql/test/library-tests/csharp8/PrintAst.expected @@ -13,26 +13,6 @@ AlternateInterpolatedStrings.cs: # 6| 1: [IntLiteral] 12 # 6| 1: [FieldAccess] access to field s2 AsyncStreams.cs: -# 24| [NamespaceDeclaration] namespace ... { ... } -# 26| 1: [Interface] IAsyncDisposable -# 28| 4: [Method] DisposeAsync -# 32| [NamespaceDeclaration] namespace ... { ... } -# 34| 1: [Interface] IAsyncEnumerable<> -#-----| 1: (Type parameters) -# 34| 0: [TypeParameter] T -# 36| 4: [Method] GetAsyncEnumerator -#-----| 2: (Parameters) -# 36| 0: [Parameter] cancellationToken -# 36| 1: [DefaultValueExpr] default(...) -# 36| 0: [TypeAccess] access to type CancellationToken -# 39| 2: [Interface] IAsyncEnumerator<> -#-----| 1: (Type parameters) -# 39| 0: [TypeParameter] T -#-----| 3: (Base types) -# 39| 1: [Interface] IAsyncDisposable -# 41| 4: [Property] Current -# 41| 3: [Getter] get_Current -# 42| 5: [Method] MoveNextAsync # 8| [Class] AsyncStreams # 10| 5: [Method] Items # 10| 4: [BlockStmt] {...} @@ -56,6 +36,26 @@ AsyncStreams.cs: # 20| 0: [MethodCall] call to method WriteLine # 20| -1: [TypeAccess] access to type Console # 20| 0: [LocalVariableAccess] access to local variable item +# 24| [NamespaceDeclaration] namespace ... { ... } +# 26| 1: [Interface] IAsyncDisposable +# 28| 4: [Method] DisposeAsync +# 32| [NamespaceDeclaration] namespace ... { ... } +# 34| 1: [Interface] IAsyncEnumerable<> +#-----| 1: (Type parameters) +# 34| 0: [TypeParameter] T +# 36| 4: [Method] GetAsyncEnumerator +#-----| 2: (Parameters) +# 36| 0: [Parameter] cancellationToken +# 36| 1: [DefaultValueExpr] default(...) +# 36| 0: [TypeAccess] access to type CancellationToken +# 39| 2: [Interface] IAsyncEnumerator<> +#-----| 1: (Type parameters) +# 39| 0: [TypeParameter] T +#-----| 3: (Base types) +# 39| 1: [Interface] IAsyncDisposable +# 41| 4: [Property] Current +# 41| 3: [Getter] get_Current +# 42| 5: [Method] MoveNextAsync DefaultInterfaceMethods.cs: # 3| [Interface] IPerson # 5| 4: [IndexerProperty] Name @@ -332,7 +332,6 @@ NullableRefTypes.cs: # 129| 14: [Field] j # 130| 15: [Field] k # 131| 16: [Field] l -# 165| [Struct] MyStruct # 136| [Class] ToStringWithTypes2 # 138| 5: [Field] a # 139| 6: [Field] b @@ -361,6 +360,7 @@ NullableRefTypes.cs: # 160| 1: [LocalVariableAccess] access to local variable a # 161| 1: [ReturnStmt] return ...; # 161| 0: [LocalVariableAccess] access to local variable a +# 165| [Struct] MyStruct # 171| [Class] TestNullableFlowStates # 173| 5: [Method] MaybeNull # 175| 6: [Method] Check @@ -938,51 +938,6 @@ patterns.cs: # 164| 9: [Method] Deconstruct # 165| 4: [BlockStmt] {...} ranges.cs: -# 25| [NamespaceDeclaration] namespace ... { ... } -# 27| 1: [Struct] Index -# 29| 5: [InstanceConstructor] Index -#-----| 2: (Parameters) -# 29| 0: [Parameter] value -# 29| 1: [Parameter] fromEnd -# 29| 1: [BoolLiteral] false -# 29| 4: [BlockStmt] {...} -# 30| 6: [ImplicitConversionOperator] implicit conversion -#-----| 2: (Parameters) -# 30| 0: [Parameter] value -# 30| 4: [DefaultValueExpr] default(...) -# 30| 0: [TypeAccess] access to type Index -# 33| 2: [Struct] Range -# 35| 5: [InstanceConstructor] Range -#-----| 2: (Parameters) -# 35| 0: [Parameter] start -# 35| 1: [Parameter] end -# 35| 4: [ThrowExpr] throw ... -# 35| 0: [NullLiteral] null -# 36| 6: [Method] StartAt -#-----| 2: (Parameters) -# 36| 0: [Parameter] start -# 36| 4: [ThrowExpr] throw ... -# 36| 0: [NullLiteral] null -# 37| 7: [Method] EndAt -#-----| 2: (Parameters) -# 37| 0: [Parameter] end -# 37| 4: [ThrowExpr] throw ... -# 37| 0: [NullLiteral] null -# 38| 8: [Property] All -# 38| 3: [Getter] get_All -# 38| 4: [ThrowExpr] throw ... -# 38| 0: [NullLiteral] null -# 39| 9: [Method] Create -#-----| 2: (Parameters) -# 39| 0: [Parameter] start -# 39| 1: [Parameter] end -# 39| 4: [ThrowExpr] throw ... -# 39| 0: [NullLiteral] null -# 40| 10: [ImplicitConversionOperator] implicit conversion -#-----| 2: (Parameters) -# 40| 0: [Parameter] r -# 40| 4: [ThrowExpr] throw ... -# 40| 0: [NullLiteral] null # 5| [Class] Ranges # 7| 5: [Method] F # 8| 4: [BlockStmt] {...} @@ -1090,3 +1045,48 @@ ranges.cs: # 20| 1: [OperatorCall] call to operator implicit conversion # 20| 0: [RangeExpr] ... .. ... # 20| 1: [LocalVariableAccess] access to local variable slice8 +# 25| [NamespaceDeclaration] namespace ... { ... } +# 27| 1: [Struct] Index +# 29| 5: [InstanceConstructor] Index +#-----| 2: (Parameters) +# 29| 0: [Parameter] value +# 29| 1: [Parameter] fromEnd +# 29| 1: [BoolLiteral] false +# 29| 4: [BlockStmt] {...} +# 30| 6: [ImplicitConversionOperator] implicit conversion +#-----| 2: (Parameters) +# 30| 0: [Parameter] value +# 30| 4: [DefaultValueExpr] default(...) +# 30| 0: [TypeAccess] access to type Index +# 33| 2: [Struct] Range +# 35| 5: [InstanceConstructor] Range +#-----| 2: (Parameters) +# 35| 0: [Parameter] start +# 35| 1: [Parameter] end +# 35| 4: [ThrowExpr] throw ... +# 35| 0: [NullLiteral] null +# 36| 6: [Method] StartAt +#-----| 2: (Parameters) +# 36| 0: [Parameter] start +# 36| 4: [ThrowExpr] throw ... +# 36| 0: [NullLiteral] null +# 37| 7: [Method] EndAt +#-----| 2: (Parameters) +# 37| 0: [Parameter] end +# 37| 4: [ThrowExpr] throw ... +# 37| 0: [NullLiteral] null +# 38| 8: [Property] All +# 38| 3: [Getter] get_All +# 38| 4: [ThrowExpr] throw ... +# 38| 0: [NullLiteral] null +# 39| 9: [Method] Create +#-----| 2: (Parameters) +# 39| 0: [Parameter] start +# 39| 1: [Parameter] end +# 39| 4: [ThrowExpr] throw ... +# 39| 0: [NullLiteral] null +# 40| 10: [ImplicitConversionOperator] implicit conversion +#-----| 2: (Parameters) +# 40| 0: [Parameter] r +# 40| 4: [ThrowExpr] throw ... +# 40| 0: [NullLiteral] null diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 81135dbd04c..cea66cac21a 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -41,8 +41,66 @@ public class FileExtractor { public static final Pattern JSON_OBJECT_START = Pattern.compile("^(?s)\\s*\\{\\s*\"([^\"]|\\\\.)*\"\\s*:.*"); - /** The charset for decoding UTF-8 strings. */ - private static final Charset UTF8_CHARSET = Charset.forName("UTF-8"); + /** + * Returns true if the byte sequence contains invalid UTF-8 or unprintable ASCII characters. + */ + private static boolean hasUnprintableUtf8(byte[] bytes, int length) { + // Constants for bytes with N high-order 1-bits. + // They are typed as `int` as the subsequent byte-to-int promotion would + // otherwise fill the high-order `int` bits with 1s. + final int high1 = 0b10000000; + final int high2 = 0b11000000; + final int high3 = 0b11100000; + final int high4 = 0b11110000; + final int high5 = 0b11111000; + + int startIndex = skipBOM(bytes, length); + for (int i = startIndex; i < length; ++i) { + int b = bytes[i]; + if ((b & high1) == 0) { // 0xxxxxxx is an ASCII character + // ASCII values 0-31 are unprintable, except 9-13 are whitespace. + // 127 is the unprintable DEL character. + if (b <= 8 || 14 <= b && b <= 31 || b == 127) { + return true; + } + } else { + // Check for malformed UTF-8 multibyte code point + int trailingBytes = 0; + if ((b & high3) == high2) { + trailingBytes = 1; // 110xxxxx 10xxxxxx + } else if ((b & high4) == high3) { + trailingBytes = 2; // 1110xxxx 10xxxxxx 10xxxxxx + } else if ((b & high5) == high4) { + trailingBytes = 3; // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + } else { + return true; // 10xxxxxx and 11111xxx are not valid here. + } + // Trailing bytes must be of form 10xxxxxx + while (trailingBytes > 0) { + ++i; + --trailingBytes; + if (i >= length) { + return false; + } + if ((bytes[i] & high2) != high1) { + return true; + } + } + } + } + return false; + } + + /** Returns the index after the initial BOM, if any, otherwise 0. */ + private static int skipBOM(byte[] bytes, int length) { + if (length >= 2 + && (bytes[0] == (byte) 0xfe && bytes[1] == (byte) 0xff + || bytes[0] == (byte) 0xff && bytes[1] == (byte) 0xfe)) { + return 2; + } else { + return 0; + } + } /** Information about supported file types. */ public static enum FileType { @@ -66,6 +124,10 @@ public class FileExtractor { @Override protected boolean contains(File f, String lcExt, ExtractorConfig config) { + if (isBinaryFile(f, lcExt, config)) { + return false; + } + if (super.contains(f, lcExt, config)) return true; // detect Node.js scripts that are meant to be run from @@ -90,6 +152,32 @@ public class FileExtractor { public String toString() { return "javascript"; } + + /** Number of bytes to read from the beginning of a ".js" file to detect if it is a binary file. */ + private static final int fileHeaderSize = 128; + + /** Computes if `f` is a binary file based on whether the initial `fileHeaderSize` bytes are printable UTF-8 chars. */ + private boolean isBinaryFile(File f, String lcExt, ExtractorConfig config) { + if (!config.getDefaultEncoding().equals(StandardCharsets.UTF_8.name())) { + return false; + } + try (FileInputStream fis = new FileInputStream(f)) { + byte[] bytes = new byte[fileHeaderSize]; + int length = fis.read(bytes); + + if (length == -1) return false; + + // Avoid invalid or unprintable UTF-8 files. + if (hasUnprintableUtf8(bytes, length)) { + return true; + } + + return false; + } catch (IOException e) { + Exceptions.ignore(e, "Let extractor handle this one."); + } + return false; + } }, JSON(".json") { @@ -160,7 +248,7 @@ public class FileExtractor { if (length == -1) return false; // Avoid invalid or unprintable UTF-8 files. - if (config.getDefaultEncoding().equals("UTF-8") && hasUnprintableUtf8(bytes, length)) { + if (config.getDefaultEncoding().equals(StandardCharsets.UTF_8.name()) && hasUnprintableUtf8(bytes, length)) { return true; } @@ -182,17 +270,6 @@ public class FileExtractor { return false; } - /** Returns the index after the initial BOM, if any, otherwise 0. */ - private int skipBOM(byte[] bytes, int length) { - if (length >= 2 - && (bytes[0] == (byte) 0xfe && bytes[1] == (byte) 0xff - || bytes[0] == (byte) 0xff && bytes[1] == (byte) 0xfe)) { - return 2; - } else { - return 0; - } - } - private boolean isXml(byte[] bytes, int length) { int startIndex = skipBOM(bytes, length); // Check for `<` encoded in Ascii/UTF-8 or litte-endian UTF-16. @@ -211,56 +288,6 @@ public class FileExtractor { return s.startsWith("! TOUCHSTONE file ") || s.startsWith("[Version] 2.0"); } - /** - * Returns true if the byte sequence contains invalid UTF-8 or unprintable ASCII characters. - */ - private boolean hasUnprintableUtf8(byte[] bytes, int length) { - // Constants for bytes with N high-order 1-bits. - // They are typed as `int` as the subsequent byte-to-int promotion would - // otherwise fill the high-order `int` bits with 1s. - final int high1 = 0b10000000; - final int high2 = 0b11000000; - final int high3 = 0b11100000; - final int high4 = 0b11110000; - final int high5 = 0b11111000; - - int startIndex = skipBOM(bytes, length); - for (int i = startIndex; i < length; ++i) { - int b = bytes[i]; - if ((b & high1) == 0) { // 0xxxxxxx is an ASCII character - // ASCII values 0-31 are unprintable, except 9-13 are whitespace. - // 127 is the unprintable DEL character. - if (b <= 8 || 14 <= b && b <= 31 || b == 127) { - return true; - } - } else { - // Check for malformed UTF-8 multibyte code point - int trailingBytes = 0; - if ((b & high3) == high2) { - trailingBytes = 1; // 110xxxxx 10xxxxxx - } else if ((b & high4) == high3) { - trailingBytes = 2; // 1110xxxx 10xxxxxx 10xxxxxx - } else if ((b & high5) == high4) { - trailingBytes = 3; // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - } else { - return true; // 10xxxxxx and 11111xxx are not valid here. - } - // Trailing bytes must be of form 10xxxxxx - while (trailingBytes > 0) { - ++i; - --trailingBytes; - if (i >= length) { - return false; - } - if ((bytes[i] & high2) != high1) { - return true; - } - } - } - } - return false; - } - /** * Returns true if the byte sequence starts with a shebang line that is not recognized as a * JavaScript interpreter. @@ -288,7 +315,7 @@ public class FileExtractor { // Extract the shebang text int startOfText = startIndex + "#!".length(); int lengthOfText = endOfLine - startOfText; - String text = new String(bytes, startOfText, lengthOfText, UTF8_CHARSET); + String text = new String(bytes, startOfText, lengthOfText, StandardCharsets.UTF_8); // Check if the shebang is a recognized JavaScript intepreter. return !NODE_INVOCATION.matcher(text).find(); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index bb34dfbe2be..e29140f9181 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -43,7 +43,7 @@ public class Main { * A version identifier that should be updated every time the extractor changes in such a way that * it may produce different tuples for the same file under the same {@link ExtractorConfig}. */ - public static final String EXTRACTOR_VERSION = "2020-08-19"; + public static final String EXTRACTOR_VERSION = "2020-08-20-2"; public static final Pattern NEWLINE = Pattern.compile("\n"); diff --git a/javascript/ql/examples/queries/dataflow/StoredXss/StoredXssTrackedNode.ql b/javascript/ql/examples/queries/dataflow/StoredXss/StoredXssTypeTracking.ql similarity index 66% rename from javascript/ql/examples/queries/dataflow/StoredXss/StoredXssTrackedNode.ql rename to javascript/ql/examples/queries/dataflow/StoredXss/StoredXssTypeTracking.ql index 70e79b2220e..340ae61f7f0 100644 --- a/javascript/ql/examples/queries/dataflow/StoredXss/StoredXssTrackedNode.ql +++ b/javascript/ql/examples/queries/dataflow/StoredXss/StoredXssTypeTracking.ql @@ -15,18 +15,18 @@ import DataFlow::PathGraph /** * An instance of `mysql.createConnection()`, tracked globally. */ -class MysqlConnection extends TrackedNode { - MysqlConnection() { this = moduleImport("mysql").getAMemberCall("createConnection") } - - /** - * Gets a call to the `query` method on this connection object. - */ - MethodCallNode getAQueryCall() { - this.flowsTo(result.getReceiver()) and - result.getMethodName() = "query" - } +DataFlow::SourceNode mysqlConnection(DataFlow::TypeTracker t) { + t.start() and + result = moduleImport("mysql").getAMemberCall("createConnection") + or + exists(DataFlow::TypeTracker t2 | result = mysqlConnection(t2).track(t2, t)) } +/** + * An instance of `mysql.createConnection()`, tracked globally. + */ +DataFlow::SourceNode mysqlConnection() { result = mysqlConnection(DataFlow::TypeTracker::end()) } + /** * Data returned from a MySQL query. * @@ -42,7 +42,7 @@ class MysqlConnection extends TrackedNode { * ``` */ class MysqlSource extends StoredXss::Source { - MysqlSource() { this = any(MysqlConnection con).getAQueryCall().getCallback(1).getParameter(1) } + MysqlSource() { this = mysqlConnection().getAMethodCall("query").getCallback(1).getParameter(1) } } from StoredXss::Configuration cfg, PathNode source, PathNode sink diff --git a/javascript/ql/src/semmle/javascript/NodeJS.qll b/javascript/ql/src/semmle/javascript/NodeJS.qll index 43fbb256998..64fd9a8d47a 100644 --- a/javascript/ql/src/semmle/javascript/NodeJS.qll +++ b/javascript/ql/src/semmle/javascript/NodeJS.qll @@ -162,6 +162,17 @@ private predicate isRequire(DataFlow::Node nd) { not nd.getFile().getExtension() = "mjs" or isRequire(nd.getAPredecessor()) + or + // `import { createRequire } from 'module';` support. + // specialized to ES2015 modules to avoid recursion in the `DataFlow::moduleImport()` predicate. + exists(ImportDeclaration imp | imp.getImportedPath().getValue() = "module" | + nd = + imp + .getImportedModuleNode() + .(DataFlow::SourceNode) + .getAPropertyRead("createRequire") + .getACall() + ) } /** diff --git a/javascript/ql/src/semmle/javascript/Promises.qll b/javascript/ql/src/semmle/javascript/Promises.qll index c45b250dd65..a893c185828 100644 --- a/javascript/ql/src/semmle/javascript/Promises.qll +++ b/javascript/ql/src/semmle/javascript/Promises.qll @@ -455,6 +455,49 @@ private class PromiseTaintStep extends TaintTracking::AdditionalTaintStep { } } +/** + * Defines flow steps for return on async functions. + */ +private module AsyncReturnSteps { + private predicate valueProp = Promises::valueProp/0; + + private predicate errorProp = Promises::errorProp/0; + + private import semmle.javascript.dataflow.internal.FlowSteps + + /** + * A data-flow step for ordinary and exceptional returns from async functions. + */ + private class AsyncReturn extends PreCallGraphStep { + override predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop) { + exists(DataFlow::FunctionNode f | f.getFunction().isAsync() | + // ordinary return + prop = valueProp() and + pred = f.getAReturn() and + succ = f.getReturnNode() + or + // exceptional return + prop = errorProp() and + localExceptionStepWithAsyncFlag(pred, succ, true) + ) + } + } + + /** + * A data-flow step for ordinary return from an async function in a taint configuration. + */ + private class AsyncTaintReturn extends TaintTracking::AdditionalTaintStep, DataFlow::FunctionNode { + Function f; + + AsyncTaintReturn() { this.getFunction() = f and f.isAsync() } + + override predicate step(DataFlow::Node pred, DataFlow::Node succ) { + returnExpr(f, pred, _) and + succ.(DataFlow::FunctionReturnNode).getFunction() = f + } + } +} + /** * Provides classes for working with the `bluebird` library (http://bluebirdjs.com). */ diff --git a/javascript/ql/src/semmle/javascript/dataflow/Configuration.qll b/javascript/ql/src/semmle/javascript/dataflow/Configuration.qll index 203b282120f..8697d95a6b2 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/Configuration.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/Configuration.qll @@ -982,8 +982,8 @@ private predicate appendStep( private predicate flowThroughCall( DataFlow::Node input, DataFlow::Node output, DataFlow::Configuration cfg, PathSummary summary ) { - exists(Function f, DataFlow::ValueNode ret | - ret.asExpr() = f.getAReturnedExpr() and + exists(Function f, DataFlow::FunctionReturnNode ret | + ret.getFunction() = f and (calls(output, f) or callsBound(output, f, _)) and // Do not consider partial calls reachableFromInput(f, output, input, ret, cfg, summary) and not isBarrierEdge(cfg, ret, output) and @@ -1000,6 +1000,17 @@ private predicate flowThroughCall( not isLabeledBarrierEdge(cfg, ret, output, summary.getEndLabel()) and not cfg.isLabeledBarrier(output, summary.getEndLabel()) ) + or + // exception thrown inside an immediately awaited function call. + exists(DataFlow::FunctionNode f, DataFlow::Node invk, DataFlow::Node ret | + f.getFunction().isAsync() + | + (calls(invk, f.getFunction()) or callsBound(invk, f.getFunction(), _)) and + reachableFromInput(f.getFunction(), invk, input, ret, cfg, summary) and + output = invk.asExpr().getExceptionTarget() and + f.getExceptionalReturn() = getThrowTarget(ret) and + invk = getAwaitOperand(_) + ) } /** @@ -1019,10 +1030,16 @@ private predicate storeStep( isAdditionalStoreStep(pred, succ, prop, cfg) and summary = PathSummary::level() or - exists(Function f, DataFlow::Node mid | + exists(Function f, DataFlow::Node mid, DataFlow::Node invk | + not f.isAsync() and invk = succ + or + // store in an immediately awaited function call + f.isAsync() and + invk = getAwaitOperand(succ) + | // `f` stores its parameter `pred` in property `prop` of a value that flows back to the caller, // and `succ` is an invocation of `f` - reachableFromInput(f, succ, pred, mid, cfg, summary) and + reachableFromInput(f, invk, pred, mid, cfg, summary) and ( returnedPropWrite(f, _, prop, mid) or @@ -1030,12 +1047,22 @@ private predicate storeStep( isAdditionalStoreStep(mid, base, prop, cfg) ) or - succ instanceof DataFlow::NewNode and + invk instanceof DataFlow::NewNode and receiverPropWrite(f, prop, mid) ) ) } +/** + * Gets a dataflow-node for the operand of the await-expression `await`. + */ +private DataFlow::Node getAwaitOperand(DataFlow::Node await) { + exists(AwaitExpr awaitExpr | + result = awaitExpr.getOperand().getUnderlyingValue().flow() and + await.asExpr() = awaitExpr + ) +} + /** * Holds if `f` may `read` property `prop` of parameter `parm`. */ @@ -1128,8 +1155,14 @@ private predicate loadStep( isAdditionalLoadStep(pred, succ, prop, cfg) and summary = PathSummary::level() or - exists(Function f, DataFlow::Node read | - parameterPropRead(f, succ, pred, prop, read, cfg) and + exists(Function f, DataFlow::Node read, DataFlow::Node invk | + not f.isAsync() and invk = succ + or + // load from an immediately awaited function call + f.isAsync() and + invk = getAwaitOperand(succ) + | + parameterPropRead(f, invk, pred, prop, read, cfg) and reachesReturn(f, read, cfg, summary) ) } @@ -1624,6 +1657,9 @@ class MidPathNode extends PathNode, MkMidNode { // Skip the exceptional return on functions, as this highlights the entire function. nd = any(DataFlow::FunctionNode f).getExceptionalReturn() or + // Skip the special return node for functions, as this highlights the entire function (and the returned expr is the previous node). + nd = any(DataFlow::FunctionNode f).getReturnNode() + or // Skip the synthetic 'this' node, as a ThisExpr will be the next node anyway nd = DataFlow::thisNode(_) or diff --git a/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll b/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll index 62d6849f868..f1525ad75a2 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll @@ -922,6 +922,32 @@ module DataFlow { override File getFile() { result = function.getFile() } } + /** + * A data flow node representing the values returned by a function. + */ + class FunctionReturnNode extends DataFlow::Node, TFunctionReturnNode { + Function function; + + FunctionReturnNode() { this = TFunctionReturnNode(function) } + + override string toString() { result = "return of " + function.describe() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + function.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + override BasicBlock getBasicBlock() { result = function.(ExprOrStmt).getBasicBlock() } + + /** + * Gets the function corresponding to this return node. + */ + Function getFunction() { result = function } + + override File getFile() { result = function.getFile() } + } + /** * A data flow node representing the exceptions thrown by the callee of an invocation. */ @@ -1265,6 +1291,13 @@ module DataFlow { nd = TExceptionalFunctionReturnNode(function) } + /** + * INTERNAL: Use `FunctionReturnNode` instead. + */ + predicate functionReturnNode(DataFlow::Node nd, Function function) { + nd = TFunctionReturnNode(function) + } + /** * Gets the data flow node corresponding the given l-value expression, if * such a node exists. @@ -1460,6 +1493,11 @@ module DataFlow { localCall(succExpr, f) ) ) + or + // from returned expr to the FunctionReturnNode. + exists(Function f | not f.isAsync() | + DataFlow::functionReturnNode(succ, f) and pred = valueNode(f.getAReturnedExpr()) + ) } /** diff --git a/javascript/ql/src/semmle/javascript/dataflow/Nodes.qll b/javascript/ql/src/semmle/javascript/dataflow/Nodes.qll index aeb69f25179..b8327265f70 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/Nodes.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/Nodes.qll @@ -491,6 +491,16 @@ class FunctionNode extends DataFlow::ValueNode, DataFlow::SourceNode { DataFlow::ExceptionalFunctionReturnNode getExceptionalReturn() { DataFlow::exceptionalFunctionReturnNode(result, astNode) } + + /** + * Gets the data flow node representing the value returned from this function. + * + * Note that this differs from `getAReturn()`, in that every function has exactly + * one canonical return node, but may have multiple (or zero) returned expressions. + * The result of `getAReturn()` is always a predecessor of `getReturnNode()` + * in the data-flow graph. + */ + DataFlow::FunctionReturnNode getReturnNode() { DataFlow::functionReturnNode(result, astNode) } } /** diff --git a/javascript/ql/src/semmle/javascript/dataflow/Sources.qll b/javascript/ql/src/semmle/javascript/dataflow/Sources.qll index 8b54b5029df..c3017585c71 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/Sources.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/Sources.qll @@ -2,7 +2,7 @@ * Provides support for intra-procedural tracking of a customizable * set of data flow nodes. * - * Note that unlike `TrackedNodes`, this library only performs + * Note that unlike `TypeTracking.qll`, this library only performs * local tracking within a function. */ @@ -319,6 +319,9 @@ module SourceNode { this = DataFlow::destructuredModuleImportNode(_) or this = DataFlow::globalAccessPathRootPseudoNode() + or + // Include return nodes because they model the implicit Promise creation in async functions. + DataFlow::functionReturnNode(this, _) } } } diff --git a/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll b/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll index beacfb554e6..14f83514ae3 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll @@ -1,4 +1,52 @@ /** + * DEPRECATED: Use `TypeTracking.qll` instead. + * + * The following `TrackedNode` usage is usually equivalent to the type tracking usage below. + * + * ``` + * class MyTrackedNode extends TrackedNode { + * MyTrackedNode() { isInteresting(this) } + * } + * + * DataFlow::Node getMyTrackedNodeLocation(MyTrackedNode n) { + * n.flowsTo(result) + * } + * ``` + * + * ``` + * DataFlow::SourceNode getMyTrackedNodeLocation(DataFlow::SourceNode start, DataFlow::TypeTracker t) { + * t.start() and + * isInteresting(result) and + * result = start + * or + * exists (DataFlow::TypeTracker t2 | + * result = getMyTrackedNodeLocation(start, t2).track(t2, t) + * ) + * } + * + * DataFlow::SourceNode getMyTrackedNodeLocation(DataFlow::SourceNode n) { + * result = getMyTrackedNodeLocation(n, DataFlow::TypeTracker::end()) + * } + * ``` + * + * In rare cases, additional tracking is required, for instance when tracking string constants, and the following type tracking formulation is required instead. + * + * ``` + * DataFlow::Node getMyTrackedNodeLocation(DataFlow::Node start, DataFlow::TypeTracker t) { + * t.start() and + * isInteresting(result) and + * result = start + * or + * exists(DataFlow::TypeTracker t2 | + * t = t2.smallstep(getMyTrackedNodeLocation(start, t2), result) + * ) + * } + * + * DataFlow::Node getMyTrackedNodeLocation(DataFlow::Node n) { + * result = getMyTrackedNodeLocation(n, DataFlow::TypeTracker::end()) + * } + * ``` + * * Provides support for inter-procedural tracking of a customizable * set of data flow nodes. */ @@ -12,7 +60,7 @@ private import internal.FlowSteps as FlowSteps * To track additional values, extends this class with additional * subclasses. */ -abstract class TrackedNode extends DataFlow::Node { +abstract deprecated class TrackedNode extends DataFlow::Node { /** * Holds if this node flows into `sink` in zero or more (possibly * inter-procedural) steps. @@ -26,7 +74,7 @@ abstract class TrackedNode extends DataFlow::Node { * To track additional expressions, extends this class with additional * subclasses. */ -abstract class TrackedExpr extends Expr { +abstract deprecated class TrackedExpr extends Expr { predicate flowsTo(Expr sink) { exists(TrackedExprNode ten | ten.asExpr() = this | ten.flowsTo(DataFlow::valueNode(sink))) } @@ -35,7 +83,7 @@ abstract class TrackedExpr extends Expr { /** * Turn all `TrackedExpr`s into `TrackedNode`s. */ -private class TrackedExprNode extends TrackedNode { +deprecated private class TrackedExprNode extends TrackedNode { TrackedExprNode() { asExpr() instanceof TrackedExpr } } @@ -64,7 +112,9 @@ private module NodeTracking { * * Summary steps through function calls are not taken into account. */ - private predicate basicFlowStep(DataFlow::Node pred, DataFlow::Node succ, PathSummary summary) { + deprecated private predicate basicFlowStep( + DataFlow::Node pred, DataFlow::Node succ, PathSummary summary + ) { isRelevant(pred) and ( // Local flow @@ -94,7 +144,7 @@ private module NodeTracking { * * No call/return matching is done, so this is a relatively coarse over-approximation. */ - private predicate isRelevant(DataFlow::Node nd) { + deprecated private predicate isRelevant(DataFlow::Node nd) { nd instanceof TrackedNode or exists(DataFlow::Node mid | isRelevant(mid) | @@ -115,7 +165,7 @@ private module NodeTracking { * either `pred` is an argument of `f` and `succ` the corresponding parameter, or * `pred` is a variable definition whose value is captured by `f` at `succ`. */ - private predicate callInputStep( + deprecated private predicate callInputStep( Function f, DataFlow::Node invk, DataFlow::Node pred, DataFlow::Node succ ) { isRelevant(pred) and @@ -136,7 +186,7 @@ private module NodeTracking { * that is captured by `f`, may flow to `nd` (possibly through callees, but not containing * any unmatched calls or returns) along a path summarized by `summary`. */ - private predicate reachableFromInput( + deprecated private predicate reachableFromInput( Function f, DataFlow::Node invk, DataFlow::Node input, DataFlow::Node nd, PathSummary summary ) { callInputStep(f, invk, input, nd) and @@ -154,7 +204,7 @@ private module NodeTracking { * Holds if `nd` may flow into a return statement of `f` * (possibly through callees) along a path summarized by `summary`. */ - private predicate reachesReturn(Function f, DataFlow::Node nd, PathSummary summary) { + deprecated private predicate reachesReturn(Function f, DataFlow::Node nd, PathSummary summary) { returnExpr(f, nd, _) and summary = PathSummary::level() or @@ -170,7 +220,7 @@ private module NodeTracking { * which is either an argument or a definition captured by the function, flows, * possibly through callees. */ - private predicate flowThroughCall(DataFlow::Node input, DataFlow::Node output) { + deprecated private predicate flowThroughCall(DataFlow::Node input, DataFlow::Node output) { exists(Function f, DataFlow::ValueNode ret | ret.asExpr() = f.getAReturnedExpr() and reachableFromInput(f, output, input, ret, _) @@ -187,13 +237,13 @@ private module NodeTracking { /** * Holds if `pred` may flow into property `prop` of `succ` along a path summarized by `summary`. */ - private predicate storeStep( + deprecated private predicate storeStep( DataFlow::Node pred, DataFlow::SourceNode succ, string prop, PathSummary summary ) { basicStoreStep(pred, succ, prop) and summary = PathSummary::level() or - exists(Function f, DataFlow::Node mid | + exists(Function f, DataFlow::Node mid | not f.isAsync() | // `f` stores its parameter `pred` in property `prop` of a value that flows back to the caller, // and `succ` is an invocation of `f` reachableFromInput(f, succ, pred, mid, summary) and @@ -210,13 +260,13 @@ private module NodeTracking { * Holds if property `prop` of `pred` may flow into `succ` along a path summarized by * `summary`. */ - private predicate loadStep( + deprecated private predicate loadStep( DataFlow::Node pred, DataFlow::Node succ, string prop, PathSummary summary ) { basicLoadStep(pred, succ, prop) and summary = PathSummary::level() or - exists(Function f, DataFlow::SourceNode parm | + exists(Function f, DataFlow::SourceNode parm | not f.isAsync() | argumentPassing(succ, pred, f, parm) and reachesReturn(f, parm.getAPropertyRead(prop), summary) ) @@ -226,7 +276,7 @@ private module NodeTracking { * Holds if `rhs` is the right-hand side of a write to property `prop`, and `nd` is reachable * from the base of that write (possibly through callees) along a path summarized by `summary`. */ - private predicate reachableFromStoreBase( + deprecated private predicate reachableFromStoreBase( string prop, DataFlow::Node rhs, DataFlow::Node nd, PathSummary summary ) { storeStep(rhs, nd, prop, summary) @@ -244,7 +294,7 @@ private module NodeTracking { * * In other words, `pred` may flow to `succ` through a property. */ - private predicate flowThroughProperty( + deprecated private predicate flowThroughProperty( DataFlow::Node pred, DataFlow::Node succ, PathSummary summary ) { exists(string prop, DataFlow::Node base, PathSummary oldSummary, PathSummary newSummary | @@ -259,7 +309,7 @@ private module NodeTracking { * invokes `cb`, passing `arg` as its `i`th argument. `arg` flows along a path summarized * by `summary`, while `cb` is only tracked locally. */ - private predicate summarizedHigherOrderCall( + deprecated private predicate summarizedHigherOrderCall( DataFlow::Node arg, DataFlow::Node cb, int i, PathSummary summary ) { exists( @@ -293,7 +343,7 @@ private module NodeTracking { * Alternatively, the callback can flow into a call `f(callback)` which itself provides the `arg`. * That is, `arg` refers to a value defined in `f` or one of its callees. */ - predicate higherOrderCall( + deprecated predicate higherOrderCall( DataFlow::Node arg, DataFlow::SourceNode callback, int i, PathSummary summary ) { // Summarized call @@ -328,7 +378,7 @@ private module NodeTracking { * of `cb`. `arg` flows along a path summarized by `summary`, while `cb` is only tracked * locally. */ - private predicate flowIntoHigherOrderCall( + deprecated private predicate flowIntoHigherOrderCall( DataFlow::Node pred, DataFlow::Node succ, PathSummary summary ) { exists(DataFlow::FunctionNode cb, int i, PathSummary oldSummary | @@ -341,7 +391,9 @@ private module NodeTracking { /** * Holds if there is a flow step from `pred` to `succ` described by `summary`. */ - private predicate flowStep(DataFlow::Node pred, DataFlow::Node succ, PathSummary summary) { + deprecated private predicate flowStep( + DataFlow::Node pred, DataFlow::Node succ, PathSummary summary + ) { basicFlowStep(pred, succ, summary) or // Flow through a function that returns a value that depends on one of its arguments @@ -360,7 +412,7 @@ private module NodeTracking { * Holds if there is a path from `source` to `nd` along a path summarized by * `summary`. */ - predicate flowsTo(TrackedNode source, DataFlow::Node nd, PathSummary summary) { + deprecated predicate flowsTo(TrackedNode source, DataFlow::Node nd, PathSummary summary) { source = nd and summary = PathSummary::level() or diff --git a/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll b/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll index 4ed89a97f3a..0cf65bb97f5 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll @@ -1,7 +1,7 @@ /** * Provides the `TypeTracker` class for tracking types interprocedurally. * - * This provides an alternative to `DataFlow::TrackedNode` and `AbstractValue` + * This provides an alternative to `AbstractValue` * for tracking certain types interprocedurally without computing which source * a given value came from. */ diff --git a/javascript/ql/src/semmle/javascript/dataflow/internal/DataFlowNode.qll b/javascript/ql/src/semmle/javascript/dataflow/internal/DataFlowNode.qll index 9c14fbb7656..f343f0a8cd5 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/internal/DataFlowNode.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/internal/DataFlowNode.qll @@ -27,6 +27,7 @@ newtype TNode = exists(decl.getASpecifier().getImportedName()) } or THtmlAttributeNode(HTML::Attribute attr) or + TFunctionReturnNode(Function f) or TExceptionalFunctionReturnNode(Function f) or TExceptionalInvocationReturnNode(InvokeExpr e) or TGlobalAccessPathRoot() diff --git a/javascript/ql/src/semmle/javascript/dataflow/internal/FlowSteps.qll b/javascript/ql/src/semmle/javascript/dataflow/internal/FlowSteps.qll index 988a1b7c059..57c7682963a 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/internal/FlowSteps.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/internal/FlowSteps.qll @@ -61,13 +61,40 @@ predicate localFlowStep( * Holds if an exception thrown from `pred` can propagate locally to `succ`. */ predicate localExceptionStep(DataFlow::Node pred, DataFlow::Node succ) { + localExceptionStepWithAsyncFlag(pred, succ, false) +} + +/** + * Holds if an exception thrown from `pred` can propagate locally to `succ`. + * + * The `async` flag is true if the step involves wrapping the exception in a rejected Promise. + */ +predicate localExceptionStepWithAsyncFlag(DataFlow::Node pred, DataFlow::Node succ, boolean async) { + exists(DataFlow::Node target | target = getThrowTarget(pred) | + async = false and + succ = target and + not succ = any(DataFlow::FunctionNode f | f.getFunction().isAsync()).getExceptionalReturn() + or + async = true and + exists(DataFlow::FunctionNode f | f.getExceptionalReturn() = target | + succ = f.getReturnNode() // returns a rejected promise - therefore using the ordinary return node. + ) + ) +} + +/** + * Gets the dataflow-node that an exception thrown at `thrower` will flow to. + * + * The predicate that all functions are not async. + */ +DataFlow::Node getThrowTarget(DataFlow::Node thrower) { exists(Expr expr | expr = any(ThrowStmt throw).getExpr() and - pred = expr.flow() + thrower = expr.flow() or - DataFlow::exceptionalInvocationReturnNode(pred, expr) + DataFlow::exceptionalInvocationReturnNode(thrower, expr) | - succ = expr.getExceptionTarget() + result = expr.getExceptionTarget() ) } @@ -213,14 +240,14 @@ private module CachedSteps { /** * Holds if there is a flow step from `pred` to `succ` through: - * - returning a value from a function call, or + * - returning a value from a function call (from the special `FunctionReturnNode`), or * - throwing an exception out of a function call, or * - the receiver flowing out of a constructor call. */ cached predicate returnStep(DataFlow::Node pred, DataFlow::Node succ) { exists(Function f | calls(succ, f) or callsBound(succ, f, _) | - returnExpr(f, pred, _) + DataFlow::functionReturnNode(pred, f) or succ instanceof DataFlow::NewNode and DataFlow::thisNode(pred, f) diff --git a/javascript/ql/test/library-tests/CallGraphs/FullTest/tests.expected b/javascript/ql/test/library-tests/CallGraphs/FullTest/tests.expected index 86cd32e4178..15587d7d5b3 100644 --- a/javascript/ql/test/library-tests/CallGraphs/FullTest/tests.expected +++ b/javascript/ql/test/library-tests/CallGraphs/FullTest/tests.expected @@ -67,6 +67,7 @@ test_getAFunctionValue | es2015.js:2:14:4:3 | () {\\n ... ");\\n } | es2015.js:2:14:4:3 | () {\\n ... ");\\n } | | es2015.js:6:5:6:16 | ExampleClass | es2015.js:2:14:4:3 | () {\\n ... ");\\n } | | es2015.js:8:2:12:1 | functio ... \\n };\\n} | es2015.js:8:2:12:1 | functio ... \\n };\\n} | +| es2015.js:8:2:12:1 | return of anonymous function | es2015.js:9:10:11:3 | () => { ... ();\\n } | | es2015.js:9:10:11:3 | () => { ... ();\\n } | es2015.js:9:10:11:3 | () => { ... ();\\n } | | es2015.js:10:5:10:20 | arguments.callee | es2015.js:8:2:12:1 | functio ... \\n };\\n} | | es2015.js:10:5:10:22 | arguments.callee() | es2015.js:9:10:11:3 | () => { ... ();\\n } | diff --git a/javascript/ql/test/library-tests/DataFlow/flowStep.expected b/javascript/ql/test/library-tests/DataFlow/flowStep.expected index cfb92ff6339..2f8e58861ad 100644 --- a/javascript/ql/test/library-tests/DataFlow/flowStep.expected +++ b/javascript/ql/test/library-tests/DataFlow/flowStep.expected @@ -17,10 +17,12 @@ | sources.js:1:6:1:6 | x | sources.js:1:11:1:11 | x | | sources.js:1:6:1:11 | x => x | sources.js:1:5:1:12 | (x => x) | | sources.js:1:11:1:11 | x | sources.js:1:1:1:12 | new (x => x) | +| sources.js:1:11:1:11 | x | sources.js:1:6:1:11 | return of anonymous function | | sources.js:3:2:5:1 | functio ... x+19;\\n} | sources.js:3:1:5:2 | (functi ... +19;\\n}) | | sources.js:3:11:3:11 | x | sources.js:3:11:3:11 | x | | sources.js:3:11:3:11 | x | sources.js:4:10:4:10 | x | | sources.js:4:10:4:13 | x+19 | sources.js:3:1:5:6 | (functi ... \\n})(23) | +| sources.js:4:10:4:13 | x+19 | sources.js:3:2:5:1 | return of anonymous function | | sources.js:5:4:5:5 | 23 | sources.js:3:11:3:11 | x | | sources.js:9:14:9:18 | array | sources.js:9:14:9:18 | array | | sources.js:9:14:9:18 | array | sources.js:10:19:10:23 | array | @@ -89,7 +91,9 @@ | tst.js:16:13:16:13 | a | tst.js:16:13:16:13 | a | | tst.js:16:13:16:13 | a | tst.js:18:12:18:12 | a | | tst.js:18:12:18:12 | a | tst.js:16:1:20:9 | (functi ... ("arg") | +| tst.js:18:12:18:12 | a | tst.js:16:2:20:1 | return of function f | | tst.js:19:10:19:11 | "" | tst.js:16:1:20:9 | (functi ... ("arg") | +| tst.js:19:10:19:11 | "" | tst.js:16:2:20:1 | return of function f | | tst.js:20:4:20:8 | "arg" | tst.js:16:13:16:13 | a | | tst.js:22:5:22:25 | readFileSync | tst.js:23:1:23:12 | readFileSync | | tst.js:22:7:22:18 | readFileSync | tst.js:22:5:22:25 | readFileSync | @@ -102,11 +106,13 @@ | tst.js:28:2:28:1 | x | tst.js:29:3:29:3 | x | | tst.js:28:2:29:3 | () =>\\n x | tst.js:28:1:30:1 | (() =>\\n ... ables\\n) | | tst.js:29:3:29:3 | x | tst.js:28:1:30:3 | (() =>\\n ... les\\n)() | +| tst.js:29:3:29:3 | x | tst.js:28:2:29:3 | return of anonymous function | | tst.js:32:1:32:0 | x | tst.js:33:10:33:10 | x | | tst.js:32:1:34:1 | functio ... ables\\n} | tst.js:32:10:32:10 | g | | tst.js:32:10:32:10 | g | tst.js:35:1:35:1 | g | | tst.js:32:10:32:10 | g | tst.js:60:1:60:1 | g | | tst.js:32:10:32:10 | g | tst.js:62:4:62:4 | g | +| tst.js:33:10:33:10 | x | tst.js:32:1:34:1 | return of function g | | tst.js:37:5:42:1 | o | tst.js:43:1:43:1 | o | | tst.js:37:5:42:1 | o | tst.js:44:1:44:1 | o | | tst.js:37:5:42:1 | o | tst.js:61:3:61:3 | o | @@ -142,6 +148,7 @@ | tst.js:90:15:90:15 | o | tst.js:90:4:90:11 | { r: z } | | tst.js:90:15:90:15 | o | tst.js:90:4:90:15 | { r: z } = o | | tst.js:91:10:91:18 | x + y + z | tst.js:87:1:96:2 | (functi ... r: 0\\n}) | +| tst.js:91:10:91:18 | x + y + z | tst.js:87:2:92:1 | return of anonymous function | | tst.js:92:4:96:1 | {\\n p: ... r: 0\\n} | tst.js:87:11:87:24 | { p: x, ...o } | | tst.js:98:2:103:1 | functio ... + z;\\n} | tst.js:98:1:103:2 | (functi ... + z;\\n}) | | tst.js:98:11:98:24 | rest | tst.js:99:15:99:18 | rest | @@ -156,6 +163,7 @@ | tst.js:101:13:101:16 | rest | tst.js:101:3:101:9 | [ , z ] | | tst.js:101:13:101:16 | rest | tst.js:101:3:101:16 | [ , z ] = rest | | tst.js:102:10:102:18 | x + y + z | tst.js:98:1:103:17 | (functi ... 3, 0 ]) | +| tst.js:102:10:102:18 | x + y + z | tst.js:98:2:103:1 | return of anonymous function | | tst.js:103:4:103:16 | [ 19, 23, 0 ] | tst.js:98:11:98:24 | [ x, ...rest ] | | tst.js:105:1:105:1 | x | tst.js:105:1:105:6 | x ?? y | | tst.js:105:6:105:6 | y | tst.js:105:1:105:6 | x ?? y | diff --git a/javascript/ql/test/library-tests/DataFlow/sources.expected b/javascript/ql/test/library-tests/DataFlow/sources.expected index 0578eb1777d..021ebc9ae3d 100644 --- a/javascript/ql/test/library-tests/DataFlow/sources.expected +++ b/javascript/ql/test/library-tests/DataFlow/sources.expected @@ -2,8 +2,10 @@ | arguments.js:1:1:12:4 | (functi ... );\\n})() | | arguments.js:1:2:1:1 | this | | arguments.js:1:2:12:1 | functio ... , 3);\\n} | +| arguments.js:1:2:12:1 | return of anonymous function | | arguments.js:2:5:2:4 | this | | arguments.js:2:5:10:5 | functio ... ;\\n } | +| arguments.js:2:5:10:5 | return of function f | | arguments.js:2:16:2:16 | x | | arguments.js:4:28:4:39 | arguments[0] | | arguments.js:5:25:5:36 | arguments[1] | @@ -14,20 +16,24 @@ | eval.js:1:1:1:0 | this | | eval.js:1:1:1:0 | this | | eval.js:1:1:5:1 | functio ... eval`\\n} | +| eval.js:1:1:5:1 | return of function k | | eval.js:3:3:3:6 | eval | | eval.js:3:3:3:16 | eval("x = 23") | | file://:0:0:0:0 | global access path | | sources.js:1:1:1:0 | this | | sources.js:1:1:1:12 | new (x => x) | | sources.js:1:6:1:6 | x | +| sources.js:1:6:1:11 | return of anonymous function | | sources.js:1:6:1:11 | x => x | | sources.js:3:1:5:6 | (functi ... \\n})(23) | | sources.js:3:2:3:1 | this | | sources.js:3:2:5:1 | functio ... x+19;\\n} | +| sources.js:3:2:5:1 | return of anonymous function | | sources.js:3:11:3:11 | x | | sources.js:7:1:7:3 | /x/ | | sources.js:9:1:9:0 | this | | sources.js:9:1:12:1 | functio ... ey; }\\n} | +| sources.js:9:1:12:1 | return of function foo | | sources.js:9:14:9:18 | array | | sources.js:10:12:10:14 | key | | sources.js:11:12:11:18 | { key } | @@ -36,12 +42,14 @@ | tst2.ts:3:3:3:8 | setX() | | tst2.ts:7:1:7:0 | this | | tst2.ts:7:1:9:1 | functio ... = 23;\\n} | +| tst2.ts:7:1:9:1 | return of function setX | | tst2.ts:8:3:8:5 | A.x | | tst2.ts:11:11:11:13 | A.x | | tst2.ts:13:1:13:40 | class S ... ing> {} | | tst2.ts:13:26:13:29 | List | | tst2.ts:13:39:13:38 | (...arg ... rgs); } | | tst2.ts:13:39:13:38 | args | +| tst2.ts:13:39:13:38 | return of default constructor of class StringList | | tst2.ts:13:39:13:38 | super(...args) | | tst2.ts:13:39:13:38 | this | | tst.js:1:1:1:0 | this | @@ -50,6 +58,7 @@ | tst.js:16:1:20:9 | (functi ... ("arg") | | tst.js:16:2:16:1 | this | | tst.js:16:2:20:1 | functio ... n "";\\n} | +| tst.js:16:2:20:1 | return of function f | | tst.js:16:13:16:13 | a | | tst.js:17:7:17:10 | Math | | tst.js:17:7:17:17 | Math.random | @@ -57,13 +66,16 @@ | tst.js:22:7:22:18 | readFileSync | | tst.js:28:1:30:3 | (() =>\\n ... les\\n)() | | tst.js:28:2:29:3 | () =>\\n x | +| tst.js:28:2:29:3 | return of anonymous function | | tst.js:32:1:32:0 | this | | tst.js:32:1:34:1 | functio ... ables\\n} | +| tst.js:32:1:34:1 | return of function g | | tst.js:32:12:32:12 | b | | tst.js:35:1:35:7 | g(true) | | tst.js:37:9:42:1 | {\\n x: ... ;\\n }\\n} | | tst.js:39:4:39:3 | this | | tst.js:39:4:41:3 | () {\\n this;\\n } | +| tst.js:39:4:41:3 | return of method m | | tst.js:43:1:43:3 | o.x | | tst.js:44:1:44:3 | o.m | | tst.js:44:1:44:5 | o.m() | @@ -73,6 +85,7 @@ | tst.js:49:17:49:17 | B | | tst.js:50:14:50:13 | this | | tst.js:50:14:53:3 | () {\\n ... et`\\n } | +| tst.js:50:14:53:3 | return of constructor of class A | | tst.js:51:5:51:13 | super(42) | | tst.js:58:1:58:3 | tag | | tst.js:61:1:61:5 | ::o.m | @@ -80,6 +93,7 @@ | tst.js:62:1:62:4 | o::g | | tst.js:64:1:64:0 | this | | tst.js:64:1:67:1 | functio ... lysed\\n} | +| tst.js:64:1:67:1 | return of function h | | tst.js:65:3:65:10 | yield 42 | | tst.js:66:13:66:25 | function.sent | | tst.js:68:12:68:14 | h() | @@ -87,6 +101,7 @@ | tst.js:69:1:69:13 | iter.next(23) | | tst.js:71:1:71:0 | this | | tst.js:71:1:73:1 | async f ... lysed\\n} | +| tst.js:71:1:73:1 | return of function k | | tst.js:72:3:72:11 | await p() | | tst.js:72:9:72:9 | p | | tst.js:72:9:72:11 | p() | @@ -97,6 +112,7 @@ | tst.js:87:1:96:2 | (functi ... r: 0\\n}) | | tst.js:87:2:87:1 | this | | tst.js:87:2:92:1 | functio ... + z;\\n} | +| tst.js:87:2:92:1 | return of anonymous function | | tst.js:87:11:87:24 | { p: x, ...o } | | tst.js:87:13:87:16 | p: x | | tst.js:87:22:87:22 | ...o | @@ -106,6 +122,7 @@ | tst.js:98:1:103:17 | (functi ... 3, 0 ]) | | tst.js:98:2:98:1 | this | | tst.js:98:2:103:1 | functio ... + z;\\n} | +| tst.js:98:2:103:1 | return of anonymous function | | tst.js:98:11:98:24 | [ x, ...rest ] | | tst.js:98:13:98:13 | x | | tst.js:98:19:98:22 | ...rest | @@ -114,6 +131,7 @@ | tst.js:103:4:103:16 | [ 19, 23, 0 ] | | tst.js:107:2:107:1 | this | | tst.js:107:2:113:1 | functio ... v2c;\\n} | +| tst.js:107:2:113:1 | return of anonymous function | | tst.js:108:7:108:9 | v1a | | tst.js:108:12:108:20 | v1b = o1b | | tst.js:108:18:108:20 | o1b | diff --git a/javascript/ql/test/library-tests/Files/binary.js b/javascript/ql/test/library-tests/Files/binary.js new file mode 100644 index 00000000000..4e5fef36bd3 Binary files /dev/null and b/javascript/ql/test/library-tests/Files/binary.js differ diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/DataFlow.expected b/javascript/ql/test/library-tests/InterProceduralFlow/DataFlow.expected index a7f2e7ce5af..664e6fada2b 100644 --- a/javascript/ql/test/library-tests/InterProceduralFlow/DataFlow.expected +++ b/javascript/ql/test/library-tests/InterProceduralFlow/DataFlow.expected @@ -1,6 +1,15 @@ | a.js:1:15:1:23 | "tainted" | b.js:4:13:4:40 | whoKnow ... Tainted | | a.js:1:15:1:23 | "tainted" | b.js:6:13:6:13 | x | | a.js:2:15:2:28 | "also tainted" | b.js:5:13:5:29 | notTaintedTrustMe | +| async.js:2:16:2:23 | "source" | async.js:8:15:8:27 | await async() | +| async.js:2:16:2:23 | "source" | async.js:13:15:13:20 | sync() | +| async.js:2:16:2:23 | "source" | async.js:27:17:27:17 | e | +| async.js:2:16:2:23 | "source" | async.js:36:17:36:17 | e | +| async.js:2:16:2:23 | "source" | async.js:41:17:41:17 | e | +| async.js:2:16:2:23 | "source" | async.js:54:17:54:36 | unpack(pack(source)) | +| async.js:79:16:79:23 | "source" | async.js:80:14:80:36 | (await ... ce))).p | +| async.js:79:16:79:23 | "source" | async.js:92:15:92:30 | await (getP(o3)) | +| async.js:96:18:96:25 | "source" | async.js:101:15:101:27 | await readP() | | callback.js:16:14:16:21 | "source" | callback.js:13:14:13:14 | x | | callback.js:17:15:17:23 | "source2" | callback.js:13:14:13:14 | x | | callback.js:27:15:27:23 | "source3" | callback.js:13:14:13:14 | x | diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/DataFlowConfig.qll b/javascript/ql/test/library-tests/InterProceduralFlow/DataFlowConfig.qll index 7e779496175..eccf834a0e1 100644 --- a/javascript/ql/test/library-tests/InterProceduralFlow/DataFlowConfig.qll +++ b/javascript/ql/test/library-tests/InterProceduralFlow/DataFlowConfig.qll @@ -17,14 +17,15 @@ class TestDataFlowConfiguration extends DataFlow::Configuration { ) } + override predicate isBarrier(DataFlow::Node node) { + exists(Function f | + f.getName().matches("%noReturnTracking%") and + node = f.getAReturnedExpr().flow() + ) + } + override predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node snk) { src = src and snk.asExpr().(PropAccess).getPropertyName() = "notTracked" - or - exists(Function f | - f.getName().matches("%noReturnTracking%") and - src = f.getAReturnedExpr().flow() and - snk.(DataFlow::InvokeNode).getACallee() = f - ) } } diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/GermanFlow.expected b/javascript/ql/test/library-tests/InterProceduralFlow/GermanFlow.expected index 112ec0ba9fa..f5f0614794f 100644 --- a/javascript/ql/test/library-tests/InterProceduralFlow/GermanFlow.expected +++ b/javascript/ql/test/library-tests/InterProceduralFlow/GermanFlow.expected @@ -1,6 +1,15 @@ | a.js:1:15:1:23 | "tainted" | b.js:4:13:4:40 | whoKnow ... Tainted | | a.js:1:15:1:23 | "tainted" | b.js:6:13:6:13 | x | | a.js:2:15:2:28 | "also tainted" | b.js:5:13:5:29 | notTaintedTrustMe | +| async.js:2:16:2:23 | "source" | async.js:8:15:8:27 | await async() | +| async.js:2:16:2:23 | "source" | async.js:13:15:13:20 | sync() | +| async.js:2:16:2:23 | "source" | async.js:27:17:27:17 | e | +| async.js:2:16:2:23 | "source" | async.js:36:17:36:17 | e | +| async.js:2:16:2:23 | "source" | async.js:41:17:41:17 | e | +| async.js:2:16:2:23 | "source" | async.js:54:17:54:36 | unpack(pack(source)) | +| async.js:79:16:79:23 | "source" | async.js:80:14:80:36 | (await ... ce))).p | +| async.js:79:16:79:23 | "source" | async.js:92:15:92:30 | await (getP(o3)) | +| async.js:96:18:96:25 | "source" | async.js:101:15:101:27 | await readP() | | callback.js:16:14:16:21 | "source" | callback.js:13:14:13:14 | x | | callback.js:17:15:17:23 | "source2" | callback.js:13:14:13:14 | x | | callback.js:27:15:27:23 | "source3" | callback.js:13:14:13:14 | x | diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.expected b/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.expected index b6122f6e2a4..eeac69e3af1 100644 --- a/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.expected +++ b/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.expected @@ -1,6 +1,17 @@ | a.js:1:15:1:23 | "tainted" | b.js:4:13:4:40 | whoKnow ... Tainted | | a.js:1:15:1:23 | "tainted" | b.js:6:13:6:13 | x | | a.js:2:15:2:28 | "also tainted" | b.js:5:13:5:29 | notTaintedTrustMe | +| async.js:2:16:2:23 | "source" | async.js:7:14:7:20 | async() | +| async.js:2:16:2:23 | "source" | async.js:8:15:8:27 | await async() | +| async.js:2:16:2:23 | "source" | async.js:13:15:13:20 | sync() | +| async.js:2:16:2:23 | "source" | async.js:14:15:14:26 | await sync() | +| async.js:2:16:2:23 | "source" | async.js:27:17:27:17 | e | +| async.js:2:16:2:23 | "source" | async.js:36:17:36:17 | e | +| async.js:2:16:2:23 | "source" | async.js:41:17:41:17 | e | +| async.js:2:16:2:23 | "source" | async.js:54:17:54:36 | unpack(pack(source)) | +| async.js:79:16:79:23 | "source" | async.js:80:14:80:36 | (await ... ce))).p | +| async.js:79:16:79:23 | "source" | async.js:92:15:92:30 | await (getP(o3)) | +| async.js:96:18:96:25 | "source" | async.js:101:15:101:27 | await readP() | | callback.js:16:14:16:21 | "source" | callback.js:13:14:13:14 | x | | callback.js:17:15:17:23 | "source2" | callback.js:13:14:13:14 | x | | callback.js:27:15:27:23 | "source3" | callback.js:13:14:13:14 | x | diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.ql b/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.ql index f9b1a552086..ae4ab0cd6a5 100644 --- a/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.ql +++ b/javascript/ql/test/library-tests/InterProceduralFlow/TaintTracking.ql @@ -17,15 +17,16 @@ class TestTaintTrackingConfiguration extends TaintTracking::Configuration { ) } + override predicate isSanitizer(DataFlow::Node node) { + exists(Function f | + f.getName().matches("%noReturnTracking%") and + node = f.getAReturnedExpr().flow() + ) + } + override predicate isSanitizerEdge(DataFlow::Node src, DataFlow::Node snk) { src = src and snk.asExpr().(PropAccess).getPropertyName() = "notTracked" - or - exists(Function f | - f.getName().matches("%noReturnTracking%") and - src = f.getAReturnedExpr().flow() and - snk.(DataFlow::InvokeNode).getACallee() = f - ) } } diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/TrackedNodes.expected b/javascript/ql/test/library-tests/InterProceduralFlow/TrackedNodes.expected deleted file mode 100644 index be2f5988432..00000000000 --- a/javascript/ql/test/library-tests/InterProceduralFlow/TrackedNodes.expected +++ /dev/null @@ -1,55 +0,0 @@ -| missing | callback.js:17:15:17:23 | "source2" | callback.js:8:16:8:20 | xs[i] | -| missing | callback.js:17:15:17:23 | "source2" | callback.js:12:16:12:16 | x | -| missing | callback.js:17:15:17:23 | "source2" | callback.js:12:16:12:16 | x | -| missing | callback.js:17:15:17:23 | "source2" | callback.js:13:14:13:14 | x | -| missing | promises.js:1:2:1:2 | source | promises.js:6:26:6:28 | val | -| missing | promises.js:1:2:1:2 | source | promises.js:6:26:6:28 | val | -| missing | promises.js:1:2:1:2 | source | promises.js:7:16:7:18 | val | -| missing | promises.js:1:2:1:2 | source | promises.js:37:11:37:11 | v | -| missing | promises.js:1:2:1:2 | source | promises.js:37:11:37:11 | v | -| missing | promises.js:1:2:1:2 | source | promises.js:38:32:38:32 | v | -| missing | promises.js:2:16:2:24 | "tainted" | promises.js:6:26:6:28 | val | -| missing | promises.js:2:16:2:24 | "tainted" | promises.js:6:26:6:28 | val | -| missing | promises.js:2:16:2:24 | "tainted" | promises.js:7:16:7:18 | val | -| missing | promises.js:2:16:2:24 | "tainted" | promises.js:37:11:37:11 | v | -| missing | promises.js:2:16:2:24 | "tainted" | promises.js:37:11:37:11 | v | -| missing | promises.js:2:16:2:24 | "tainted" | promises.js:38:32:38:32 | v | -| missing | promises.js:10:30:17:3 | exceptional return of anonymous function | promises.js:20:7:20:7 | v | -| missing | promises.js:10:30:17:3 | exceptional return of anonymous function | promises.js:20:7:20:7 | v | -| missing | promises.js:10:30:17:3 | exceptional return of anonymous function | promises.js:21:20:21:20 | v | -| missing | promises.js:10:30:17:3 | exceptional return of anonymous function | promises.js:23:19:23:19 | v | -| missing | promises.js:10:30:17:3 | exceptional return of anonymous function | promises.js:23:19:23:19 | v | -| missing | promises.js:10:30:17:3 | exceptional return of anonymous function | promises.js:24:20:24:20 | v | -| missing | promises.js:11:22:11:31 | "resolved" | promises.js:18:18:18:18 | v | -| missing | promises.js:11:22:11:31 | "resolved" | promises.js:18:18:18:18 | v | -| missing | promises.js:11:22:11:31 | "resolved" | promises.js:19:20:19:20 | v | -| missing | promises.js:12:22:12:31 | "rejected" | promises.js:20:7:20:7 | v | -| missing | promises.js:12:22:12:31 | "rejected" | promises.js:20:7:20:7 | v | -| missing | promises.js:12:22:12:31 | "rejected" | promises.js:21:20:21:20 | v | -| missing | promises.js:12:22:12:31 | "rejected" | promises.js:23:19:23:19 | v | -| missing | promises.js:12:22:12:31 | "rejected" | promises.js:23:19:23:19 | v | -| missing | promises.js:12:22:12:31 | "rejected" | promises.js:24:20:24:20 | v | -| missing | promises.js:13:9:13:21 | exceptional return of Math.random() | promises.js:20:7:20:7 | v | -| missing | promises.js:13:9:13:21 | exceptional return of Math.random() | promises.js:20:7:20:7 | v | -| missing | promises.js:13:9:13:21 | exceptional return of Math.random() | promises.js:21:20:21:20 | v | -| missing | promises.js:13:9:13:21 | exceptional return of Math.random() | promises.js:23:19:23:19 | v | -| missing | promises.js:13:9:13:21 | exceptional return of Math.random() | promises.js:23:19:23:19 | v | -| missing | promises.js:13:9:13:21 | exceptional return of Math.random() | promises.js:24:20:24:20 | v | -| missing | promises.js:14:7:14:21 | exceptional return of res(res_source) | promises.js:20:7:20:7 | v | -| missing | promises.js:14:7:14:21 | exceptional return of res(res_source) | promises.js:20:7:20:7 | v | -| missing | promises.js:14:7:14:21 | exceptional return of res(res_source) | promises.js:21:20:21:20 | v | -| missing | promises.js:14:7:14:21 | exceptional return of res(res_source) | promises.js:23:19:23:19 | v | -| missing | promises.js:14:7:14:21 | exceptional return of res(res_source) | promises.js:23:19:23:19 | v | -| missing | promises.js:14:7:14:21 | exceptional return of res(res_source) | promises.js:24:20:24:20 | v | -| missing | promises.js:16:7:16:21 | exceptional return of rej(rej_source) | promises.js:20:7:20:7 | v | -| missing | promises.js:16:7:16:21 | exceptional return of rej(rej_source) | promises.js:20:7:20:7 | v | -| missing | promises.js:16:7:16:21 | exceptional return of rej(rej_source) | promises.js:21:20:21:20 | v | -| missing | promises.js:16:7:16:21 | exceptional return of rej(rej_source) | promises.js:23:19:23:19 | v | -| missing | promises.js:16:7:16:21 | exceptional return of rej(rej_source) | promises.js:23:19:23:19 | v | -| missing | promises.js:16:7:16:21 | exceptional return of rej(rej_source) | promises.js:24:20:24:20 | v | -| missing | promises.js:32:24:32:37 | "also tainted" | promises.js:37:11:37:11 | v | -| missing | promises.js:32:24:32:37 | "also tainted" | promises.js:37:11:37:11 | v | -| missing | promises.js:32:24:32:37 | "also tainted" | promises.js:38:32:38:32 | v | -| missing | tst.js:2:17:2:22 | "src1" | tst.js:27:22:27:24 | elt | -| missing | tst.js:2:17:2:22 | "src1" | tst.js:27:22:27:24 | elt | -| missing | tst.js:2:17:2:22 | "src1" | tst.js:28:20:28:22 | elt | diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/TrackedNodes.ql b/javascript/ql/test/library-tests/InterProceduralFlow/TrackedNodes.ql deleted file mode 100644 index 2e0df0437bc..00000000000 --- a/javascript/ql/test/library-tests/InterProceduralFlow/TrackedNodes.ql +++ /dev/null @@ -1,31 +0,0 @@ -import javascript - -/** - * Track all nodes that do not have flow predecessors. - */ -class TrackAllSources extends DataFlow::TrackedNode { - TrackAllSources() { not exists(getAPredecessor()) } -} - -/** - * A data flow configuration that emulates the flow tracking done by - * `DataFlow::TrackedNode`. - */ -class AllSourcesTrackingConfig extends DataFlow::Configuration { - AllSourcesTrackingConfig() { this = "TrackAllTrackedNodes" } - - override predicate isSource(DataFlow::Node src) { src instanceof DataFlow::TrackedNode } - - override predicate isSink(DataFlow::Node snk) { any() } -} - -from DataFlow::Node source, DataFlow::Node sink, AllSourcesTrackingConfig cfg, string problem -where - cfg.hasFlow(source, sink) and - not source.(DataFlow::TrackedNode).flowsTo(sink) and - problem = "missing" - or - not cfg.hasFlow(source, sink) and - source.(DataFlow::TrackedNode).flowsTo(sink) and - problem = "spurious" -select problem, source, sink diff --git a/javascript/ql/test/library-tests/InterProceduralFlow/async.js b/javascript/ql/test/library-tests/InterProceduralFlow/async.js new file mode 100644 index 00000000000..f91cda9cea8 --- /dev/null +++ b/javascript/ql/test/library-tests/InterProceduralFlow/async.js @@ -0,0 +1,102 @@ +(async function () { + let source = "source"; + + async function async() { + return source; + } + let sink = async(); // OK - wrapped in a promise. (NOT OK for taint-tracking configs) + let sink2 = await async(); // NOT OK + + function sync() { + return source; + } + let sink3 = sync(); // NOT OK + let sink4 = await sync(); // OK + + async function throwsAsync() { + throw source; + } + try { + throwsAsync(); + } catch (e) { + let sink5 = e; // OK - throwsAsync just returns a promise. + } + try { + await throwsAsync(); + } catch (e) { + let sink6 = e; // NOT OK + } + + function throws() { + throw source; + } + try { + throws(); + } catch (e) { + let sink5 = e; // NOT OK + } + try { + await throws(); + } catch (e) { + let sink6 = e; // NOT OK + } + + function syncTest() { + function pack(x) { + return { + x: x + } + }; + function unpack(x) { + return x.x; + } + + var sink7 = unpack(pack(source)); // NOT OK + } + + function asyncTest() { + async function pack(x) { + return { + x: x + } + }; + function unpack(x) { + return x.x; + } + + var sink8 = unpack(pack(source)); // OK + let sink9 = unpack(await (pack(source))); // NOT OK - but not found + } +})(); + +async function props() { + async function foo(x) { + return { + p: x + }; + } + + let source = "source"; + let sink = (await (foo(source))).p; // NOT OK - this requires the immidiatly awaited storeStep. + let sink2 = foo("not a source").p; + + async function getP(base) { + return base.p; + } + + async function getQ(base) { + return base.q; + } + + let o3 = { p: source }; + let sink6 = await (getP(o3)); // NOT OK - this requires the immidiatly awaited loadStep + let sink7 = await (getQ(o3)); + + async function readP() { + let source = "source"; + var obj = {x: source}; + return obj.x; + } + + let sink8 = await readP(); // NOT OK +} diff --git a/javascript/ql/test/library-tests/NodeJS/Require.expected b/javascript/ql/test/library-tests/NodeJS/Require.expected index cdbead07939..364ad6fe99d 100644 --- a/javascript/ql/test/library-tests/NodeJS/Require.expected +++ b/javascript/ql/test/library-tests/NodeJS/Require.expected @@ -15,6 +15,7 @@ | g.js:1:43:1:61 | require("electron") | | index.js:1:12:1:26 | require('path') | | index.js:2:1:2:41 | require ... b.js")) | +| mjs-files/createRequire.mjs:4:26:4:49 | require ... erver') | | mjs-files/require-from-js.js:1:12:1:36 | require ... on-me') | | mjs-files/require-from-js.js:2:12:2:39 | require ... me.js') | | mjs-files/require-from-js.js:3:12:3:40 | require ... e.mjs') | diff --git a/javascript/ql/test/library-tests/NodeJS/mjs-files/createRequire.mjs b/javascript/ql/test/library-tests/NodeJS/mjs-files/createRequire.mjs new file mode 100644 index 00000000000..c2cb24f319c --- /dev/null +++ b/javascript/ql/test/library-tests/NodeJS/mjs-files/createRequire.mjs @@ -0,0 +1,4 @@ +import { createRequire } from 'module'; + +const require = createRequire(import.meta.url); +const { ApolloServer } = require('apollo-server'); diff --git a/javascript/ql/test/library-tests/Promises/AdditionalPromises.expected b/javascript/ql/test/library-tests/Promises/AdditionalPromises.expected index b3bf0e22e06..d6845266dcc 100644 --- a/javascript/ql/test/library-tests/Promises/AdditionalPromises.expected +++ b/javascript/ql/test/library-tests/Promises/AdditionalPromises.expected @@ -40,7 +40,7 @@ | flow.js:86:23:86:70 | new Pro ... ource)) | | flow.js:89:3:89:27 | ("foo", ... => {}) | | flow.js:91:21:91:68 | new Pro ... ource)) | -| flow.js:100:28:100:75 | new Pro ... ource)) | +| flow.js:100:34:100:81 | new Pro ... ource)) | | flow.js:103:2:103:48 | new Pro ... "BLA")) | | flow.js:103:2:103:76 | new Pro ... ource}) | | flow.js:105:2:105:48 | new Pro ... "BLA")) | diff --git a/javascript/ql/test/library-tests/Promises/flow.js b/javascript/ql/test/library-tests/Promises/flow.js index d92ef541b85..ff8040f6e9d 100644 --- a/javascript/ql/test/library-tests/Promises/flow.js +++ b/javascript/ql/test/library-tests/Promises/flow.js @@ -97,7 +97,7 @@ return e; } } - var foo = returnsRejected(new Promise((resolve, reject) => reject(source))); + var foo = await returnsRejected(new Promise((resolve, reject) => reject(source))); sink(foo); // NOT OK! new Promise((resolve, reject) => reject("BLA")).catch(x => {return source}).then(x => sink(x)); // NOT OK @@ -129,4 +129,29 @@ new Promise((resolve, reject) => resolve(resolved)).then(x => sink(x)); // NOT OK Promise.resolve(resolved).then(x => sink(x)); // NOT OK -})(); \ No newline at end of file +})(); + + +(async function () { + var source = "source"; + + async function async() { + return source; + } + sink(async()); // OK - wrapped in a promise. (NOT OK for taint-tracking configs) + sink(await async()); // NOT OK + + async function throwsAsync() { + throw source; + } + try { + throwsAsync(); + } catch (e) { + sink(e); // OK - throwsAsync just returns a promise. + } + try { + await throwsAsync(); + } catch (e) { + sink(e); // NOT OK + } + })(); \ No newline at end of file diff --git a/javascript/ql/test/library-tests/Promises/tests.expected b/javascript/ql/test/library-tests/Promises/tests.expected index 5cb2d0b2a24..69e3fd093bc 100644 --- a/javascript/ql/test/library-tests/Promises/tests.expected +++ b/javascript/ql/test/library-tests/Promises/tests.expected @@ -61,7 +61,7 @@ test_PromiseDefinition_getExecutor | flow.js:74:10:74:57 | new Pro ... ource)) | flow.js:74:22:74:56 | (resolv ... source) | | flow.js:86:23:86:70 | new Pro ... ource)) | flow.js:86:35:86:69 | (resolv ... source) | | flow.js:91:21:91:68 | new Pro ... ource)) | flow.js:91:33:91:67 | (resolv ... source) | -| flow.js:100:28:100:75 | new Pro ... ource)) | flow.js:100:40:100:74 | (resolv ... source) | +| flow.js:100:34:100:81 | new Pro ... ource)) | flow.js:100:46:100:80 | (resolv ... source) | | flow.js:103:2:103:48 | new Pro ... "BLA")) | flow.js:103:14:103:47 | (resolv ... ("BLA") | | flow.js:105:2:105:48 | new Pro ... "BLA")) | flow.js:105:14:105:47 | (resolv ... ("BLA") | | flow.js:107:17:107:64 | new Pro ... ource)) | flow.js:107:29:107:63 | (resolv ... source) | @@ -96,7 +96,7 @@ test_PromiseDefinition | flow.js:74:10:74:57 | new Pro ... ource)) | | flow.js:86:23:86:70 | new Pro ... ource)) | | flow.js:91:21:91:68 | new Pro ... ource)) | -| flow.js:100:28:100:75 | new Pro ... ource)) | +| flow.js:100:34:100:81 | new Pro ... ource)) | | flow.js:103:2:103:48 | new Pro ... "BLA")) | | flow.js:105:2:105:48 | new Pro ... "BLA")) | | flow.js:107:17:107:64 | new Pro ... ource)) | @@ -143,7 +143,7 @@ test_PromiseDefinition_getRejectParameter | flow.js:74:10:74:57 | new Pro ... ource)) | flow.js:74:32:74:37 | reject | | flow.js:86:23:86:70 | new Pro ... ource)) | flow.js:86:45:86:50 | reject | | flow.js:91:21:91:68 | new Pro ... ource)) | flow.js:91:43:91:48 | reject | -| flow.js:100:28:100:75 | new Pro ... ource)) | flow.js:100:50:100:55 | reject | +| flow.js:100:34:100:81 | new Pro ... ource)) | flow.js:100:56:100:61 | reject | | flow.js:103:2:103:48 | new Pro ... "BLA")) | flow.js:103:24:103:29 | reject | | flow.js:105:2:105:48 | new Pro ... "BLA")) | flow.js:105:24:105:29 | reject | | flow.js:107:17:107:64 | new Pro ... ource)) | flow.js:107:39:107:44 | reject | @@ -173,7 +173,7 @@ test_PromiseDefinition_getResolveParameter | flow.js:74:10:74:57 | new Pro ... ource)) | flow.js:74:23:74:29 | resolve | | flow.js:86:23:86:70 | new Pro ... ource)) | flow.js:86:36:86:42 | resolve | | flow.js:91:21:91:68 | new Pro ... ource)) | flow.js:91:34:91:40 | resolve | -| flow.js:100:28:100:75 | new Pro ... ource)) | flow.js:100:41:100:47 | resolve | +| flow.js:100:34:100:81 | new Pro ... ource)) | flow.js:100:47:100:53 | resolve | | flow.js:103:2:103:48 | new Pro ... "BLA")) | flow.js:103:15:103:21 | resolve | | flow.js:105:2:105:48 | new Pro ... "BLA")) | flow.js:105:15:105:21 | resolve | | flow.js:107:17:107:64 | new Pro ... ource)) | flow.js:107:30:107:36 | resolve | @@ -232,8 +232,11 @@ flow | flow.js:2:15:2:22 | "source" | flow.js:125:59:125:59 | x | | flow.js:2:15:2:22 | "source" | flow.js:129:69:129:69 | x | | flow.js:2:15:2:22 | "source" | flow.js:131:43:131:43 | x | +| flow.js:136:15:136:22 | "source" | flow.js:142:7:142:19 | await async() | +| flow.js:136:15:136:22 | "source" | flow.js:155:9:155:9 | e | exclusiveTaintFlow | flow2.js:2:15:2:22 | "source" | flow2.js:20:7:20:14 | tainted3 | +| flow.js:136:15:136:22 | "source" | flow.js:141:7:141:13 | async() | | interflow.js:3:18:3:25 | "source" | interflow.js:18:10:18:14 | error | typetrack | flow2.js:4:2:4:31 | Promise ... lean"]) | flow2.js:4:14:4:30 | [source, "clean"] | copy $PromiseResolveField$ | @@ -320,6 +323,7 @@ typetrack | flow.js:89:3:89:47 | ("foo", ... ink(e)) | flow.js:89:3:89:27 | ("foo", ... => {}) | copy $PromiseResolveField$ | | flow.js:89:3:89:47 | ("foo", ... ink(e)) | flow.js:89:40:89:46 | sink(e) | copy $PromiseResolveField$ | | flow.js:89:3:89:47 | ("foo", ... ink(e)) | flow.js:89:40:89:46 | sink(e) | store $PromiseResolveField$ | +| flow.js:100:12:100:82 | await r ... urce))) | flow.js:100:18:100:82 | returns ... urce))) | load $PromiseResolveField$ | | flow.js:103:2:103:76 | new Pro ... ource}) | flow.js:103:2:103:48 | new Pro ... "BLA")) | copy $PromiseResolveField$ | | flow.js:103:2:103:95 | new Pro ... ink(x)) | flow.js:103:88:103:94 | sink(x) | copy $PromiseResolveField$ | | flow.js:103:2:103:95 | new Pro ... ink(x)) | flow.js:103:88:103:94 | sink(x) | store $PromiseResolveField$ | @@ -370,6 +374,8 @@ typetrack | flow.js:131:2:131:45 | Promise ... ink(x)) | flow.js:131:38:131:44 | sink(x) | copy $PromiseResolveField$ | | flow.js:131:2:131:45 | Promise ... ink(x)) | flow.js:131:38:131:44 | sink(x) | store $PromiseResolveField$ | | flow.js:131:33:131:33 | x | flow.js:131:2:131:26 | Promise ... solved) | load $PromiseResolveField$ | +| flow.js:142:7:142:19 | await async() | flow.js:142:13:142:19 | async() | load $PromiseResolveField$ | +| flow.js:153:4:153:22 | await throwsAsync() | flow.js:153:10:153:22 | throwsAsync() | load $PromiseResolveField$ | | interflow.js:6:3:9:23 | loadScr ... eError) | interflow.js:6:3:8:26 | loadScr ... () { }) | copy $PromiseResolveField$ | | promises.js:33:19:35:6 | new Pro ... \\n }) | promises.js:34:17:34:22 | source | copy $PromiseResolveField$ | | promises.js:33:19:35:6 | new Pro ... \\n }) | promises.js:34:17:34:22 | source | store $PromiseResolveField$ | diff --git a/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected b/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected index fbd825771a0..e1383b7f9da 100644 --- a/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected +++ b/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected @@ -52,7 +52,6 @@ typeInferenceMismatch | exceptions.js:21:17:21:24 | source() | exceptions.js:24:10:24:21 | e.toString() | | exceptions.js:21:17:21:24 | source() | exceptions.js:25:10:25:18 | e.message | | exceptions.js:21:17:21:24 | source() | exceptions.js:26:10:26:19 | e.fileName | -| exceptions.js:48:16:48:23 | source() | exceptions.js:50:10:50:10 | e | | exceptions.js:59:24:59:31 | source() | exceptions.js:61:12:61:12 | e | | exceptions.js:88:6:88:13 | source() | exceptions.js:11:10:11:10 | e | | exceptions.js:88:6:88:13 | source() | exceptions.js:32:10:32:10 | e | diff --git a/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected b/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected index b91d63f6ad0..a304345d522 100644 --- a/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected +++ b/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected @@ -31,7 +31,6 @@ | constructor-calls.js:14:15:14:22 | source() | constructor-calls.js:17:8:17:14 | c.param | | constructor-calls.js:14:15:14:22 | source() | constructor-calls.js:25:8:25:14 | d.param | | exceptions.js:3:15:3:22 | source() | exceptions.js:5:10:5:10 | e | -| exceptions.js:48:16:48:23 | source() | exceptions.js:50:10:50:10 | e | | exceptions.js:59:24:59:31 | source() | exceptions.js:61:12:61:12 | e | | exceptions.js:88:6:88:13 | source() | exceptions.js:11:10:11:10 | e | | exceptions.js:93:11:93:18 | source() | exceptions.js:95:10:95:10 | e | diff --git a/javascript/ql/test/library-tests/TaintTracking/exceptions.js b/javascript/ql/test/library-tests/TaintTracking/exceptions.js index 98e3e7ce009..72d822be9ad 100644 --- a/javascript/ql/test/library-tests/TaintTracking/exceptions.js +++ b/javascript/ql/test/library-tests/TaintTracking/exceptions.js @@ -47,7 +47,7 @@ function test(unsafe, safe) { try { throwAsync(source()); } catch (e) { - sink(e); // OK - but flagged anyway + sink(e); // OK } throwAsync(source()).catch(e => { @@ -58,7 +58,7 @@ function test(unsafe, safe) { try { await throwAsync(source()); } catch (e) { - sink(e); // NOT OK + sink(e); // NOT OK - but not flagged } } } diff --git a/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected b/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected index 5eced0f76fd..1a04300fc3d 100644 --- a/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected +++ b/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected @@ -45,8 +45,10 @@ test_DataCallback | tst.js:21:1:23:1 | functio ... ata);\\n} | | tst.js:30:26:30:27 | cb | | tst.js:33:17:33:26 | data => {} | +| tst.js:37:1:39:1 | return of function getDataCurry | | tst.js:38:10:38:19 | data => {} | | tst.js:40:32:40:45 | getDataCurry() | +| tst.js:45:1:47:1 | return of function identity | | tst.js:45:19:45:20 | cb | | tst.js:48:32:48:60 | identit ... llback) | | tst.js:51:1:51:37 | functio ... ata) {} | diff --git a/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected b/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected index 80bf90de091..53bbd6ec126 100644 --- a/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected +++ b/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected @@ -52,8 +52,10 @@ dataCallback | tst.js:21:1:23:1 | functio ... ata);\\n} | | tst.js:30:26:30:27 | cb | | tst.js:33:17:33:26 | data => {} | +| tst.js:37:1:39:1 | return of function getDataCurry | | tst.js:38:10:38:19 | data => {} | | tst.js:40:32:40:45 | getDataCurry() | +| tst.js:45:1:47:1 | return of function identity | | tst.js:45:19:45:20 | cb | | tst.js:48:32:48:60 | identit ... llback) | | tst.js:51:1:51:37 | functio ... ata) {} | diff --git a/javascript/ql/test/library-tests/frameworks/Electron/BrowserObject.expected b/javascript/ql/test/library-tests/frameworks/Electron/BrowserObject.expected index b2ac72f1e1e..98b7e86d5d3 100644 --- a/javascript/ql/test/library-tests/frameworks/Electron/BrowserObject.expected +++ b/javascript/ql/test/library-tests/frameworks/Electron/BrowserObject.expected @@ -2,6 +2,7 @@ | electron.js:3:10:3:48 | new Bro ... s: {}}) | | electron.js:4:5:4:46 | bv | | electron.js:4:10:4:46 | new Bro ... s: {}}) | +| electron.js:35:1:37:1 | return of function foo | | electron.js:35:14:35:14 | x | | electron.js:35:14:35:14 | x | | electron.js:36:12:36:12 | x | diff --git a/javascript/ql/test/library-tests/frameworks/Express/tests.expected b/javascript/ql/test/library-tests/frameworks/Express/tests.expected index 4d78f507927..d54d70c0aaa 100644 --- a/javascript/ql/test/library-tests/frameworks/Express/tests.expected +++ b/javascript/ql/test/library-tests/frameworks/Express/tests.expected @@ -1867,6 +1867,7 @@ test_RouteSetup_getARouteHandler | src/advanced-routehandler-registration.js:163:1:163:33 | app.get ... t("f")) | src/advanced-routehandler-registration.js:163:14:163:32 | routesMap2.get("f") | | src/auth.js:4:1:4:53 | app.use ... d' }})) | src/auth.js:4:9:4:52 | basicAu ... rd' }}) | | src/csurf-example.js:13:1:13:20 | app.use('/api', api) | src/csurf-example.js:10:11:10:27 | createApiRouter() | +| src/csurf-example.js:13:1:13:20 | app.use('/api', api) | src/csurf-example.js:29:1:37:1 | return of function createApiRouter | | src/csurf-example.js:13:1:13:20 | app.use('/api', api) | src/csurf-example.js:30:16:30:35 | new express.Router() | | src/csurf-example.js:16:1:16:51 | app.use ... lse })) | src/csurf-example.js:16:9:16:50 | bodyPar ... alse }) | | src/csurf-example.js:17:1:17:23 | app.use ... rser()) | src/csurf-example.js:17:9:17:22 | cookieParser() | @@ -1880,6 +1881,7 @@ test_RouteSetup_getARouteHandler | src/express2.js:3:1:4:77 | router. ... sult }) | src/express2.js:4:32:4:76 | functio ... esult } | | src/express2.js:6:1:6:15 | app.use(router) | src/express2.js:2:14:2:23 | e.Router() | | src/express3.js:4:1:7:2 | app.get ... l");\\n}) | src/express3.js:4:23:7:1 | functio ... al");\\n} | +| src/express3.js:12:1:12:21 | app.use ... dler()) | src/express3.js:9:1:11:1 | return of function getHandler | | src/express3.js:12:1:12:21 | app.use ... dler()) | src/express3.js:10:12:10:32 | functio ... res){} | | src/express3.js:12:1:12:21 | app.use ... dler()) | src/express3.js:12:9:12:20 | getHandler() | | src/express4.js:4:1:9:2 | app.get ... c1);\\n}) | src/express4.js:4:23:9:1 | functio ... ic1);\\n} | @@ -1888,8 +1890,10 @@ test_RouteSetup_getARouteHandler | src/express.js:22:1:32:2 | app.pos ... r');\\n}) | src/express.js:22:30:32:1 | functio ... ar');\\n} | | src/express.js:34:1:34:53 | app.get ... andler) | src/exportedHandler.js:1:19:1:55 | functio ... res) {} | | src/express.js:34:1:34:53 | app.get ... andler) | src/express.js:34:14:34:52 | require ... handler | +| src/express.js:39:1:39:21 | app.use ... dler()) | src/express.js:36:1:38:1 | return of function getHandler | | src/express.js:39:1:39:21 | app.use ... dler()) | src/express.js:37:12:37:32 | functio ... res){} | | src/express.js:39:1:39:21 | app.use ... dler()) | src/express.js:39:9:39:20 | getHandler() | +| src/express.js:44:1:44:26 | app.use ... dler()) | src/express.js:41:1:43:1 | return of function getArrowHandler | | src/express.js:44:1:44:26 | app.use ... dler()) | src/express.js:42:12:42:28 | (req, res) => f() | | src/express.js:44:1:44:26 | app.use ... dler()) | src/express.js:44:9:44:25 | getArrowHandler() | | src/express.js:46:1:51:2 | app.pos ... me];\\n}) | src/express.js:46:22:51:1 | functio ... ame];\\n} | @@ -1908,6 +1912,7 @@ test_RouteSetup_getARouteHandler | src/routesetups.js:12:1:12:16 | root.post('', h) | src/routesetups.js:12:15:12:15 | h | | src/subrouter.js:4:1:4:26 | app.use ... rotect) | src/subrouter.js:4:19:4:25 | protect | | src/subrouter.js:5:1:5:29 | app.use ... uter()) | src/subrouter.js:5:14:5:28 | makeSubRouter() | +| src/subrouter.js:5:1:5:29 | app.use ... uter()) | src/subrouter.js:7:1:12:1 | return of function makeSubRouter | | src/subrouter.js:5:1:5:29 | app.use ... uter()) | src/subrouter.js:8:16:8:31 | express.Router() | | src/subrouter.js:9:3:9:35 | router. ... ndler1) | src/subrouter.js:9:27:9:34 | handler1 | | src/subrouter.js:10:3:10:41 | router. ... ndler2) | src/subrouter.js:10:33:10:40 | handler2 | diff --git a/javascript/ql/test/library-tests/frameworks/NodeJSLib/tests.expected b/javascript/ql/test/library-tests/frameworks/NodeJSLib/tests.expected index 8abd0287f95..6c2a02011fa 100644 --- a/javascript/ql/test/library-tests/frameworks/NodeJSLib/tests.expected +++ b/javascript/ql/test/library-tests/frameworks/NodeJSLib/tests.expected @@ -149,10 +149,12 @@ test_RouteSetup_getARouteHandler | createServer.js:4:1:4:47 | require ... => {}) | createServer.js:4:31:4:46 | (req, res) => {} | | src/http.js:4:14:10:2 | http.cr ... foo;\\n}) | src/http.js:4:32:10:1 | functio ... .foo;\\n} | | src/http.js:12:1:16:2 | http.cr ... r");\\n}) | src/http.js:12:19:16:1 | functio ... ar");\\n} | +| src/http.js:57:1:57:31 | http.cr ... dler()) | src/http.js:54:1:56:1 | return of function getHandler | | src/http.js:57:1:57:31 | http.cr ... dler()) | src/http.js:55:12:55:30 | function(req,res){} | | src/http.js:57:1:57:31 | http.cr ... dler()) | src/http.js:57:19:57:30 | getHandler() | | src/http.js:60:1:60:33 | createS ... res){}) | src/http.js:60:14:60:32 | function(req,res){} | | src/http.js:62:1:65:2 | http.cr ... 2");\\n}) | src/http.js:62:19:65:1 | functio ... r2");\\n} | +| src/http.js:70:1:70:36 | http.cr ... dler()) | src/http.js:67:1:69:1 | return of function getArrowHandler | | src/http.js:70:1:70:36 | http.cr ... dler()) | src/http.js:68:12:68:27 | (req,res) => f() | | src/http.js:70:1:70:36 | http.cr ... dler()) | src/http.js:70:19:70:35 | getArrowHandler() | | src/https.js:4:14:10:2 | https.c ... foo;\\n}) | src/https.js:4:33:10:1 | functio ... .foo;\\n} | diff --git a/javascript/ql/test/library-tests/frameworks/connect/tests.expected b/javascript/ql/test/library-tests/frameworks/connect/tests.expected index 09fc12218f9..78bfe6107e8 100644 --- a/javascript/ql/test/library-tests/frameworks/connect/tests.expected +++ b/javascript/ql/test/library-tests/frameworks/connect/tests.expected @@ -47,6 +47,7 @@ test_RouteHandler_getAResponseExpr test_RouteSetup_getARouteHandler | src/test.js:6:1:9:2 | app.use ... o');\\n}) | src/test.js:6:9:9:1 | functio ... oo');\\n} | | src/test.js:12:1:12:42 | app.use ... word')) | src/test.js:12:9:12:41 | basicAu ... sword') | +| src/test.js:17:1:17:21 | app.use ... dler()) | src/test.js:14:1:16:1 | return of function getHandler | | src/test.js:17:1:17:21 | app.use ... dler()) | src/test.js:15:12:15:32 | functio ... res){} | | src/test.js:17:1:17:21 | app.use ... dler()) | src/test.js:17:9:17:20 | getHandler() | | src/test.js:19:1:19:28 | app.use ... res){}) | src/test.js:19:9:19:27 | function(req,res){} | diff --git a/javascript/ql/test/library-tests/frameworks/hapi/tests.expected b/javascript/ql/test/library-tests/frameworks/hapi/tests.expected index a0db859d774..2579ed7a75a 100644 --- a/javascript/ql/test/library-tests/frameworks/hapi/tests.expected +++ b/javascript/ql/test/library-tests/frameworks/hapi/tests.expected @@ -37,6 +37,7 @@ test_RouteSetup_getARouteHandler | src/hapi.js:12:1:15:7 | server2 ... }}) | src/hapi.js:13:14:15:5 | functio ... n\\n } | | src/hapi.js:17:1:18:2 | server2 ... dler\\n}) | src/hapi.js:17:30:18:1 | functio ... ndler\\n} | | src/hapi.js:29:1:29:20 | server2.route(route) | src/hapi.js:20:1:27:1 | functio ... oken;\\n} | +| src/hapi.js:36:1:36:38 | server2 ... ler()}) | src/hapi.js:33:1:35:1 | return of function getHandler | | src/hapi.js:36:1:36:38 | server2 ... ler()}) | src/hapi.js:34:12:34:30 | function (req, h){} | | src/hapi.js:36:1:36:38 | server2 ... ler()}) | src/hapi.js:36:25:36:36 | getHandler() | test_RouteHandler diff --git a/javascript/ql/test/tutorials/Introducing the JavaScript libraries/query15.qll b/javascript/ql/test/tutorials/Introducing the JavaScript libraries/query15.qll index e76a54e172f..0d38bcf3d0e 100644 --- a/javascript/ql/test/tutorials/Introducing the JavaScript libraries/query15.qll +++ b/javascript/ql/test/tutorials/Introducing the JavaScript libraries/query15.qll @@ -1,12 +1,15 @@ import javascript -class TrackedStringLiteral extends DataFlow::TrackedNode { - TrackedStringLiteral() { this.asExpr() instanceof ConstantString } +DataFlow::Node constantString(DataFlow::TypeTracker t) { + t.start() and + result.asExpr() instanceof ConstantString + or + exists(DataFlow::TypeTracker t2 | t = t2.smallstep(constantString(t2), result)) } query predicate test_query15(DataFlow::Node sink) { - exists(TrackedStringLiteral source, SsaExplicitDefinition def | - source.flowsTo(sink) and + exists(SsaExplicitDefinition def | + sink = constantString(DataFlow::TypeTracker::end()) and sink = DataFlow::ssaDefinitionNode(def) and def.getSourceVariable().getName().toLowerCase() = "password" | diff --git a/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll b/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll index a7dc3dd7777..751e11a9044 100644 --- a/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll +++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll @@ -1,7 +1,14 @@ private import python -private import TaintTrackingPublic private import experimental.dataflow.DataFlow private import experimental.dataflow.internal.DataFlowPrivate +private import experimental.dataflow.internal.TaintTrackingPublic + +/** + * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding + * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent + * different objects. + */ +predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } /** * Holds if `node` should be a barrier in all global taint flow configurations @@ -10,12 +17,11 @@ private import experimental.dataflow.internal.DataFlowPrivate predicate defaultTaintBarrier(DataFlow::Node node) { none() } /** - * Holds if the additional step from `pred` to `succ` should be included in all + * Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all * global taint flow configurations. */ -predicate defaultAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { - none() - // localAdditionalTaintStep(pred, succ) - // or - // succ = pred.(DataFlow::NonLocalJumpNode).getAJumpSuccessor(false) +predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + localAdditionalTaintStep(nodeFrom, nodeTo) + or + any(AdditionalTaintStep a).step(nodeFrom, nodeTo) } diff --git a/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll b/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll index d929d6ce014..a6e42005920 100644 --- a/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll +++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll @@ -6,27 +6,52 @@ private import python private import TaintTrackingPrivate private import experimental.dataflow.DataFlow -// /** -// * Holds if taint propagates from `source` to `sink` in zero or more local -// * (intra-procedural) steps. -// */ -// predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) } -// // /** -// // * Holds if taint can flow from `e1` to `e2` in zero or more -// // * local (intra-procedural) steps. -// // */ -// // predicate localExprTaint(Expr e1, Expr e2) { -// // localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2)) -// // } -// // /** A member (property or field) that is tainted if its containing object is tainted. */ -// // abstract class TaintedMember extends AssignableMember { } -// /** -// * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local -// * (intra-procedural) step. -// */ -// predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { -// // Ordinary data flow -// DataFlow::localFlowStep(nodeFrom, nodeTo) -// or -// localAdditionalTaintStep(nodeFrom, nodeTo) -// } + +// Local taint flow and helpers +/** + * Holds if taint propagates from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) } + +/** + * Holds if taint can flow from `e1` to `e2` in zero or more local (intra-procedural) + * steps. + */ +predicate localExprTaint(Expr e1, Expr e2) { + localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2)) +} + +/** + * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // Ordinary data flow + DataFlow::localFlowStep(nodeFrom, nodeTo) + or + localAdditionalTaintStep(nodeFrom, nodeTo) +} + +// AdditionalTaintStep for global taint flow +private newtype TUnit = TMkUnit() + +/** A singleton class containing a single dummy "unit" value. */ +private class Unit extends TUnit { + /** Gets a textual representation of this element. */ + string toString() { result = "unit" } +} + +/** + * A unit class for adding additional taint steps. + * + * Extend this class to add additional taint steps that should apply to all + * taint configurations. + */ +class AdditionalTaintStep extends Unit { + /** + * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint + * step for all configurations. + */ + abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); +} diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting1.expected b/python/ql/test/experimental/dataflow/coverage/argumentRouting1.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting1.ql b/python/ql/test/experimental/dataflow/coverage/argumentRouting1.ql new file mode 100644 index 00000000000..79bed33d4c7 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/argumentRouting1.ql @@ -0,0 +1,30 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to check routing of arguments through magic methods. + */ +class ArgumentRoutingConfig extends DataFlow::Configuration { + ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" } + + override predicate isSource(DataFlow::Node node) { + exists(AssignmentDefinition def | + def.getVariable() = node.(DataFlow::EssaNode).getVar() and + def.getValue().(DataFlow::DataFlowCall).getCallable().getName().matches("With\\_%") + ) and + node.(DataFlow::EssaNode).getVar().getName().matches("with\\_%") + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() = "SINK1" and + node.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +} + +from DataFlow::Node source, DataFlow::Node sink +where + source.getLocation().getFile().getBaseName() = "classes.py" and + sink.getLocation().getFile().getBaseName() = "classes.py" and + exists(ArgumentRoutingConfig cfg | cfg.hasFlow(source, sink)) +select source, sink diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting2.expected b/python/ql/test/experimental/dataflow/coverage/argumentRouting2.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting2.ql b/python/ql/test/experimental/dataflow/coverage/argumentRouting2.ql new file mode 100644 index 00000000000..5ab8a4e7f6d --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/argumentRouting2.ql @@ -0,0 +1,26 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to check routing of arguments through magic methods. + */ +class ArgumentRoutingConfig extends DataFlow::Configuration { + ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" } + + override predicate isSource(DataFlow::Node node) { + node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg2" + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() = "SINK2" and + node.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +} + +from DataFlow::Node source, DataFlow::Node sink +where + source.getLocation().getFile().getBaseName() = "classes.py" and + sink.getLocation().getFile().getBaseName() = "classes.py" and + exists(ArgumentRoutingConfig cfg | cfg.hasFlow(source, sink)) +select source, sink diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting3.expected b/python/ql/test/experimental/dataflow/coverage/argumentRouting3.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting3.ql b/python/ql/test/experimental/dataflow/coverage/argumentRouting3.ql new file mode 100644 index 00000000000..f29a733b2ce --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/argumentRouting3.ql @@ -0,0 +1,26 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to check routing of arguments through magic methods. + */ +class ArgumentRoutingConfig extends DataFlow::Configuration { + ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" } + + override predicate isSource(DataFlow::Node node) { + node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg3" + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() = "SINK3" and + node.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +} + +from DataFlow::Node source, DataFlow::Node sink +where + source.getLocation().getFile().getBaseName() = "classes.py" and + sink.getLocation().getFile().getBaseName() = "classes.py" and + exists(ArgumentRoutingConfig cfg | cfg.hasFlow(source, sink)) +select source, sink diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting4.expected b/python/ql/test/experimental/dataflow/coverage/argumentRouting4.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRouting4.ql b/python/ql/test/experimental/dataflow/coverage/argumentRouting4.ql new file mode 100644 index 00000000000..ad9da22fb24 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/argumentRouting4.ql @@ -0,0 +1,26 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to check routing of arguments through magic methods. + */ +class ArgumentRoutingConfig extends DataFlow::Configuration { + ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" } + + override predicate isSource(DataFlow::Node node) { + node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg4" + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() = "SINK4" and + node.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +} + +from DataFlow::Node source, DataFlow::Node sink +where + source.getLocation().getFile().getBaseName() = "classes.py" and + sink.getLocation().getFile().getBaseName() = "classes.py" and + exists(ArgumentRoutingConfig cfg | cfg.hasFlow(source, sink)) +select source, sink diff --git a/python/ql/test/experimental/dataflow/coverage/classes.py b/python/ql/test/experimental/dataflow/coverage/classes.py index de06d284361..74951ca5cfd 100644 --- a/python/ql/test/experimental/dataflow/coverage/classes.py +++ b/python/ql/test/experimental/dataflow/coverage/classes.py @@ -8,6 +8,18 @@ # All functions starting with "test_" should run and print `"OK"`. # This can be checked by running validTest.py. +def SINK1(x): + pass + +def SINK2(x): + pass + +def SINK3(x): + pass + +def SINK4(x): + pass + def OK(): print("OK") @@ -15,6 +27,7 @@ def OK(): class With_new: def __new__(cls): + SINK1(cls) # Flow not found OK() return super().__new__(cls) @@ -25,6 +38,7 @@ def test_new(): class With_init: def __init__(self): + SINK1(self) # Flow not found OK() def test_init(): @@ -34,6 +48,7 @@ def test_init(): class With_del: def __del__(self): + SINK1(self) # Flow not found OK() def test_del(): @@ -44,6 +59,7 @@ def test_del(): class With_repr: def __repr__(self): + SINK1(self) # Flow not found OK() return "With_repr()" @@ -55,6 +71,7 @@ def test_repr(): class With_str: def __str__(self): + SINK1(self) # Flow not found OK() return "Awesome" @@ -66,6 +83,7 @@ def test_str(): class With_bytes: def __bytes__(self): + SINK1(self) # Flow not found OK() return b"Awesome" @@ -77,12 +95,15 @@ def test_bytes(): class With_format: def __format__(self, format_spec): + SINK2(format_spec) # Flow not found + SINK1(self) # Flow not found OK() return "Awesome" def test_format(): with_format = With_format() - format(with_format) + arg2 = "" + format(with_format, arg2) def test_format_str(): with_format = With_format() @@ -96,28 +117,36 @@ def test_format_fstr(): class With_lt: def __lt__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_lt(): with_lt = With_lt() - with_lt < with_lt + arg2 = with_lt + with_lt < arg2 # object.__le__(self, other) class With_le: def __le__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_le(): with_le = With_le() - with_le <= with_le + arg2 = with_le + with_le <= arg2 # object.__eq__(self, other) class With_eq: def __eq__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return "" @@ -129,6 +158,8 @@ def test_eq(): class With_ne: def __ne__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return "" @@ -140,28 +171,35 @@ def test_ne(): class With_gt: def __gt__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_gt(): with_gt = With_gt() - with_gt > with_gt + arg2 = with_gt + with_gt > arg2 # object.__ge__(self, other) class With_ge: def __ge__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_ge(): with_ge = With_ge() - with_ge >= with_ge + arg2 = with_ge + with_ge >= arg2 # object.__hash__(self) class With_hash: def __hash__(self): + SINK1(self) # Flow not found OK() return 0 @@ -185,6 +223,7 @@ def test_hash_dict(): class With_bool: def __bool__(self): + SINK1(self) # Flow not found OK() return True @@ -202,48 +241,59 @@ def test_bool_if(): class With_getattr: def __getattr__(self, name): + SINK2(name) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_getattr(): with_getattr = With_getattr() - with_getattr.foo + with_getattr.arg2 # object.__getattribute__(self, name) class With_getattribute: def __getattribute__(self, name): + SINK2(name) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_getattribute(): with_getattribute = With_getattribute() - with_getattribute.foo + with_getattribute.arg2 # object.__setattr__(self, name, value) class With_setattr: def __setattr__(self, name, value): + SINK3(value) # Flow not found + SINK2(name) # Flow not found + SINK1(self) # Flow not found OK() def test_setattr(): with_setattr = With_setattr() - with_setattr.foo = "" + arg3 = "" + with_setattr.arg2 = arg3 # object.__delattr__(self, name) class With_delattr: def __delattr__(self, name): + SINK2(name) # Flow not found + SINK1(self) # Flow not found OK() def test_delattr(): with_delattr = With_delattr() - del with_delattr.foo + del with_delattr.arg2 # object.__dir__(self) class With_dir: def __dir__(self): + SINK1(self) # Flow not found OK() return [] @@ -260,47 +310,63 @@ class Owner: class With_get: def __get__(self, instance, owner=None): + SINK3(owner) # Flow not testsed, use class `Owner` as source to test + SINK2(instance) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_get(): + class arg3: + pass + with_get = With_get() - Owner.attr = with_get - Owner.attr + arg3.attr = with_get + arg2 = arg3() + arg2.attr # object.__set__(self, instance, value) class With_set: def __set__(self, instance, value): + SINK3(value) # Flow not found + SINK2(instance) # Flow not found + SINK1(self) # Flow not found OK() def test_set(): with_set = With_set() Owner.attr = with_set - owner = Owner() - owner.attr = "" + arg2 = Owner() + arg3 = "" + arg2.attr = arg3 # object.__delete__(self, instance) class With_delete: def __delete__(self, instance): + SINK2(instance) # Flow not found + SINK1(self) # Flow not found OK() def test_delete(): with_delete = With_delete() Owner.attr = with_delete - owner = Owner() - del owner.attr + arg2 = Owner() + del arg2.attr # object.__set_name__(self, owner, name) class With_set_name: def __set_name__(self, owner, name): + SINK3(name) # Flow not found + SINK2(owner) # Flow not found + SINK1(self) # Flow not found OK() def test_set_name(): with_set_name = With_set_name() - type("Owner", (object,), dict(attr=with_set_name)) + type("arg2", (object,), dict(arg3=with_set_name)) # 3.3.2.4. __slots__ // We are not testing the suppression of -weakref_ and _dict_ here # object.__slots__ @@ -312,6 +378,7 @@ def test_set_name(): class With_init_subclass: def __init_subclass__(cls): + SINK1(cls) # Flow not found OK() def test_init_subclass(): @@ -328,12 +395,15 @@ def test_init_subclass(): class With_prepare(type): def __prepare__(name, bases, **kwds): + SINK3(kwds) # Flow not tested + SINK2(bases) # Flow not tested + SINK1(name) # Flow not found OK() return kwds def test_prepare(): - class With_meta(metaclass=With_prepare): + class arg1(metaclass=With_prepare): pass # 3.3.4. Customizing instance and subclass checks @@ -341,23 +411,29 @@ def test_prepare(): class With_instancecheck: def __instancecheck__(self, instance): + SINK2(instance) # Flow not found + SINK1(self) # Flow not found OK() return True def test_instancecheck(): with_instancecheck = With_instancecheck() - isinstance("", with_instancecheck) + arg2 = "" + isinstance(arg2, with_instancecheck) # class.__subclasscheck__(self, subclass) class With_subclasscheck: def __subclasscheck__(self, subclass): + SINK2(subclass) # Flow not found + SINK1(self) # Flow not found OK() return True def test_subclasscheck(): with_subclasscheck = With_subclasscheck() - issubclass(object, with_subclasscheck) + arg2 = object + issubclass(arg2, with_subclasscheck) # 3.3.5. Emulating generic types @@ -365,11 +441,14 @@ def test_subclasscheck(): class With_class_getitem: def __class_getitem__(cls, key): + SINK2(key) # Flow not found + SINK1(cls) # Flow not found OK() return object def test_class_getitem(): - with_class_getitem = With_class_getitem[int]() + arg2 = int + with_class_getitem = With_class_getitem[arg2]() # 3.3.6. Emulating callable objects @@ -377,6 +456,7 @@ def test_class_getitem(): class With_call: def __call__(self): + SINK1(self) # Flow not found OK() def test_call(): @@ -388,6 +468,7 @@ def test_call(): class With_len: def __len__(self): + SINK1(self) # Flow not found OK() return 0 @@ -408,6 +489,7 @@ def test_len_if(): class With_length_hint: def __length_hint__(self): + SINK1(self) # Flow not found OK() return 0 @@ -420,48 +502,63 @@ def test_length_hint(): class With_getitem: def __getitem__(self, key): + SINK2(key) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_getitem(): with_getitem = With_getitem() - with_getitem[0] + arg2 = 0 + with_getitem[arg2] # object.__setitem__(self, key, value) class With_setitem: def __setitem__(self, key, value): + SINK3(value) # Flow not found + SINK2(key) # Flow not found + SINK1(self) # Flow not found OK() def test_setitem(): with_setitem = With_setitem() - with_setitem[0] = "" + arg2 = 0 + arg3 = "" + with_setitem[arg2] = arg3 # object.__delitem__(self, key) class With_delitem: def __delitem__(self, key): + SINK2(key) # Flow not found + SINK1(self) # Flow not found OK() def test_delitem(): with_delitem = With_delitem() - del with_delitem[0] + arg2 = 0 + del with_delitem[arg2] # object.__missing__(self, key) class With_missing(dict): def __missing__(self, key): + SINK2(key) # Flow not found + SINK1(self) # Flow not found OK() return "" def test_missing(): with_missing = With_missing() - with_missing[""] + arg2 = 0 + with_missing[arg2] # object.__iter__(self) class With_iter: def __iter__(self): + SINK1(self) # Flow not found OK() return [].__iter__() @@ -473,6 +570,7 @@ def test_iter(): class With_reversed: def __reversed__(self): + SINK1(self) # Flow not found OK() return [].__iter__ @@ -484,12 +582,15 @@ def test_reversed(): class With_contains: def __contains__(self, item): + SINK2(item) # Flow not found + SINK1(self) # Flow not found OK() return True def test_contains(): with_contains = With_contains() - 0 in with_contains + arg2 = 0 + arg2 in with_contains # 3.3.8. Emulating numeric types @@ -497,465 +598,591 @@ def test_contains(): class With_add: def __add__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_add(): with_add = With_add() - with_add + with_add + arg2 = with_add + with_add + arg2 # object.__sub__(self, other) class With_sub: def __sub__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_sub(): with_sub = With_sub() - with_sub - with_sub + arg2 = with_sub + with_sub - arg2 # object.__mul__(self, other) class With_mul: def __mul__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_mul(): with_mul = With_mul() - with_mul * with_mul + arg2 = with_mul + with_mul * arg2 # object.__matmul__(self, other) class With_matmul: def __matmul__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_matmul(): with_matmul = With_matmul() - with_matmul @ with_matmul + arg2 = with_matmul + with_matmul @ arg2 # object.__truediv__(self, other) class With_truediv: def __truediv__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_truediv(): with_truediv = With_truediv() - with_truediv / with_truediv + arg2 = with_truediv + with_truediv / arg2 # object.__floordiv__(self, other) class With_floordiv: def __floordiv__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_floordiv(): with_floordiv = With_floordiv() - with_floordiv // with_floordiv + arg2 = with_floordiv + with_floordiv // arg2 # object.__mod__(self, other) class With_mod: def __mod__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_mod(): with_mod = With_mod() - with_mod % with_mod + arg2 = with_mod + with_mod % arg2 # object.__divmod__(self, other) class With_divmod: def __divmod__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_divmod(): with_divmod = With_divmod() - divmod(with_divmod, with_divmod) + arg2 = With_divmod + divmod(with_divmod, arg2) # object.__pow__(self, other[, modulo]) class With_pow: def __pow__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_pow(): with_pow = With_pow() - pow(with_pow, with_pow) + arg2 = with_pow + pow(with_pow, arg2) def test_pow_op(): with_pow = With_pow() - with_pow ** with_pow + arg2 = with_pow + with_pow ** arg2 # object.__lshift__(self, other) class With_lshift: def __lshift__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_lshift(): with_lshift = With_lshift() - with_lshift << with_lshift + arg2 = with_lshift + with_lshift << arg2 # object.__rshift__(self, other) class With_rshift: def __rshift__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rshift(): with_rshift = With_rshift() - with_rshift >> with_rshift + arg2 = with_rshift + with_rshift >> arg2 # object.__and__(self, other) class With_and: def __and__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_and(): with_and = With_and() - with_and & with_and + arg2 = with_and + with_and & arg2 # object.__xor__(self, other) class With_xor: def __xor__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_xor(): with_xor = With_xor() - with_xor ^ with_xor + arg2 = with_xor + with_xor ^ arg2 # object.__or__(self, other) class With_or: def __or__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_or(): with_or = With_or() - with_or | with_or + arg2 = with_or + with_or | arg2 # object.__radd__(self, other) class With_radd: def __radd__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_radd(): with_radd = With_radd() - "" + with_radd + arg2 = "" + arg2 + with_radd # object.__rsub__(self, other) class With_rsub: def __rsub__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rsub(): with_rsub = With_rsub() - "" - with_rsub + arg2 = "" + arg2 - with_rsub # object.__rmul__(self, other) class With_rmul: def __rmul__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rmul(): with_rmul = With_rmul() - "" * with_rmul + arg2 = "" + arg2 * with_rmul # object.__rmatmul__(self, other) class With_rmatmul: def __rmatmul__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rmatmul(): with_rmatmul = With_rmatmul() - "" @ with_rmatmul + arg2 = "" + arg2 @ with_rmatmul # object.__rtruediv__(self, other) class With_rtruediv: def __rtruediv__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rtruediv(): with_rtruediv = With_rtruediv() - "" / with_rtruediv + arg2 = "" + arg2 / with_rtruediv # object.__rfloordiv__(self, other) class With_rfloordiv: def __rfloordiv__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rfloordiv(): with_rfloordiv = With_rfloordiv() - "" // with_rfloordiv + arg2 = "" + arg2 // with_rfloordiv # object.__rmod__(self, other) class With_rmod: def __rmod__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rmod(): with_rmod = With_rmod() - {} % with_rmod + arg2 = {} + arg2 % with_rmod # object.__rdivmod__(self, other) class With_rdivmod: def __rdivmod__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rdivmod(): with_rdivmod = With_rdivmod() - divmod("", with_rdivmod) + arg2 = "" + divmod(arg2, with_rdivmod) # object.__rpow__(self, other[, modulo]) class With_rpow: def __rpow__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rpow(): with_rpow = With_rpow() - pow("", with_rpow) + arg2 = "" + pow(arg2, with_rpow) def test_rpow_op(): with_rpow = With_rpow() - "" ** with_rpow + arg2 = "" + arg2 ** with_rpow # object.__rlshift__(self, other) class With_rlshift: def __rlshift__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rlshift(): with_rlshift = With_rlshift() - "" << with_rlshift + arg2 = "" + arg2 << with_rlshift # object.__rrshift__(self, other) class With_rrshift: def __rrshift__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rrshift(): with_rrshift = With_rrshift() - "" >> with_rrshift + arg2 = "" + arg2 >> with_rrshift # object.__rand__(self, other) class With_rand: def __rand__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rand(): with_rand = With_rand() - "" & with_rand + arg2 = "" + arg2 & with_rand # object.__rxor__(self, other) class With_rxor: def __rxor__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_rxor(): with_rxor = With_rxor() - "" ^ with_rxor + arg2 = "" + arg2 ^ with_rxor # object.__ror__(self, other) class With_ror: def __ror__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_ror(): with_ror = With_ror() - "" | with_ror + arg2 = "" + arg2 | with_ror # object.__iadd__(self, other) class With_iadd: def __iadd__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_iadd(): with_iadd = With_iadd() - with_iadd += with_iadd + arg2 = with_iadd + with_iadd += arg2 # object.__isub__(self, other) class With_isub: def __isub__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_isub(): with_isub = With_isub() - with_isub -= with_isub + arg2 = with_isub + with_isub -= arg2 # object.__imul__(self, other) class With_imul: def __imul__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_imul(): with_imul = With_imul() - with_imul *= with_imul + arg2 = with_imul + with_imul *= arg2 # object.__imatmul__(self, other) class With_imatmul: def __imatmul__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_imatmul(): with_imatmul = With_imatmul() - with_imatmul @= with_imatmul + arg2 = with_imatmul + with_imatmul @= arg2 # object.__itruediv__(self, other) class With_itruediv: def __itruediv__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_itruediv(): with_itruediv = With_itruediv() - with_itruediv /= with_itruediv + arg2 = with_itruediv + with_itruediv /= arg2 # object.__ifloordiv__(self, other) class With_ifloordiv: def __ifloordiv__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_ifloordiv(): with_ifloordiv = With_ifloordiv() - with_ifloordiv //= with_ifloordiv + arg2 = with_ifloordiv + with_ifloordiv //= arg2 # object.__imod__(self, other) class With_imod: def __imod__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_imod(): with_imod = With_imod() - with_imod %= with_imod + arg2 = with_imod + with_imod %= arg2 # object.__ipow__(self, other[, modulo]) class With_ipow: def __ipow__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_ipow(): with_ipow = With_ipow() - with_ipow **= with_ipow + arg2 = with_ipow + with_ipow **= arg2 # object.__ilshift__(self, other) class With_ilshift: def __ilshift__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_ilshift(): with_ilshift = With_ilshift() - with_ilshift <<= with_ilshift + arg2 = with_ilshift + with_ilshift <<= arg2 # object.__irshift__(self, other) class With_irshift: def __irshift__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_irshift(): with_irshift = With_irshift() - with_irshift >>= with_irshift + arg2 = with_irshift + with_irshift >>= arg2 # object.__iand__(self, other) class With_iand: def __iand__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_iand(): with_iand = With_iand() - with_iand &= with_iand + arg2 = with_iand + with_iand &= arg2 # object.__ixor__(self, other) class With_ixor: def __ixor__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_ixor(): with_ixor = With_ixor() - with_ixor ^= with_ixor + arg2 = with_ixor + with_ixor ^= arg2 # object.__ior__(self, other) class With_ior: def __ior__(self, other): + SINK2(other) # Flow not found + SINK1(self) # Flow not found OK() return self def test_ior(): with_ior = With_ior() - with_ior |= with_ior + arg2 = with_ior + with_ior |= arg2 # object.__neg__(self) class With_neg: def __neg__(self): + SINK1(self) # Flow not found OK() return self @@ -967,6 +1194,7 @@ def test_neg(): class With_pos: def __pos__(self): + SINK1(self) # Flow not found OK() return self @@ -978,6 +1206,7 @@ def test_pos(): class With_abs: def __abs__(self): + SINK1(self) # Flow not found OK() return self @@ -989,6 +1218,7 @@ def test_abs(): class With_invert: def __invert__(self): + SINK1(self) # Flow not found OK() return self @@ -1000,6 +1230,7 @@ def test_invert(): class With_complex: def __complex__(self): + SINK1(self) # Flow not found OK() return 0j @@ -1011,6 +1242,7 @@ def test_complex(): class With_int: def __int__(self): + SINK1(self) # Flow not found OK() return 0 @@ -1022,6 +1254,7 @@ def test_int(): class With_float: def __float__(self): + SINK1(self) # Flow not found OK() return 0.0 @@ -1033,6 +1266,7 @@ def test_float(): class With_index: def __index__(self): + SINK1(self) # Flow not found OK() return 0 @@ -1073,6 +1307,7 @@ def test_index_complex(): class With_round: def __round__(self): + SINK1(self) # Flow not found OK() return 0 @@ -1084,6 +1319,7 @@ def test_round(): class With_trunc: def __trunc__(self): + SINK1(self) # Flow not found OK() return 0 @@ -1096,6 +1332,7 @@ def test_trunc(): class With_floor: def __floor__(self): + SINK1(self) # Flow not found OK() return 0 @@ -1108,6 +1345,7 @@ def test_floor(): class With_ceil: def __ceil__(self): + SINK1(self) # Flow not found OK() return 0 @@ -1122,6 +1360,7 @@ def test_ceil(): class With_enter: def __enter__(self): + SINK1(self) # Flow not found OK() return @@ -1139,6 +1378,10 @@ class With_exit: return def __exit__(self, exc_type, exc_value, traceback): + SINK4(traceback) # Flow not tested + SINK3(exc_value) # Flow not tested + SINK2(exc_type) # Flow not tested + SINK1(self) # Flow not found OK() return @@ -1153,6 +1396,7 @@ import asyncio class With_await: def __await__(self): + SINK1(self) # Flow not found OK() return (yield from asyncio.coroutine(lambda: "")()) @@ -1171,6 +1415,7 @@ async def atest_await(): class With_aiter: def __aiter__(self): + SINK1(self) # Flow not found OK() return self @@ -1189,6 +1434,7 @@ class With_anext: return self async def __anext__(self): + SINK1(self) # Flow not found OK() raise StopAsyncIteration @@ -1203,6 +1449,7 @@ async def atest_anext(): class With_aenter: async def __aenter__(self): + SINK1(self) # Flow not found OK() async def __aexit__(self, exc_type, exc_value, traceback): @@ -1220,6 +1467,10 @@ class With_aexit: pass async def __aexit__(self, exc_type, exc_value, traceback): + SINK4(traceback) # Flow not tested + SINK3(exc_value) # Flow not tested + SINK2(exc_type) # Flow not tested + SINK1(self) # Flow not found OK() async def atest_aexit(): diff --git a/python/ql/test/experimental/dataflow/coverage/classesCallGraph.expected b/python/ql/test/experimental/dataflow/coverage/classesCallGraph.expected index 4baf0b077ab..61b40daef31 100644 --- a/python/ql/test/experimental/dataflow/coverage/classesCallGraph.expected +++ b/python/ql/test/experimental/dataflow/coverage/classesCallGraph.expected @@ -1,6 +1,6 @@ -| classes.py:19:12:19:31 | ControlFlowNode for Attribute() | classes.py:19:12:19:31 | ControlFlowNode for Attribute() | -| classes.py:174:7:174:22 | ControlFlowNode for set() | classes.py:174:7:174:22 | ControlFlowNode for set() | -| classes.py:178:7:178:28 | ControlFlowNode for frozenset() | classes.py:178:7:178:28 | ControlFlowNode for frozenset() | -| classes.py:182:7:182:26 | ControlFlowNode for dict() | classes.py:182:7:182:26 | ControlFlowNode for dict() | -| classes.py:303:28:303:51 | ControlFlowNode for dict() | classes.py:303:28:303:51 | ControlFlowNode for dict() | -| classes.py:466:12:466:24 | ControlFlowNode for Attribute() | classes.py:466:12:466:24 | ControlFlowNode for Attribute() | +| classes.py:32:12:32:31 | ControlFlowNode for Attribute() | classes.py:32:12:32:31 | ControlFlowNode for Attribute() | +| classes.py:212:7:212:22 | ControlFlowNode for set() | classes.py:212:7:212:22 | ControlFlowNode for set() | +| classes.py:216:7:216:28 | ControlFlowNode for frozenset() | classes.py:216:7:216:28 | ControlFlowNode for frozenset() | +| classes.py:220:7:220:26 | ControlFlowNode for dict() | classes.py:220:7:220:26 | ControlFlowNode for dict() | +| classes.py:369:27:369:50 | ControlFlowNode for dict() | classes.py:369:27:369:50 | ControlFlowNode for dict() | +| classes.py:563:12:563:24 | ControlFlowNode for Attribute() | classes.py:563:12:563:24 | ControlFlowNode for Attribute() | diff --git a/python/ql/test/experimental/dataflow/coverage/classesCallGraph.ql b/python/ql/test/experimental/dataflow/coverage/classesCallGraph.ql index 1445e314c98..1763993df18 100644 --- a/python/ql/test/experimental/dataflow/coverage/classesCallGraph.ql +++ b/python/ql/test/experimental/dataflow/coverage/classesCallGraph.ql @@ -1,4 +1,29 @@ -import experimental.dataflow.callGraphConfig +import experimental.dataflow.DataFlow + +/** + * A configuration to find the call graph edges. + */ +class CallGraphConfig extends DataFlow::Configuration { + CallGraphConfig() { this = "CallGraphConfig" } + + override predicate isSource(DataFlow::Node node) { + node instanceof DataFlow::ReturnNode + or + // These sources should allow for the non-standard call syntax + node instanceof DataFlow::ArgumentNode + } + + override predicate isSink(DataFlow::Node node) { + node instanceof DataFlow::OutNode + or + node instanceof DataFlow::ParameterNode and + // exclude parameters to the SINK-functions + not exists(DataFlow::DataFlowCallable c | + node.(DataFlow::ParameterNode).isParameterOf(c, _) and + c.getName().matches("SINK_") + ) + } +} from DataFlow::Node source, DataFlow::Node sink where @@ -8,3 +33,4 @@ where select source, sink // Ideally, we would just have 1-step paths either from argument to parameter // or from return to call. This gives a bit more, so should be rewritten. +// We should also consider splitting this into two, one for each direction. diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/GlobalTaintTracking.expected b/python/ql/test/experimental/dataflow/tainttracking/basic/GlobalTaintTracking.expected new file mode 100644 index 00000000000..23bb0fcce7a --- /dev/null +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/GlobalTaintTracking.expected @@ -0,0 +1,2 @@ +| test.py:3:11:3:16 | ControlFlowNode for SOURCE | test.py:4:6:4:12 | ControlFlowNode for tainted | +| test.py:7:20:7:25 | ControlFlowNode for SOURCE | test.py:8:10:8:21 | ControlFlowNode for also_tainted | diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/GlobalTaintTracking.ql b/python/ql/test/experimental/dataflow/tainttracking/basic/GlobalTaintTracking.ql new file mode 100644 index 00000000000..ceec943efa1 --- /dev/null +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/GlobalTaintTracking.ql @@ -0,0 +1,22 @@ +import python +import experimental.dataflow.TaintTracking +import experimental.dataflow.DataFlow + +class TestTaintTrackingConfiguration extends TaintTracking::Configuration { + TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" } + + override predicate isSource(DataFlow::Node source) { + source.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE" + } + + override predicate isSink(DataFlow::Node sink) { + exists(CallNode call | + call.getFunction().(NameNode).getId() = "SINK" and + sink.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +} + +from TestTaintTrackingConfiguration config, DataFlow::Node source, DataFlow::Node sink +where config.hasFlow(source, sink) +select source, sink diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected new file mode 100644 index 00000000000..b23839170ff --- /dev/null +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected @@ -0,0 +1,5 @@ +| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:5:7:16 | SSA variable also_tainted | test.py:8:5:8:22 | SSA variable also_tainted | +| test.py:7:5:7:16 | SSA variable also_tainted | test.py:8:10:8:21 | ControlFlowNode for also_tainted | +| test.py:7:20:7:25 | ControlFlowNode for SOURCE | test.py:7:5:7:16 | SSA variable also_tainted | +| test.py:8:5:8:22 | SSA variable also_tainted | test.py:6:1:6:11 | Exit node for Function func | diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql new file mode 100644 index 00000000000..65f11a9294c --- /dev/null +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql @@ -0,0 +1,7 @@ +import python +import experimental.dataflow.TaintTracking +import experimental.dataflow.DataFlow + +from DataFlow::Node nodeFrom, DataFlow::Node nodeTo +where TaintTracking::localTaintStep(nodeFrom, nodeTo) +select nodeFrom, nodeTo diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/test.py b/python/ql/test/experimental/dataflow/tainttracking/basic/test.py new file mode 100644 index 00000000000..fcf5edf0e20 --- /dev/null +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/test.py @@ -0,0 +1,8 @@ +# Module level taint is different from inside functions, since shared dataflow library +# relies on `getEnclosingCallable` +tainted = SOURCE +SINK(tainted) + +def func(): + also_tainted = SOURCE + SINK(also_tainted)