diff --git a/config/identical-files.json b/config/identical-files.json index d4fa5c219d3..5e28ac94a9b 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -29,6 +29,10 @@ "cpp/ql/src/semmle/code/cpp/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", "cpp/ql/src/semmle/code/cpp/dataflow/internal/tainttracking2/TaintTrackingImpl.qll" ], + "Taint tracking Java": [ + "java/ql/src/semmle/code/java/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", + "java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll" + ], "IR Instruction": [ "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll", "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll", diff --git a/java/ql/src/semmle/code/java/dataflow/TaintTracking.qll b/java/ql/src/semmle/code/java/dataflow/TaintTracking.qll index 6d79359bb36..8dd3e12735a 100644 --- a/java/ql/src/semmle/code/java/dataflow/TaintTracking.qll +++ b/java/ql/src/semmle/code/java/dataflow/TaintTracking.qll @@ -1,804 +1,24 @@ /** * Provides classes for performing local (intra-procedural) and * global (inter-procedural) taint-tracking analyses. + * + * We define _taint propagation_ informally to mean that a substantial part of + * the information from the source is preserved at the sink. For example, taint + * propagates from `x` to `x + 100`, but it does not propagate from `x` to `x > + * 100` since we consider a single bit of information to be too little. */ - -import java import semmle.code.java.dataflow.DataFlow import semmle.code.java.dataflow.DataFlow2 -import semmle.code.java.Collections -private import SSA -private import DefUse -private import semmle.code.java.security.SecurityTests -private import semmle.code.java.security.Validation -private import semmle.code.java.frameworks.android.Intent -private import semmle.code.java.frameworks.Guice -private import semmle.code.java.frameworks.Protobuf -private import semmle.code.java.Maps -private import semmle.code.java.dataflow.internal.ContainerFlow + +import semmle.code.java.dataflow.internal.TaintTrackingUtil::StringBuilderVarModule module TaintTracking { - /** - * A taint tracking configuration. - * - * A taint tracking configuration is a special dataflow configuration - * (`DataFlow::Configuration`) that allows for flow through nodes that do not - * necessarily preserve values, but are still relevant from a taint tracking - * perspective. (For example, string concatenation, where one of the operands - * is tainted.) - * - * Each use of the taint tracking library must define its own unique extension - * of this abstract class. A configuration defines a set of relevant sources - * (`isSource`) and sinks (`isSink`), and may additionally treat intermediate - * nodes as "sanitizers" (`isSanitizer`) as well as add custom taint flow steps - * (`isAdditionalTaintStep()`). - */ - abstract class Configuration extends DataFlow::Configuration { - bindingset[this] - Configuration() { any() } - - /** - * Holds if `source` is a relevant taint source. - * - * The smaller this predicate is, the faster `hasFlow()` will converge. - */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSource(DataFlow::Node source); - - /** - * Holds if `sink` is a relevant taint sink. - * - * The smaller this predicate is, the faster `hasFlow()` will converge. - */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSink(DataFlow::Node sink); - - /** Holds if the node `node` is a taint sanitizer. */ - predicate isSanitizer(DataFlow::Node node) { none() } - - final override predicate isBarrier(DataFlow::Node node) { - isSanitizer(node) or - // Ignore paths through test code. - node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass or - node.asExpr() instanceof ValidatedVariableAccess - } - - /** DEPRECATED: override `isSanitizerIn` and `isSanitizerOut` instead. */ - deprecated predicate isSanitizerEdge(DataFlow::Node node1, DataFlow::Node node2) { none() } - - deprecated final override predicate isBarrierEdge(DataFlow::Node node1, DataFlow::Node node2) { - isSanitizerEdge(node1, node2) - } - - /** Holds if data flow into `node` is prohibited. */ - predicate isSanitizerIn(DataFlow::Node node) { none() } - - final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } - - /** Holds if data flow out of `node` is prohibited. */ - predicate isSanitizerOut(DataFlow::Node node) { none() } - - final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } - - /** Holds if data flow through nodes guarded by `guard` is prohibited. */ - predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } - - final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) } - - /** - * Holds if the additional taint propagation step from `node1` to `node2` - * must be taken into account in the analysis. - */ - predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } - - final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isAdditionalTaintStep(node1, node2) or - localAdditionalTaintStep(node1, node2) - } - - /** - * Holds if taint may flow from `source` to `sink` for this configuration. - */ - // overridden to provide taint-tracking specific qldoc - override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { - super.hasFlow(source, sink) - } - } + import semmle.code.java.dataflow.internal.tainttracking1.TaintTrackingImpl + private import semmle.code.java.dataflow.TaintTracking2 /** - * A taint tracking configuration. - * - * A taint tracking configuration is a special dataflow configuration - * (`DataFlow::Configuration`) that allows for flow through nodes that do not - * necessarily preserve values, but are still relevant from a taint tracking - * perspective. (For example, string concatenation, where one of the operands - * is tainted.) - * - * Each use of the taint tracking library must define its own unique extension - * of this abstract class. A configuration defines a set of relevant sources - * (`isSource`) and sinks (`isSink`), and may additionally treat intermediate - * nodes as "sanitizers" (`isSanitizer`) as well as add custom taint flow steps - * (`isAdditionalTaintStep()`). + * DEPRECATED: Use TaintTracking2::Configuration instead. */ - abstract class Configuration2 extends DataFlow2::Configuration { - bindingset[this] - Configuration2() { any() } - - /** - * Holds if `source` is a relevant taint source. - * - * The smaller this predicate is, the faster `hasFlow()` will converge. - */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSource(DataFlow::Node source); - - /** - * Holds if `sink` is a relevant taint sink. - * - * The smaller this predicate is, the faster `hasFlow()` will converge. - */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSink(DataFlow::Node sink); - - /** Holds if the node `node` is a taint sanitizer. */ - predicate isSanitizer(DataFlow::Node node) { none() } - - final override predicate isBarrier(DataFlow::Node node) { - isSanitizer(node) or - // Ignore paths through test code. - node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass or - node.asExpr() instanceof ValidatedVariableAccess - } - - /** DEPRECATED: override `isSanitizerIn` and `isSanitizerOut` instead. */ - deprecated predicate isSanitizerEdge(DataFlow::Node node1, DataFlow::Node node2) { none() } - - deprecated final override predicate isBarrierEdge(DataFlow::Node node1, DataFlow::Node node2) { - isSanitizerEdge(node1, node2) - } - - /** Holds if data flow into `node` is prohibited. */ - predicate isSanitizerIn(DataFlow::Node node) { none() } - - final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } - - /** Holds if data flow out of `node` is prohibited. */ - predicate isSanitizerOut(DataFlow::Node node) { none() } - - final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } - - /** Holds if data flow through nodes guarded by `guard` is prohibited. */ - predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } - - final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) } - - /** - * Holds if the additional taint propagation step from `node1` to `node2` - * must be taken into account in the analysis. - */ - predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } - - final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isAdditionalTaintStep(node1, node2) or - localAdditionalTaintStep(node1, node2) - } - - /** - * Holds if taint may flow from `source` to `sink` for this configuration. - */ - // overridden to provide taint-tracking specific qldoc - override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { - super.hasFlow(source, sink) - } - } - - /** - * Holds if taint can flow from `src` to `sink` in zero or more - * local (intra-procedural) steps. - */ - predicate localTaint(DataFlow::Node src, DataFlow::Node sink) { localTaintStep*(src, sink) } - - /** - * Holds if taint can flow in one local step from `src` to `sink`. - */ - predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) { - DataFlow::localFlowStep(src, sink) or - localAdditionalTaintStep(src, sink) - } - - /** - * Holds if taint can flow in one local step from `src` to `sink` excluding - * local data flow steps. That is, `src` and `sink` are likely to represent - * different objects. - */ - predicate localAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) { - localAdditionalTaintExprStep(src.asExpr(), sink.asExpr()) - or - exists(Argument arg | - src.asExpr() = arg and - arg.isVararg() and - sink.(DataFlow::ImplicitVarargsArray).getCall() = arg.getCall() - ) - } - - /** - * Holds if taint can flow in one local step from `src` to `sink` excluding - * local data flow steps. That is, `src` and `sink` are likely to represent - * different objects. - */ - private predicate localAdditionalTaintExprStep(Expr src, Expr sink) { - sink.(AddExpr).getAnOperand() = src and sink.getType() instanceof TypeString - or - sink.(AssignAddExpr).getSource() = src and sink.getType() instanceof TypeString - or - sink.(ArrayCreationExpr).getInit() = src - or - sink.(ArrayInit).getAnInit() = src - or - sink.(ArrayAccess).getArray() = src - or - sink.(LogicExpr).getAnOperand() = src - or - exists(Assignment assign | assign.getSource() = src | - sink = assign.getDest().(ArrayAccess).getArray() - ) - or - exists(EnhancedForStmt for, SsaExplicitUpdate v | - for.getExpr() = src and - v.getDefiningExpr() = for.getVariable() and - v.getAFirstUse() = sink - ) - or - containerStep(src, sink) - or - constructorStep(src, sink) - or - qualifierToMethodStep(src, sink) - or - qualifierToArgumentStep(src, sink) - or - argToMethodStep(src, sink) - or - argToArgStep(src, sink) - or - argToQualifierStep(src, sink) - or - comparisonStep(src, sink) - or - stringBuilderStep(src, sink) - or - serializationStep(src, sink) - } - - private class BulkData extends RefType { - BulkData() { - this.(Array).getElementType().(PrimitiveType).getName().regexpMatch("byte|char") - or - exists(RefType t | this.getASourceSupertype*() = t | - t.hasQualifiedName("java.io", "InputStream") or - t.hasQualifiedName("java.nio", "ByteBuffer") or - t.hasQualifiedName("java.lang", "Readable") or - t.hasQualifiedName("java.io", "DataInput") or - t.hasQualifiedName("java.nio.channels", "ReadableByteChannel") - ) - } - } - - /** - * Holds if `c` is a constructor for a subclass of `java.io.InputStream` that - * wraps an underlying data source. The underlying data source is given as a - * the `argi`'th parameter to the constructor. - * - * An object construction of such a wrapper is likely to preserve the data flow - * status of its argument. - */ - private predicate inputStreamWrapper(Constructor c, int argi) { - c.getParameterType(argi) instanceof BulkData and - c.getDeclaringType().getASourceSupertype().hasQualifiedName("java.io", "InputStream") - } - - /** An object construction that preserves the data flow status of any of its arguments. */ - private predicate constructorStep(Expr tracked, ConstructorCall sink) { - exists(int argi | sink.getArgument(argi) = tracked | - exists(string s | sink.getConstructedType().getQualifiedName() = s | - // String constructor does nothing to data - s = "java.lang.String" and argi = 0 - or - // some readers preserve the content of streams - s = "java.io.InputStreamReader" and argi = 0 - or - s = "java.io.BufferedReader" and argi = 0 - or - s = "java.io.CharArrayReader" and argi = 0 - or - s = "java.io.StringReader" and argi = 0 - or - // data preserved through streams - s = "java.io.ObjectInputStream" and argi = 0 - or - s = "java.io.ByteArrayInputStream" and argi = 0 - or - s = "java.io.DataInputStream" and argi = 0 - or - s = "java.io.BufferedInputStream" and argi = 0 - or - s = "com.esotericsoftware.kryo.io.Input" and argi = 0 - or - s = "java.beans.XMLDecoder" and argi = 0 - or - // a tokenizer preserves the content of a string - s = "java.util.StringTokenizer" and argi = 0 - or - // unzipping the stream preserves content - s = "java.util.zip.ZipInputStream" and argi = 0 - or - s = "java.util.zip.GZIPInputStream" and argi = 0 - or - // string builders and buffers - s = "java.lang.StringBuilder" and argi = 0 - or - s = "java.lang.StringBuffer" and argi = 0 - or - // a cookie with tainted ingredients is tainted - s = "javax.servlet.http.Cookie" and argi = 0 - or - s = "javax.servlet.http.Cookie" and argi = 1 - or - // various xml stream source constructors. - s = "org.xml.sax.InputSource" and argi = 0 - or - s = "javax.xml.transform.sax.SAXSource" and argi = 0 and sink.getNumArgument() = 1 - or - s = "javax.xml.transform.sax.SAXSource" and argi = 1 and sink.getNumArgument() = 2 - or - s = "javax.xml.transform.stream.StreamSource" and argi = 0 - or - //a URI constructed from a tainted string is tainted. - s = "java.net.URI" and argi = 0 and sink.getNumArgument() = 1 - ) - or - exists(RefType t | t.getQualifiedName() = "java.lang.Number" | - hasSubtype*(t, sink.getConstructedType()) - ) and - argi = 0 - or - // wrappers constructed by extension - exists(Constructor c, Parameter p, SuperConstructorInvocationStmt sup | - c = sink.getConstructor() and - p = c.getParameter(argi) and - sup.getEnclosingCallable() = c and - constructorStep(p.getAnAccess(), sup) - ) - or - // a custom InputStream that wraps a tainted data source is tainted - inputStreamWrapper(sink.getConstructor(), argi) - ) - } - - /** Access to a method that passes taint from qualifier to argument. */ - private predicate qualifierToArgumentStep(Expr tracked, RValue sink) { - exists(MethodAccess ma, int arg | - taintPreservingQualifierToArgument(ma.getMethod(), arg) and - tracked = ma.getQualifier() and - sink = ma.getArgument(arg) - ) - } - - /** Methods that passes tainted data from qualifier to argument. */ - private predicate taintPreservingQualifierToArgument(Method m, int arg) { - m.getDeclaringType().hasQualifiedName("java.io", "ByteArrayOutputStream") and - m.hasName("writeTo") and - arg = 0 - or - m.getDeclaringType().hasQualifiedName("java.io", "InputStream") and - m.hasName("read") and - arg = 0 - or - m.getDeclaringType().getASupertype*().hasQualifiedName("java.io", "Reader") and - m.hasName("read") and - arg = 0 - } - - /** Access to a method that passes taint from the qualifier. */ - private predicate qualifierToMethodStep(Expr tracked, MethodAccess sink) { - (taintPreservingQualifierToMethod(sink.getMethod()) or unsafeEscape(sink)) and - tracked = sink.getQualifier() - } - - /** - * Methods that return tainted data when called on tainted data. - */ - private predicate taintPreservingQualifierToMethod(Method m) { - m.getDeclaringType() instanceof TypeString and - ( - m.getName() = "concat" or - m.getName() = "endsWith" or - m.getName() = "getBytes" or - m.getName() = "split" or - m.getName() = "substring" or - m.getName() = "toCharArray" or - m.getName() = "toLowerCase" or - m.getName() = "toString" or - m.getName() = "toUpperCase" or - m.getName() = "trim" - ) - or - exists(Class c | c.getQualifiedName() = "java.lang.Number" | - hasSubtype*(c, m.getDeclaringType()) - ) and - ( - m.getName().matches("to%String") or - m.getName() = "toByteArray" or - m.getName().matches("%Value") - ) - or - m.getDeclaringType().getASupertype*().hasQualifiedName("java.io", "Reader") and - ( - m.getName() = "read" and m.getNumberOfParameters() = 0 - or - m.getName() = "readLine" - ) - or - m.getDeclaringType().getQualifiedName().matches("%StringWriter") and - m.getName() = "toString" - or - m.getDeclaringType().hasQualifiedName("java.util", "StringTokenizer") and - m.getName().matches("next%") - or - m.getDeclaringType().hasQualifiedName("java.io", "ByteArrayOutputStream") and - (m.getName() = "toByteArray" or m.getName() = "toString") - or - m.getDeclaringType().hasQualifiedName("java.io", "ObjectInputStream") and - m.getName().matches("read%") - or - ( - m.getDeclaringType().hasQualifiedName("java.lang", "StringBuilder") or - m.getDeclaringType().hasQualifiedName("java.lang", "StringBuffer") - ) and - (m.getName() = "toString" or m.getName() = "append") - or - m.getDeclaringType().hasQualifiedName("javax.xml.transform.sax", "SAXSource") and - m.hasName("getInputSource") - or - m.getDeclaringType().hasQualifiedName("javax.xml.transform.stream", "StreamSource") and - m.hasName("getInputStream") - or - m instanceof IntentGetExtraMethod - or - m.getDeclaringType().hasQualifiedName("java.nio", "ByteBuffer") and - m.hasName("get") - or - m = any(GuiceProvider gp).getAnOverridingGetMethod() - or - m = any(ProtobufMessageLite p).getAGetterMethod() - } - - private class StringReplaceMethod extends Method { - StringReplaceMethod() { - getDeclaringType() instanceof TypeString and - ( - hasName("replace") or - hasName("replaceAll") or - hasName("replaceFirst") - ) - } - } - - private predicate unsafeEscape(MethodAccess ma) { - // Removing `