From 3448751b4cef97f4b59857ea30cc29ac8191bc49 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 19 Aug 2024 14:41:21 +0200 Subject: [PATCH] JS: Consolidate command-line argument modeling Such that we can reuse the existing modeling, but have it globally applied as a threat-model as well. I Basically just moved the modeling. One important aspect is that this changes is that the previously query-specific `argsParseStep` is now a globally applied taint-step. This seems reasonable, if someone applied the argument parsing to any user-controlled string, it seems correct to propagate that taint for _any_ query. --- javascript/ql/lib/javascript.qll | 1 + .../frameworks/CommandLineArguments.qll | 142 ++++++++++++++++++ ...IndirectCommandInjectionCustomizations.qll | 121 +-------------- .../IndirectCommandInjectionQuery.qll | 4 - .../threat-models/sources/sources.js | 40 +++++ 5 files changed, 186 insertions(+), 122 deletions(-) create mode 100644 javascript/ql/lib/semmle/javascript/frameworks/CommandLineArguments.qll diff --git a/javascript/ql/lib/javascript.qll b/javascript/ql/lib/javascript.qll index 07fb759bd65..7bb2b767610 100644 --- a/javascript/ql/lib/javascript.qll +++ b/javascript/ql/lib/javascript.qll @@ -81,6 +81,7 @@ import semmle.javascript.frameworks.Classnames import semmle.javascript.frameworks.ClassValidator import semmle.javascript.frameworks.ClientRequests import semmle.javascript.frameworks.ClosureLibrary +import semmle.javascript.frameworks.CommandLineArguments import semmle.javascript.frameworks.CookieLibraries import semmle.javascript.frameworks.Credentials import semmle.javascript.frameworks.CryptoLibraries diff --git a/javascript/ql/lib/semmle/javascript/frameworks/CommandLineArguments.qll b/javascript/ql/lib/semmle/javascript/frameworks/CommandLineArguments.qll new file mode 100644 index 00000000000..db1444db741 --- /dev/null +++ b/javascript/ql/lib/semmle/javascript/frameworks/CommandLineArguments.qll @@ -0,0 +1,142 @@ +/** Provides modeling for parsed command line arguments. */ + +import javascript + +/** + * An object containing command-line arguments, potentially parsed by a library. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `CommandLineArguments::Range` instead. + */ +class CommandLineArguments extends ThreatModelSource instanceof CommandLineArguments::Range { } + +/** Provides a class for modeling new sources of remote user input. */ +module CommandLineArguments { + /** + * An object containing command-line arguments, potentially parsed by a library. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `CommandLineArguments` instead. + */ + abstract class Range extends ThreatModelSource::Range { + override string getThreatModel() { result = "commandargs" } + + override string getSourceType() { result = "CommandLineArguments" } + } +} + +/** A read of `process.argv`, considered as a threat-model source. */ +private class ProcessArgv extends CommandLineArguments::Range { + ProcessArgv() { + // `process.argv[0]` and `process.argv[1]` are paths to `node` and `main`, and + // therefore should not be considered a threat-source... However, we don't have an + // easy way to exclude them, so we need to allow them. + this = NodeJSLib::process().getAPropertyRead("argv") + } + + override string getSourceType() { result = "process.argv" } +} + +private class DefaultModels extends CommandLineArguments::Range { + DefaultModels() { + // `require('get-them-args')(...)` => `{ unknown: [], a: ... b: ... }` + this = DataFlow::moduleImport("get-them-args").getACall() + or + // `require('optimist').argv` => `{ _: [], a: ... b: ... }` + this = DataFlow::moduleMember("optimist", "argv") + or + // `require("arg")({...spec})` => `{_: [], a: ..., b: ...}` + this = DataFlow::moduleImport("arg").getACall() + or + // `(new (require(argparse)).ArgumentParser({...spec})).parse_args()` => `{a: ..., b: ...}` + this = + API::moduleImport("argparse") + .getMember("ArgumentParser") + .getInstance() + .getMember("parse_args") + .getACall() + or + // `require('command-line-args')({...spec})` => `{a: ..., b: ...}` + this = DataFlow::moduleImport("command-line-args").getACall() + or + // `require('meow')(help, {...spec})` => `{a: ..., b: ....}` + this = DataFlow::moduleImport("meow").getACall() + or + // `require("dashdash").createParser(...spec)` => `{a: ..., b: ...}` + this = + [ + API::moduleImport("dashdash"), + API::moduleImport("dashdash").getMember("createParser").getReturn() + ].getMember("parse").getACall() + or + // `require('commander').myCmdArgumentName` + this = commander().getAMember().asSource() + or + // `require('commander').opt()` => `{a: ..., b: ...}` + this = commander().getMember("opts").getACall() + } +} + +/** + * A step for propagating taint through command line parsing, + * such as `var succ = require("minimist")(pred)`. + */ +private class ArgsParseStep extends TaintTracking::SharedTaintStep { + override predicate step(DataFlow::Node pred, DataFlow::Node succ) { + exists(DataFlow::CallNode call | + call = DataFlow::moduleMember("args", "parse").getACall() or + call = DataFlow::moduleImport(["yargs-parser", "minimist", "subarg"]).getACall() + | + succ = call and + pred = call.getArgument(0) + ) + } +} + +/** + * Gets a Command instance from the `commander` library. + */ +private API::Node commander() { + result = API::moduleImport("commander") + or + // `require("commander").program === require("commander")` + result = commander().getMember("program") + or + result = commander().getMember("Command").getInstance() + or + // lots of chainable methods + result = commander().getAMember().getReturn() +} + +/** + * Gets an instance of `yargs`. + * Either directly imported as a module, or through some chained method call. + */ +private DataFlow::SourceNode yargs() { + result = DataFlow::moduleImport("yargs") + or + // script used to generate list of chained methods: https://gist.github.com/erik-krogh/f8afe952c0577f4b563a993e613269ba + exists(string method | + not method = + // the methods that does not return a chained `yargs` object. + [ + "getContext", "getDemandedOptions", "getDemandedCommands", "getDeprecatedOptions", + "_getParseContext", "getOptions", "getGroups", "getStrict", "getStrictCommands", + "getExitProcess", "locale", "getUsageInstance", "getCommandInstance" + ] + | + result = yargs().getAMethodCall(method) + ) +} + +/** + * An array of command line arguments (`argv`) parsed by the `yargs` library. + */ +private class YargsArgv extends CommandLineArguments::Range { + YargsArgv() { + this = yargs().getAPropertyRead("argv") + or + this = yargs().getAMethodCall("parse") and + this.(DataFlow::MethodCallNode).getNumArgument() = 0 + } +} diff --git a/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionCustomizations.qll b/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionCustomizations.qll index b2b94fcca8d..9dd6ab4b4a9 100644 --- a/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionCustomizations.qll +++ b/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionCustomizations.qll @@ -25,21 +25,6 @@ module IndirectCommandInjection { */ abstract class Sanitizer extends DataFlow::Node { } - /** - * A source of user input from the command-line, considered as a flow source for command injection. - */ - private class CommandLineArgumentsArrayAsSource extends Source instanceof CommandLineArgumentsArray - { } - - /** - * An array of command-line arguments. - */ - class CommandLineArgumentsArray extends DataFlow::SourceNode { - CommandLineArgumentsArray() { - this = DataFlow::globalVarRef("process").getAPropertyRead("argv") - } - } - /** * A read of `process.env`, considered as a flow source for command injection. */ @@ -82,109 +67,9 @@ module IndirectCommandInjection { } /** - * An object containing parsed command-line arguments, considered as a flow source for command injection. + * An object containing command-line arguments, considered as a flow source for command injection. */ - class ParsedCommandLineArgumentsAsSource extends Source { - ParsedCommandLineArgumentsAsSource() { - // `require('get-them-args')(...)` => `{ unknown: [], a: ... b: ... }` - this = DataFlow::moduleImport("get-them-args").getACall() - or - // `require('optimist').argv` => `{ _: [], a: ... b: ... }` - this = DataFlow::moduleMember("optimist", "argv") - or - // `require("arg")({...spec})` => `{_: [], a: ..., b: ...}` - this = DataFlow::moduleImport("arg").getACall() - or - // `(new (require(argparse)).ArgumentParser({...spec})).parse_args()` => `{a: ..., b: ...}` - this = - API::moduleImport("argparse") - .getMember("ArgumentParser") - .getInstance() - .getMember("parse_args") - .getACall() - or - // `require('command-line-args')({...spec})` => `{a: ..., b: ...}` - this = DataFlow::moduleImport("command-line-args").getACall() - or - // `require('meow')(help, {...spec})` => `{a: ..., b: ....}` - this = DataFlow::moduleImport("meow").getACall() - or - // `require("dashdash").createParser(...spec)` => `{a: ..., b: ...}` - this = - [ - API::moduleImport("dashdash"), - API::moduleImport("dashdash").getMember("createParser").getReturn() - ].getMember("parse").getACall() - or - // `require('commander').myCmdArgumentName` - this = commander().getAMember().asSource() - or - // `require('commander').opt()` => `{a: ..., b: ...}` - this = commander().getMember("opts").getACall() - } - } - - /** - * Holds if there is a command line parsing step from `pred` to `succ`. - * E.g: `var succ = require("minimist")(pred)`. - */ - predicate argsParseStep(DataFlow::Node pred, DataFlow::Node succ) { - exists(DataFlow::CallNode call | - call = DataFlow::moduleMember("args", "parse").getACall() or - call = DataFlow::moduleImport(["yargs-parser", "minimist", "subarg"]).getACall() - | - succ = call and - pred = call.getArgument(0) - ) - } - - /** - * Gets a Command instance from the `commander` library. - */ - private API::Node commander() { - result = API::moduleImport("commander") - or - // `require("commander").program === require("commander")` - result = commander().getMember("program") - or - result = commander().getMember("Command").getInstance() - or - // lots of chainable methods - result = commander().getAMember().getReturn() - } - - /** - * Gets an instance of `yargs`. - * Either directly imported as a module, or through some chained method call. - */ - private DataFlow::SourceNode yargs() { - result = DataFlow::moduleImport("yargs") - or - // script used to generate list of chained methods: https://gist.github.com/erik-krogh/f8afe952c0577f4b563a993e613269ba - exists(string method | - not method = - // the methods that does not return a chained `yargs` object. - [ - "getContext", "getDemandedOptions", "getDemandedCommands", "getDeprecatedOptions", - "_getParseContext", "getOptions", "getGroups", "getStrict", "getStrictCommands", - "getExitProcess", "locale", "getUsageInstance", "getCommandInstance" - ] - | - result = yargs().getAMethodCall(method) - ) - } - - /** - * An array of command line arguments (`argv`) parsed by the `yargs` library. - */ - class YargsArgv extends Source { - YargsArgv() { - this = yargs().getAPropertyRead("argv") - or - this = yargs().getAMethodCall("parse") and - this.(DataFlow::MethodCallNode).getNumArgument() = 0 - } - } + private class CommandLineArgumentsAsSource extends Source instanceof CommandLineArguments { } /** * A command-line argument that effectively is system-controlled, and therefore not likely to be exploitable when used in the execution of another command. @@ -193,7 +78,7 @@ module IndirectCommandInjection { SystemControlledCommandLineArgumentSanitizer() { // `process.argv[0]` and `process.argv[1]` are paths to `node` and `main`. exists(string index | index = "0" or index = "1" | - this = any(CommandLineArgumentsArray a).getAPropertyRead(index) + this = DataFlow::globalVarRef("process").getAPropertyRead("argv").getAPropertyRead(index) ) } } diff --git a/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionQuery.qll b/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionQuery.qll index d2de26d5cd0..b3e59aec7bd 100644 --- a/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionQuery.qll +++ b/javascript/ql/lib/semmle/javascript/security/dataflow/IndirectCommandInjectionQuery.qll @@ -28,8 +28,4 @@ class Configuration extends TaintTracking::Configuration { override predicate isSink(DataFlow::Node sink) { this.isSinkWithHighlight(sink, _) } override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } - - override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { - argsParseStep(pred, succ) - } } diff --git a/javascript/ql/test/library-tests/threat-models/sources/sources.js b/javascript/ql/test/library-tests/threat-models/sources/sources.js index 0194af87f6e..0e9e73f57be 100644 --- a/javascript/ql/test/library-tests/threat-models/sources/sources.js +++ b/javascript/ql/test/library-tests/threat-models/sources/sources.js @@ -2,3 +2,43 @@ import 'dummy'; var x = process.env['foo']; // $ threat-source=environment SINK(x); // $ hasFlow + +var y = process.argv[2]; // $ threat-source=commandargs +SINK(y); // $ hasFlow + + +// Accessing command line arguments using yargs +// https://www.npmjs.com/package/yargs/v/17.7.2 +const yargs = require('yargs/yargs'); +const { hideBin } = require('yargs/helpers'); +const argv = yargs(hideBin(process.argv)).argv; // $ threat-source=commandargs + +SINK(argv.foo); // $ MISSING: hasFlow + +// older version +// https://www.npmjs.com/package/yargs/v/7.1.2 +const yargsOld = require('yargs'); +const argvOld = yargsOld.argv; // $ threat-source=commandargs + +SINK(argvOld.foo); // $ hasFlow + +// Accessing command line arguments using yargs-parser +const yargsParser = require('yargs-parser'); +const src = process.argv.slice(2); // $ threat-source=commandargs +const parsedArgs = yargsParser(src); + +SINK(parsedArgs.foo); // $ hasFlow + +// Accessing command line arguments using minimist +const minimist = require('minimist'); +const args = minimist(process.argv.slice(2)); // $ threat-source=commandargs + +SINK(args.foo); // $ hasFlow + + +// Accessing command line arguments using commander +const { Command } = require('commander'); // $ SPURIOUS: threat-source=commandargs +const program = new Command(); +program.parse(process.argv); // $ threat-source=commandargs + +SINK(program.opts().foo); // $ hasFlow SPURIOUS: threat-source=commandargs