JS: Consolidate command-line argument modeling

Such that we can reuse the existing modeling, but have it globally
applied as a threat-model as well.

I Basically just moved the modeling. One important aspect is that this
changes is that the previously query-specific `argsParseStep` is now a
globally applied taint-step. This seems reasonable, if someone applied
the argument parsing to any user-controlled string, it seems correct to
propagate that taint for _any_ query.
This commit is contained in:
Rasmus Wriedt Larsen
2024-08-19 14:41:21 +02:00
parent 412e841d69
commit 3448751b4c
5 changed files with 186 additions and 122 deletions

View File

@@ -81,6 +81,7 @@ import semmle.javascript.frameworks.Classnames
import semmle.javascript.frameworks.ClassValidator
import semmle.javascript.frameworks.ClientRequests
import semmle.javascript.frameworks.ClosureLibrary
import semmle.javascript.frameworks.CommandLineArguments
import semmle.javascript.frameworks.CookieLibraries
import semmle.javascript.frameworks.Credentials
import semmle.javascript.frameworks.CryptoLibraries

View File

@@ -0,0 +1,142 @@
/** Provides modeling for parsed command line arguments. */
import javascript
/**
* An object containing command-line arguments, potentially parsed by a library.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CommandLineArguments::Range` instead.
*/
class CommandLineArguments extends ThreatModelSource instanceof CommandLineArguments::Range { }
/** Provides a class for modeling new sources of remote user input. */
module CommandLineArguments {
/**
* An object containing command-line arguments, potentially parsed by a library.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CommandLineArguments` instead.
*/
abstract class Range extends ThreatModelSource::Range {
override string getThreatModel() { result = "commandargs" }
override string getSourceType() { result = "CommandLineArguments" }
}
}
/** A read of `process.argv`, considered as a threat-model source. */
private class ProcessArgv extends CommandLineArguments::Range {
ProcessArgv() {
// `process.argv[0]` and `process.argv[1]` are paths to `node` and `main`, and
// therefore should not be considered a threat-source... However, we don't have an
// easy way to exclude them, so we need to allow them.
this = NodeJSLib::process().getAPropertyRead("argv")
}
override string getSourceType() { result = "process.argv" }
}
private class DefaultModels extends CommandLineArguments::Range {
DefaultModels() {
// `require('get-them-args')(...)` => `{ unknown: [], a: ... b: ... }`
this = DataFlow::moduleImport("get-them-args").getACall()
or
// `require('optimist').argv` => `{ _: [], a: ... b: ... }`
this = DataFlow::moduleMember("optimist", "argv")
or
// `require("arg")({...spec})` => `{_: [], a: ..., b: ...}`
this = DataFlow::moduleImport("arg").getACall()
or
// `(new (require(argparse)).ArgumentParser({...spec})).parse_args()` => `{a: ..., b: ...}`
this =
API::moduleImport("argparse")
.getMember("ArgumentParser")
.getInstance()
.getMember("parse_args")
.getACall()
or
// `require('command-line-args')({...spec})` => `{a: ..., b: ...}`
this = DataFlow::moduleImport("command-line-args").getACall()
or
// `require('meow')(help, {...spec})` => `{a: ..., b: ....}`
this = DataFlow::moduleImport("meow").getACall()
or
// `require("dashdash").createParser(...spec)` => `{a: ..., b: ...}`
this =
[
API::moduleImport("dashdash"),
API::moduleImport("dashdash").getMember("createParser").getReturn()
].getMember("parse").getACall()
or
// `require('commander').myCmdArgumentName`
this = commander().getAMember().asSource()
or
// `require('commander').opt()` => `{a: ..., b: ...}`
this = commander().getMember("opts").getACall()
}
}
/**
* A step for propagating taint through command line parsing,
* such as `var succ = require("minimist")(pred)`.
*/
private class ArgsParseStep extends TaintTracking::SharedTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallNode call |
call = DataFlow::moduleMember("args", "parse").getACall() or
call = DataFlow::moduleImport(["yargs-parser", "minimist", "subarg"]).getACall()
|
succ = call and
pred = call.getArgument(0)
)
}
}
/**
* Gets a Command instance from the `commander` library.
*/
private API::Node commander() {
result = API::moduleImport("commander")
or
// `require("commander").program === require("commander")`
result = commander().getMember("program")
or
result = commander().getMember("Command").getInstance()
or
// lots of chainable methods
result = commander().getAMember().getReturn()
}
/**
* Gets an instance of `yargs`.
* Either directly imported as a module, or through some chained method call.
*/
private DataFlow::SourceNode yargs() {
result = DataFlow::moduleImport("yargs")
or
// script used to generate list of chained methods: https://gist.github.com/erik-krogh/f8afe952c0577f4b563a993e613269ba
exists(string method |
not method =
// the methods that does not return a chained `yargs` object.
[
"getContext", "getDemandedOptions", "getDemandedCommands", "getDeprecatedOptions",
"_getParseContext", "getOptions", "getGroups", "getStrict", "getStrictCommands",
"getExitProcess", "locale", "getUsageInstance", "getCommandInstance"
]
|
result = yargs().getAMethodCall(method)
)
}
/**
* An array of command line arguments (`argv`) parsed by the `yargs` library.
*/
private class YargsArgv extends CommandLineArguments::Range {
YargsArgv() {
this = yargs().getAPropertyRead("argv")
or
this = yargs().getAMethodCall("parse") and
this.(DataFlow::MethodCallNode).getNumArgument() = 0
}
}

View File

@@ -25,21 +25,6 @@ module IndirectCommandInjection {
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of user input from the command-line, considered as a flow source for command injection.
*/
private class CommandLineArgumentsArrayAsSource extends Source instanceof CommandLineArgumentsArray
{ }
/**
* An array of command-line arguments.
*/
class CommandLineArgumentsArray extends DataFlow::SourceNode {
CommandLineArgumentsArray() {
this = DataFlow::globalVarRef("process").getAPropertyRead("argv")
}
}
/**
* A read of `process.env`, considered as a flow source for command injection.
*/
@@ -82,109 +67,9 @@ module IndirectCommandInjection {
}
/**
* An object containing parsed command-line arguments, considered as a flow source for command injection.
* An object containing command-line arguments, considered as a flow source for command injection.
*/
class ParsedCommandLineArgumentsAsSource extends Source {
ParsedCommandLineArgumentsAsSource() {
// `require('get-them-args')(...)` => `{ unknown: [], a: ... b: ... }`
this = DataFlow::moduleImport("get-them-args").getACall()
or
// `require('optimist').argv` => `{ _: [], a: ... b: ... }`
this = DataFlow::moduleMember("optimist", "argv")
or
// `require("arg")({...spec})` => `{_: [], a: ..., b: ...}`
this = DataFlow::moduleImport("arg").getACall()
or
// `(new (require(argparse)).ArgumentParser({...spec})).parse_args()` => `{a: ..., b: ...}`
this =
API::moduleImport("argparse")
.getMember("ArgumentParser")
.getInstance()
.getMember("parse_args")
.getACall()
or
// `require('command-line-args')({...spec})` => `{a: ..., b: ...}`
this = DataFlow::moduleImport("command-line-args").getACall()
or
// `require('meow')(help, {...spec})` => `{a: ..., b: ....}`
this = DataFlow::moduleImport("meow").getACall()
or
// `require("dashdash").createParser(...spec)` => `{a: ..., b: ...}`
this =
[
API::moduleImport("dashdash"),
API::moduleImport("dashdash").getMember("createParser").getReturn()
].getMember("parse").getACall()
or
// `require('commander').myCmdArgumentName`
this = commander().getAMember().asSource()
or
// `require('commander').opt()` => `{a: ..., b: ...}`
this = commander().getMember("opts").getACall()
}
}
/**
* Holds if there is a command line parsing step from `pred` to `succ`.
* E.g: `var succ = require("minimist")(pred)`.
*/
predicate argsParseStep(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallNode call |
call = DataFlow::moduleMember("args", "parse").getACall() or
call = DataFlow::moduleImport(["yargs-parser", "minimist", "subarg"]).getACall()
|
succ = call and
pred = call.getArgument(0)
)
}
/**
* Gets a Command instance from the `commander` library.
*/
private API::Node commander() {
result = API::moduleImport("commander")
or
// `require("commander").program === require("commander")`
result = commander().getMember("program")
or
result = commander().getMember("Command").getInstance()
or
// lots of chainable methods
result = commander().getAMember().getReturn()
}
/**
* Gets an instance of `yargs`.
* Either directly imported as a module, or through some chained method call.
*/
private DataFlow::SourceNode yargs() {
result = DataFlow::moduleImport("yargs")
or
// script used to generate list of chained methods: https://gist.github.com/erik-krogh/f8afe952c0577f4b563a993e613269ba
exists(string method |
not method =
// the methods that does not return a chained `yargs` object.
[
"getContext", "getDemandedOptions", "getDemandedCommands", "getDeprecatedOptions",
"_getParseContext", "getOptions", "getGroups", "getStrict", "getStrictCommands",
"getExitProcess", "locale", "getUsageInstance", "getCommandInstance"
]
|
result = yargs().getAMethodCall(method)
)
}
/**
* An array of command line arguments (`argv`) parsed by the `yargs` library.
*/
class YargsArgv extends Source {
YargsArgv() {
this = yargs().getAPropertyRead("argv")
or
this = yargs().getAMethodCall("parse") and
this.(DataFlow::MethodCallNode).getNumArgument() = 0
}
}
private class CommandLineArgumentsAsSource extends Source instanceof CommandLineArguments { }
/**
* A command-line argument that effectively is system-controlled, and therefore not likely to be exploitable when used in the execution of another command.
@@ -193,7 +78,7 @@ module IndirectCommandInjection {
SystemControlledCommandLineArgumentSanitizer() {
// `process.argv[0]` and `process.argv[1]` are paths to `node` and `main`.
exists(string index | index = "0" or index = "1" |
this = any(CommandLineArgumentsArray a).getAPropertyRead(index)
this = DataFlow::globalVarRef("process").getAPropertyRead("argv").getAPropertyRead(index)
)
}
}

View File

@@ -28,8 +28,4 @@ class Configuration extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node sink) { this.isSinkWithHighlight(sink, _) }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
argsParseStep(pred, succ)
}
}

View File

@@ -2,3 +2,43 @@ import 'dummy';
var x = process.env['foo']; // $ threat-source=environment
SINK(x); // $ hasFlow
var y = process.argv[2]; // $ threat-source=commandargs
SINK(y); // $ hasFlow
// Accessing command line arguments using yargs
// https://www.npmjs.com/package/yargs/v/17.7.2
const yargs = require('yargs/yargs');
const { hideBin } = require('yargs/helpers');
const argv = yargs(hideBin(process.argv)).argv; // $ threat-source=commandargs
SINK(argv.foo); // $ MISSING: hasFlow
// older version
// https://www.npmjs.com/package/yargs/v/7.1.2
const yargsOld = require('yargs');
const argvOld = yargsOld.argv; // $ threat-source=commandargs
SINK(argvOld.foo); // $ hasFlow
// Accessing command line arguments using yargs-parser
const yargsParser = require('yargs-parser');
const src = process.argv.slice(2); // $ threat-source=commandargs
const parsedArgs = yargsParser(src);
SINK(parsedArgs.foo); // $ hasFlow
// Accessing command line arguments using minimist
const minimist = require('minimist');
const args = minimist(process.argv.slice(2)); // $ threat-source=commandargs
SINK(args.foo); // $ hasFlow
// Accessing command line arguments using commander
const { Command } = require('commander'); // $ SPURIOUS: threat-source=commandargs
const program = new Command();
program.parse(process.argv); // $ threat-source=commandargs
SINK(program.opts().foo); // $ hasFlow SPURIOUS: threat-source=commandargs