Add CSV injection model

This commit is contained in:
haby0
2022-03-15 15:15:38 +08:00
parent 958fd9b3ea
commit 4195eef9ba
10 changed files with 284 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc csv injection
"""
import copy
import csv
from flask import Flask
from flask import request
from typing import List
# Flask application object; the @app.route decorators below register their handlers on it.
app = Flask(__name__)
@app.route('/bad1')
def bad1():
    """Write the ``csv`` query parameter to ``test.csv`` without sanitizing it.

    BAD: the user-controlled value is stored verbatim, so a value that starts
    with a formula character reaches the CSV file unchanged.

    Returns:
        The literal string ``"bad1"``.
    """
    csv_data = request.args.get('csv')
    # newline="" is the mode the csv module documents for files given to a
    # writer; the with-block closes the file once the row has been written.
    with open("test.csv", "wt", newline="") as csv_file:
        csvWriter = csv.writer(csv_file)
        # writerow() expects an iterable of fields; wrapping the value in a
        # list stores it as one cell rather than one cell per character.
        csvWriter.writerow([csv_data])
    return "bad1"
@app.route('/good1')
def good1():
    """Write the ``csv`` query parameter to ``test.csv`` after sanitizing it.

    GOOD: the value is passed through ``santize_for_csv`` before it is
    handed to the CSV writer.

    Returns:
        The literal string ``"good1"``.
    """
    # Default to "" so the sanitizer always receives a string, even when the
    # parameter is absent from the request.
    csv_data = request.args.get('csv', '')
    # newline="" is the mode the csv module documents for files given to a
    # writer; the with-block closes the file once the row has been written.
    with open("test.csv", "wt", newline="") as csv_file:
        csvWriter = csv.writer(csv_file)
        # writerow() expects an iterable of fields; wrapping the value in a
        # list stores it as one cell rather than one cell per character.
        csvWriter.writerow([santize_for_csv(csv_data)])
    return "good1"
def santize_for_csv(data: str| List[str] | List[List[str]]):
def sanitize(item):
return "'" + item
unsafe_prefixes = ("+", "=", "-", "@")
if isinstance(data, str):
if data.startswith(unsafe_prefixes):
return sanitize(data)
return data
elif isinstance(data, list) and isinstance(data[0], str):
sanitized_data = copy.deepcopy(data)
for index, item in enumerate(data):
if item.startswith(unsafe_prefixes):
sanitized_data[index] = sanitize(item)
return sanitized_data
elif isinstance(data[0], list) and isinstance(data[0][0], str):
sanitized_data = copy.deepcopy(data)
for outer_index, sublist in enumerate(data):
for inner_index, item in enumerate(sublist):
if item.startswith(unsafe_prefixes):
sanitized_data[outer_index][inner_index] = sanitize(item)
return sanitized_data
else:
raise ValueError("Unsupported data type: " + str(type(data)))
# Script entry point: only runs when this file is executed directly.
if __name__ == '__main__':
    # NOTE(review): debug mode is presumably acceptable only because this is a
    # local sample; confirm it is never enabled in a deployed application.
    app.debug = True
    app.run()

View File

@@ -0,0 +1,29 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>CSV Injection, also known as Formula Injection, occurs when websites embed untrusted input inside CSV files.</p>
<p>When a CSV file is opened with a spreadsheet program such as Microsoft Excel or LibreOffice Calc,
the software interprets entries beginning with <code>=</code> as formulas. Maliciously crafted formulas may attempt information exfiltration
or other malicious activity when they are automatically evaluated by the spreadsheet software.</p>
</overview>
<recommendation>
<p>When generating CSV output, ensure that formula-sensitive metacharacters are effectively escaped or removed from all data before storage in the resultant CSV.
Risky characters include <code>=</code>(equal), <code>+</code>(plus), <code>-</code>(minus), and <code>@</code>(at).</p>
</recommendation>
<example>
<p>The following example shows a bad case and a good case.
In the <code>bad1</code> method, the data provided by the user is written directly to the CSV file, which leaves the file open to CSV injection.
In the <code>good1</code> method, the program checks the data provided by the user and neutralizes values that start with the <code>=</code> (equal), <code>+</code> (plus), <code>-</code> (minus), or <code>@</code> (at) character before they are written.</p>
<sample src="CsvInjection.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/CSV_Injection">CSV Injection</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,20 @@
/**
 * @name Csv Injection
 * @description Writing user-controlled data to a CSV file without sanitizing it may lead to
 *              information disclosure or other malicious activity when the file is opened in spreadsheet software.
 * @kind path-problem
 * @problem.severity error
 * @id py/csv-injection
 * @tags security
 *       external/cwe/cwe-1236
 */
import python
import DataFlow::PathGraph
import semmle.python.dataflow.new.DataFlow
import experimental.semmle.python.security.injection.CsvInjection
// Report every sink that `CsvInjectionFlowConfig` finds a flow path to, together with
// the source of that path so the alert can link back to it.
from CsvInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Csv injection might include code from $@.", source.getNode(),
  "this user input"

View File

@@ -267,6 +267,36 @@ class HeaderDeclaration extends DataFlow::Node {
DataFlow::Node getValueArg() { result = range.getValueArg() }
}
/** Provides classes for modeling CSV writer APIs. */
module CsvWriter {
  /**
   * A data flow node representing a use of a CSV writer API.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `CsvWriter` instead.
   */
  abstract class Range extends DataFlow::Node {
    /**
     * Gets a data flow node for an input that is passed to the CSV writer API.
     */
    abstract DataFlow::Node getAnInput();
  }
}
/**
 * A data flow node representing a use of a CSV writer API.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `CsvWriter::Range` instead.
 */
class CsvWriter extends DataFlow::Node {
  // The `CsvWriter::Range` model this node is bound to; every predicate delegates to it.
  CsvWriter::Range range;

  CsvWriter() { this = range }

  /** Gets an input of this CSV writer, as reported by the underlying `CsvWriter::Range`. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling JWT encoding-related APIs. */
module JWTEncoding {
/**

View File

@@ -9,6 +9,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.frameworks.Csv
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Python_JWT
private import experimental.semmle.python.libraries.Authlib

View File

@@ -0,0 +1,73 @@
/**
 * Provides classes modeling security-relevant aspects of the `csv` module from the Python standard library.
 * See https://docs.python.org/3/library/csv.html
 */
private import python
private import semmle.python.ApiGraphs
private import experimental.semmle.python.Concepts
/**
 * Provides models for the `csv` module from the Python standard library.
 *
 * See
 * - https://docs.python.org/3/library/csv.html
 */
private module Csv {
  private module Writer {
    /**
     * A data flow node that `instance` uses as a starting point when tracking
     * references to writer objects.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** A direct instantiation of `csv.writer` or `csv.DictWriter`. */
    private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
      ClassInstantiation() {
        this = API::moduleImport("csv").getMember(["writer", "DictWriter"]).getACall()
      }
    }

    /** Gets a reference to a `csv.writer` or `csv.DictWriter` instance. */
    private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
      t.start() and
      result instanceof InstanceSource
      or
      exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
    }

    /** Gets a reference to a `csv.writer` or `csv.DictWriter` instance. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

    /**
     * A call to the `writerow` or `writerows` method on a tracked writer instance.
     *
     * See:
     * - https://docs.python.org/3/library/csv.html#csvwriter.writerow
     * - https://docs.python.org/3/library/csv.html#csvwriter.writerows
     */
    private class CsvWriteCall extends CsvWriter::Range, DataFlow::CallCfgNode {
      // Name of the called method; it selects which keyword argument counts as an input.
      string methodName;

      CsvWriteCall() {
        methodName in ["writerow", "writerows"] and
        this.(DataFlow::MethodCallNode).calls(Writer::instance(), methodName)
      }

      // The input is the first positional argument, or the keyword argument
      // `row` (for `writerow`) or `rows` (for `writerows`).
      override DataFlow::Node getAnInput() {
        result = this.getArg(0)
        or
        methodName = "writerow" and
        result = this.getArgByName("row")
        or
        methodName = "writerows" and
        result = this.getArgByName("rows")
      }
    }

    /**
     * A call to `csv.DictWriter`, whose field names are treated as input.
     *
     * See: https://docs.python.org/3/library/csv.html#csv.DictWriter
     */
    private class DictWriterInstance extends CsvWriter::Range, DataFlow::CallCfgNode {
      DictWriterInstance() { this = API::moduleImport("csv").getMember("DictWriter").getACall() }

      // The input is the second positional argument or the `fieldnames` keyword argument.
      override DataFlow::Node getAnInput() {
        result in [this.getArg(1), this.getArgByName("fieldnames")]
      }
    }
  }
}

View File

@@ -0,0 +1,24 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
 * A taint-tracking configuration for tracking untrusted user input that is passed to a CSV writer.
 */
class CsvInjectionFlowConfig extends TaintTracking::Configuration {
  CsvInjectionFlowConfig() { this = "CsvInjectionFlowConfig" }

  // Sources: any remote flow source.
  override predicate isSource(DataFlow::Node source) {
    source instanceof RemoteFlowSource
  }

  // Sinks: any input of a modeled `CsvWriter`.
  override predicate isSink(DataFlow::Node sink) {
    exists(CsvWriter csvwriter | sink = csvwriter.getAnInput())
  }
}

View File

@@ -0,0 +1,19 @@
edges
| csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute |
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data |
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data |
| csv_bad.py:24:16:24:22 | ControlFlowNode for request | csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute |
| csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute | csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data |
nodes
| csv_bad.py:16:16:16:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
| csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
| csv_bad.py:24:16:24:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
subpaths
#select
| csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:16:16:16:22 | ControlFlowNode for request | this user input |
| csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:16:16:16:22 | ControlFlowNode for request | this user input |
| csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | csv_bad.py:24:16:24:22 | ControlFlowNode for request | csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:24:16:24:22 | ControlFlowNode for request | this user input |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-1236/CsvInjection.ql

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc csv injection
"""
import copy
import csv
from flask import Flask
from flask import request
from typing import List
app = Flask(__name__)  # Flask application object; the @app.route decorators below register on it
@app.route('/bad1')  # NOTE(review): expected results appear to reference this file by line:column - avoid adding or removing lines
def bad1():  # test case: request data is passed to writerow() and writerows() unsanitized
    csv_data = request.args.get('csv')  # value of the "csv" query parameter
    csvWriter = csv.writer(open("test.csv", "wt"))  # NOTE(review): the opened file is never closed
    csvWriter.writerow(csv_data)  # bad: request data used as the row
    csvWriter.writerows(csv_data)  # bad: request data used as the rows
    return "bad1"
@app.route('/bad2')  # NOTE(review): expected results appear to reference this file by line:column - avoid adding or removing lines
def bad2():  # test case: request data is used as the DictWriter field names
    csv_data = request.args.get('csv')  # value of the "csv" query parameter
    csvWriter = csv.DictWriter(f, fieldnames=csv_data)  # bad: request data as fieldnames. NOTE(review): `f` is not defined in the visible code
    csvWriter.writeheader()  # writes the header row built from the field names
    return "bad2"
if __name__ == '__main__':  # script entry point: only runs when the file is executed directly
    app.debug = True  # NOTE(review): presumably fine for a test fixture only - confirm it is never deployed
    app.run()