mirror of
https://github.com/github/codeql.git
synced 2025-12-21 19:26:31 +01:00
Add CSV injection model
This commit is contained in:
56
python/ql/src/experimental/Security/CWE-1236/CsvInjection.py
Normal file
56
python/ql/src/experimental/Security/CWE-1236/CsvInjection.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@Desc :csv injection
|
||||
"""
|
||||
import copy
|
||||
import csv
|
||||
from flask import Flask
|
||||
from flask import request
|
||||
from typing import List
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/bad1')
def bad1():
    """Write the raw ``csv`` query parameter to ``test.csv``.

    BAD: the user-controlled value reaches the CSV file without any
    neutralisation of leading formula characters, so a spreadsheet
    application may evaluate it when the file is opened.
    """
    csv_data = request.args.get('csv')
    # The with-block closes the file handle; newline="" is what the csv
    # module documentation asks for on files handed to csv.writer.
    with open("test.csv", "wt", newline="") as csv_file:
        # A row is a sequence of cells: wrapping the value in a list writes
        # it as one cell instead of one cell per character.
        csv.writer(csv_file).writerow([csv_data])
    return "bad1"
|
||||
|
||||
@app.route('/good1')
def good1():
    """Write the sanitised ``csv`` query parameter to ``test.csv``.

    GOOD: the user-controlled value is passed through ``santize_for_csv``
    before it is written, so a leading formula character is neutralised.
    """
    # Default to an empty string so a missing parameter is written as an
    # empty cell instead of handing None to the sanitiser.
    csv_data = request.args.get('csv', '')
    # The with-block closes the file handle; newline="" is what the csv
    # module documentation asks for on files handed to csv.writer.
    with open("test.csv", "wt", newline="") as csv_file:
        # Write the sanitised value as ONE cell. Passing the bare string
        # would split it into one-character cells and separate the guard
        # quote from the text it is meant to protect.
        csv.writer(csv_file).writerow([santize_for_csv(csv_data)])
    return "good1"
|
||||
|
||||
def santize_for_csv(data: "str | List[str] | List[List[str]]"):
    """Neutralise spreadsheet formula triggers in CSV cell values.

    A cell that starts with one of the trigger characters is prefixed with
    a single quote so spreadsheet software treats it as text.

    Args:
        data: a single cell (``str``), one row (``list`` of ``str``) or a
            table (``list`` of rows). Empty lists are accepted.

    Returns:
        A value of the same shape as ``data`` with unsafe cells prefixed.
        Lists are rebuilt, so the caller's input is never mutated.

    Raises:
        ValueError: if ``data`` is not one of the supported shapes.
    """
    # The annotation above is a string on purpose: ``str | List[str]`` is
    # evaluated at definition time and fails on Python versions before 3.10.

    # "=", "+", "-" and "@" start formulas; OWASP additionally lists tab and
    # carriage return as characters a cell must not begin with.
    unsafe_prefixes = ("+", "=", "-", "@", "\t", "\r")

    def sanitize(item):
        # str.startswith accepts a tuple, so one call covers every prefix.
        return "'" + item if item.startswith(unsafe_prefixes) else item

    def is_row(candidate):
        return isinstance(candidate, list) and all(
            isinstance(cell, str) for cell in candidate
        )

    if isinstance(data, str):
        return sanitize(data)
    if is_row(data):
        # Also covers the empty list, which the original indexed into.
        return [sanitize(cell) for cell in data]
    if isinstance(data, list) and all(is_row(row) for row in data):
        return [[sanitize(cell) for cell in row] for row in data]
    raise ValueError("Unsupported data type: " + str(type(data)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Runs only when the file is executed as a script.
    # NOTE(review): debug mode is switched on before the server starts;
    # confirm this sample is never deployed as-is.
    app.debug = True
    app.run()
|
||||
@@ -0,0 +1,29 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>CSV Injection, also known as Formula Injection, occurs when websites embed untrusted input inside CSV files.</p>
|
||||
<p>When a CSV format file is opened with a spreadsheet program such as Microsoft Excel or LibreOffice Calc,
|
||||
this software interprets entries beginning with <code>=</code> as formulas, which may attempt information exfiltration
|
||||
or other malicious activity when automatically executed by the spreadsheet software.</p>
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>When generating CSV output, ensure that formula-sensitive metacharacters are effectively escaped or removed from all data before storage in the resultant CSV.
|
||||
Risky characters include <code>=</code>(equal), <code>+</code>(plus), <code>-</code>(minus), and <code>@</code>(at).</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>The following examples show the bad case and the good case respectively.
|
||||
In the <code>bad1</code> method, the data provided by the user is stored directly in the CSV file, which leaves the file open to formula injection.
|
||||
In the <code>good1</code> method, the program checks the data provided by the user and safely handles values starting with the <code>=</code>(equal), <code>+</code>(plus), <code>-</code>(minus), and <code>@</code>(at) characters.</p>
|
||||
|
||||
<sample src="CsvInjection.py" />
|
||||
|
||||
</example>
|
||||
<references>
|
||||
<li>OWASP: <a href="https://owasp.org/www-community/attacks/CSV_Injection">CSV Injection</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
20
python/ql/src/experimental/Security/CWE-1236/CsvInjection.ql
Normal file
20
python/ql/src/experimental/Security/CWE-1236/CsvInjection.ql
Normal file
@@ -0,0 +1,20 @@
|
||||
/**
 * @name Csv Injection
 * @description From user-controlled data saved in CSV files, it is easy to attempt information disclosure
 *              or other malicious activities when automated by spreadsheet software
 * @kind path-problem
 * @problem.severity error
 * @id py/csv-injection
 * @tags security
 *       external/cwe/cwe-1236
 */

import python
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import experimental.semmle.python.security.injection.CsvInjection

// Report every flow path that `CsvInjectionFlowConfig` finds; the alert is placed on the sink
// and the message links back to the source node.
from CsvInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Csv injection might include code from $@.", source.getNode(),
  "this user input"
|
||||
@@ -267,6 +267,36 @@ class HeaderDeclaration extends DataFlow::Node {
|
||||
DataFlow::Node getValueArg() { result = range.getValueArg() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling Csv writer APIs. */
module CsvWriter {
  /**
   * A data flow node for csv writer.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `CsvWriter` instead.
   */
  abstract class Range extends DataFlow::Node {
    /**
     * Gets a data flow node for a value that is passed to the csv writer API.
     */
    abstract DataFlow::Node getAnInput();
  }
}
|
||||
|
||||
/**
 * A data flow node for csv writer.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `CsvWriter::Range` instead.
 */
class CsvWriter extends DataFlow::Node {
  CsvWriter::Range range;

  CsvWriter() { this = range }

  /**
   * Gets a data flow node for a value that is passed to the csv writer API,
   * as reported by the underlying `CsvWriter::Range`.
   */
  DataFlow::Node getAnInput() { result = range.getAnInput() }
}
|
||||
|
||||
/** Provides classes for modeling JWT encoding-related APIs. */
|
||||
module JWTEncoding {
|
||||
/**
|
||||
|
||||
@@ -9,6 +9,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
|
||||
private import experimental.semmle.python.frameworks.LDAP
|
||||
private import experimental.semmle.python.frameworks.NoSQL
|
||||
private import experimental.semmle.python.frameworks.JWT
|
||||
private import experimental.semmle.python.frameworks.Csv
|
||||
private import experimental.semmle.python.libraries.PyJWT
|
||||
private import experimental.semmle.python.libraries.Python_JWT
|
||||
private import experimental.semmle.python.libraries.Authlib
|
||||
|
||||
73
python/ql/src/experimental/semmle/python/frameworks/Csv.qll
Normal file
73
python/ql/src/experimental/semmle/python/frameworks/Csv.qll
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `csv` module from the Python standard library.
|
||||
* See https://docs.python.org/3/library/csv.html
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
private import experimental.semmle.python.Concepts
|
||||
|
||||
/**
 * Provides models for the `csv` module from the Python standard library.
 *
 * See
 * - https://docs.python.org/3/library/csv.html
 */
private module Csv {
  private module Writer {
    /**
     * A source of csv writer instances, used as the starting point for type tracking.
     *
     * Extend this class to add further ways of obtaining a writer instance.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** A direct instantiation of `csv.writer` or `csv.DictWriter`. */
    private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
      ClassInstantiation() {
        this = API::moduleImport("csv").getMember(["writer", "DictWriter"]).getACall()
      }
    }

    /** Gets a reference to a `csv.writer` or `csv.DictWriter` instance. */
    private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
      t.start() and
      result instanceof InstanceSource
      or
      exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
    }

    /** Gets a reference to a `csv.writer` or `csv.DictWriter` instance. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

    /**
     * A call to `writerow` or `writerows` on a csv writer instance.
     *
     * See:
     * - https://docs.python.org/3/library/csv.html#csvwriter.writerow
     * - https://docs.python.org/3/library/csv.html#csvwriter.writerows
     */
    private class CsvWriteCall extends CsvWriter::Range, DataFlow::CallCfgNode {
      string methodName;

      CsvWriteCall() {
        methodName in ["writerow", "writerows"] and
        this.(DataFlow::MethodCallNode).calls(Writer::instance(), methodName)
      }

      /** Gets the first positional argument, or the `row` / `rows` keyword argument. */
      override DataFlow::Node getAnInput() {
        result = this.getArg(0)
        or
        methodName = "writerow" and
        result = this.getArgByName("row")
        or
        methodName = "writerows" and
        result = this.getArgByName("rows")
      }
    }

    /**
     * A call to the `csv.DictWriter` constructor.
     *
     * See: https://docs.python.org/3/library/csv.html#csv.DictWriter
     */
    private class DictWriterInstance extends CsvWriter::Range, DataFlow::CallCfgNode {
      DictWriterInstance() { this = API::moduleImport("csv").getMember("DictWriter").getACall() }

      /** Gets the `fieldnames` argument, passed by position (index 1) or by keyword. */
      override DataFlow::Node getAnInput() {
        result in [this.getArg(1), this.getArgByName("fieldnames")]
      }
    }
  }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
 * A taint-tracking configuration for tracking untrusted user input that is passed to a csv writer.
 */
class CsvInjectionFlowConfig extends TaintTracking::Configuration {
  CsvInjectionFlowConfig() { this = "CsvInjectionFlowConfig" }

  /** Holds if `source` is a remote flow source. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is an input of a modeled `CsvWriter`. */
  override predicate isSink(DataFlow::Node sink) { sink = any(CsvWriter w).getAnInput() }
}
|
||||
@@ -0,0 +1,19 @@
|
||||
edges
|
||||
| csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute |
|
||||
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data |
|
||||
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data |
|
||||
| csv_bad.py:24:16:24:22 | ControlFlowNode for request | csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute |
|
||||
| csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute | csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data |
|
||||
nodes
|
||||
| csv_bad.py:16:16:16:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| csv_bad.py:16:16:16:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
|
||||
| csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
|
||||
| csv_bad.py:24:16:24:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| csv_bad.py:24:16:24:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | semmle.label | ControlFlowNode for csv_data |
|
||||
subpaths
|
||||
#select
|
||||
| csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:18:24:18:31 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:16:16:16:22 | ControlFlowNode for request | this user input |
|
||||
| csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | csv_bad.py:16:16:16:22 | ControlFlowNode for request | csv_bad.py:19:25:19:32 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:16:16:16:22 | ControlFlowNode for request | this user input |
|
||||
| csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | csv_bad.py:24:16:24:22 | ControlFlowNode for request | csv_bad.py:25:46:25:53 | ControlFlowNode for csv_data | Csv injection might include code from $@. | csv_bad.py:24:16:24:22 | ControlFlowNode for request | this user input |
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security/CWE-1236/CsvInjection.ql
|
||||
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""
|
||||
@Desc :csv injection
|
||||
"""
|
||||
import copy
|
||||
import csv
|
||||
from flask import Flask
|
||||
from flask import request
|
||||
from typing import List
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/bad1')
def bad1():  # NOTE(review): layout looks pinned by the expected results (csv_bad.py:16-19) - keep lines/columns
    csv_data = request.args.get('csv')
    csvWriter = csv.writer(open("test.csv", "wt"))
    csvWriter.writerow(csv_data) # bad
    csvWriter.writerows(csv_data) # bad
    return "bad1"
|
||||
|
||||
@app.route('/bad2')
def bad2():  # NOTE(review): layout looks pinned by the expected results (csv_bad.py:24-25) - keep lines/columns
    csv_data = request.args.get('csv')
    csvWriter = csv.DictWriter(f, fieldnames=csv_data) # bad; NOTE(review): `f` is not defined in this file
    csvWriter.writeheader()
    return "bad2"
|
||||
|
||||
if __name__ == '__main__':
    # Runs only when the file is executed as a script.
    # NOTE(review): debug mode is switched on here; presumably acceptable
    # because this file is a test fixture - confirm.
    app.debug = True
    app.run()
|
||||
Reference in New Issue
Block a user