Merge pull request #12550 from erik-krogh/useNumberUtil

Java/Python: use Number.qll to parse hex numbers in regex parsing
This commit is contained in:
Erik Krogh Kristensen
2023-03-20 15:50:31 +01:00
committed by GitHub
4 changed files with 7 additions and 72 deletions

View File

@@ -470,6 +470,8 @@ module Impl implements RegexTreeViewSig {
override string getPrimaryQLClass() { result = "RegExpAlt" } override string getPrimaryQLClass() { result = "RegExpAlt" }
} }
private import codeql.util.Numbers as Numbers
/** /**
* An escaped regular expression term, that is, a regular expression * An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference. * term starting with a backslash, which is not a backreference.
@@ -531,11 +533,7 @@ module Impl implements RegexTreeViewSig {
* Gets the unicode char for this escape. * Gets the unicode char for this escape.
* E.g. for `\u0061` this returns "a". * E.g. for `\u0061` this returns "a".
*/ */
private string getUnicode() { private string getUnicode() { result = Numbers::parseHexInt(this.getHexString()).toUnicode() }
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
}
/** Gets the part of this escape that is a hexadecimal string */ /** Gets the part of this escape that is a hexadecimal string */
private string getHexString() { private string getHexString() {
@@ -547,18 +545,6 @@ module Impl implements RegexTreeViewSig {
then result = this.getText().substring(3, this.getText().length() - 1) then result = this.getText().substring(3, this.getText().length() - 1)
else result = this.getText().suffix(2) // \xhh else result = this.getText().suffix(2) // \xhh
} }
/**
* Gets the positional contribution of the `index`th character in the hex
* string of this unicode escape: the value of that hex digit multiplied by
* 16 raised to the number of characters that follow it.
* Only has a result when `isUnicode()` holds for this escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` in the
* sixteens place, i.e. `6 * 16`).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getHexString() |
char = hex.charAt(index) and
// weight the digit by its position: the last character has weight 16^0
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
} }
/** /**
@@ -586,25 +572,6 @@ module Impl implements RegexTreeViewSig {
RegExpNonWordBoundary() { this.getChar() = "\\B" } RegExpNonWordBoundary() { this.getChar() = "\\B" }
} }
/**
* Gets the integer value of the single hexadecimal digit `hex`.
* The strings `0` to `9` map to 0 through 9, and the letters `a` to `f`
* (lowercase or uppercase) map to 10 through 15.
* There is no result for any other string.
*/
private int toHex(string hex) {
result = [0 .. 9] and hex = result.toString()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/** /**
* A character class escape in a regular expression. * A character class escape in a regular expression.
* That is, an escaped character that denotes multiple characters. * That is, an escaped character that denotes multiple characters.

View File

@@ -7,6 +7,7 @@ library: true
upgrades: upgrades upgrades: upgrades
dependencies: dependencies:
codeql/regex: ${workspace} codeql/regex: ${workspace}
codeql/util: ${workspace}
codeql/tutorial: ${workspace} codeql/tutorial: ${workspace}
dataExtensions: dataExtensions:
- semmle/python/frameworks/**/model.yml - semmle/python/frameworks/**/model.yml

View File

@@ -468,6 +468,8 @@ module Impl implements RegexTreeViewSig {
*/ */
class RegExpCharEscape = RegExpEscape; class RegExpCharEscape = RegExpEscape;
private import codeql.util.Numbers as Numbers
/** /**
* An escaped regular expression term, that is, a regular expression * An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference. * term starting with a backslash, which is not a backreference.
@@ -528,42 +530,8 @@ module Impl implements RegexTreeViewSig {
* E.g. for `\u0061` this returns "a". * E.g. for `\u0061` this returns "a".
*/ */
private string getUnicode() { private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) | result = Numbers::parseHexInt(this.getText().suffix(2)).toUnicode()
result = codepoint.toUnicode()
)
} }
/**
* Gets the positional contribution of the `index`th character in the hex
* digits of this unicode escape (the text after the first two characters):
* the value of that hex digit multiplied by 16 raised to the number of
* characters that follow it.
* Only has a result when `isUnicode()` holds for this escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` in the
* sixteens place, i.e. `6 * 16`).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getText().suffix(2) |
char = hex.charAt(index) and
// weight the digit by its position: the last character has weight 16^0
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
}
/**
* Gets the integer value (0 to 15) of the single hexadecimal digit `hex`; letters are accepted in either case.
*/
private int toHex(string hex) {
hex = [0 .. 9].toString() and
result = hex.toInt()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
} }
/** /**

View File

@@ -6,7 +6,6 @@ groups:
dependencies: dependencies:
codeql/python-all: ${workspace} codeql/python-all: ${workspace}
codeql/suite-helpers: ${workspace} codeql/suite-helpers: ${workspace}
codeql/util: ${workspace}
suites: codeql-suites suites: codeql-suites
extractor: python extractor: python
defaultSuiteFile: codeql-suites/python-code-scanning.qls defaultSuiteFile: codeql-suites/python-code-scanning.qls