mirror of
https://github.com/github/codeql.git
synced 2025-12-22 19:56:32 +01:00
Merge pull request #12550 from erik-krogh/useNumberUtil
Java/Python: use Number.qll to parse hex numbers in regex parsing
This commit is contained in:
@@ -470,6 +470,8 @@ module Impl implements RegexTreeViewSig {
|
|||||||
override string getPrimaryQLClass() { result = "RegExpAlt" }
|
override string getPrimaryQLClass() { result = "RegExpAlt" }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private import codeql.util.Numbers as Numbers
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An escaped regular expression term, that is, a regular expression
|
* An escaped regular expression term, that is, a regular expression
|
||||||
* term starting with a backslash, which is not a backreference.
|
* term starting with a backslash, which is not a backreference.
|
||||||
@@ -531,11 +533,7 @@ module Impl implements RegexTreeViewSig {
|
|||||||
* Gets the unicode char for this escape.
|
* Gets the unicode char for this escape.
|
||||||
* E.g. for `\u0061` this returns "a".
|
* E.g. for `\u0061` this returns "a".
|
||||||
*/
|
*/
|
||||||
private string getUnicode() {
|
private string getUnicode() { result = Numbers::parseHexInt(this.getHexString()).toUnicode() }
|
||||||
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
|
|
||||||
result = codepoint.toUnicode()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Gets the part of this escape that is a hexadecimal string */
|
/** Gets the part of this escape that is a hexadecimal string */
|
||||||
private string getHexString() {
|
private string getHexString() {
|
||||||
@@ -547,18 +545,6 @@ module Impl implements RegexTreeViewSig {
|
|||||||
then result = this.getText().substring(3, this.getText().length() - 1)
|
then result = this.getText().substring(3, this.getText().length() - 1)
|
||||||
else result = this.getText().suffix(2) // \xhh
|
else result = this.getText().suffix(2) // \xhh
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets int value for the `index`th char in the hex number of the unicode escape.
|
|
||||||
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
|
|
||||||
*/
|
|
||||||
private int getHexValueFromUnicode(int index) {
|
|
||||||
this.isUnicode() and
|
|
||||||
exists(string hex, string char | hex = this.getHexString() |
|
|
||||||
char = hex.charAt(index) and
|
|
||||||
result = 16.pow(hex.length() - index - 1) * toHex(char)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -586,25 +572,6 @@ module Impl implements RegexTreeViewSig {
|
|||||||
RegExpNonWordBoundary() { this.getChar() = "\\B" }
|
RegExpNonWordBoundary() { this.getChar() = "\\B" }
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the hex number for the `hex` char.
|
|
||||||
*/
|
|
||||||
private int toHex(string hex) {
|
|
||||||
result = [0 .. 9] and hex = result.toString()
|
|
||||||
or
|
|
||||||
result = 10 and hex = ["a", "A"]
|
|
||||||
or
|
|
||||||
result = 11 and hex = ["b", "B"]
|
|
||||||
or
|
|
||||||
result = 12 and hex = ["c", "C"]
|
|
||||||
or
|
|
||||||
result = 13 and hex = ["d", "D"]
|
|
||||||
or
|
|
||||||
result = 14 and hex = ["e", "E"]
|
|
||||||
or
|
|
||||||
result = 15 and hex = ["f", "F"]
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A character class escape in a regular expression.
|
* A character class escape in a regular expression.
|
||||||
* That is, an escaped character that denotes multiple characters.
|
* That is, an escaped character that denotes multiple characters.
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ library: true
|
|||||||
upgrades: upgrades
|
upgrades: upgrades
|
||||||
dependencies:
|
dependencies:
|
||||||
codeql/regex: ${workspace}
|
codeql/regex: ${workspace}
|
||||||
|
codeql/util: ${workspace}
|
||||||
codeql/tutorial: ${workspace}
|
codeql/tutorial: ${workspace}
|
||||||
dataExtensions:
|
dataExtensions:
|
||||||
- semmle/python/frameworks/**/model.yml
|
- semmle/python/frameworks/**/model.yml
|
||||||
|
|||||||
@@ -468,6 +468,8 @@ module Impl implements RegexTreeViewSig {
|
|||||||
*/
|
*/
|
||||||
class RegExpCharEscape = RegExpEscape;
|
class RegExpCharEscape = RegExpEscape;
|
||||||
|
|
||||||
|
private import codeql.util.Numbers as Numbers
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An escaped regular expression term, that is, a regular expression
|
* An escaped regular expression term, that is, a regular expression
|
||||||
* term starting with a backslash, which is not a backreference.
|
* term starting with a backslash, which is not a backreference.
|
||||||
@@ -528,42 +530,8 @@ module Impl implements RegexTreeViewSig {
|
|||||||
* E.g. for `\u0061` this returns "a".
|
* E.g. for `\u0061` this returns "a".
|
||||||
*/
|
*/
|
||||||
private string getUnicode() {
|
private string getUnicode() {
|
||||||
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
|
result = Numbers::parseHexInt(this.getText().suffix(2)).toUnicode()
|
||||||
result = codepoint.toUnicode()
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets int value for the `index`th char in the hex number of the unicode escape.
|
|
||||||
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
|
|
||||||
*/
|
|
||||||
private int getHexValueFromUnicode(int index) {
|
|
||||||
this.isUnicode() and
|
|
||||||
exists(string hex, string char | hex = this.getText().suffix(2) |
|
|
||||||
char = hex.charAt(index) and
|
|
||||||
result = 16.pow(hex.length() - index - 1) * toHex(char)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the hex number for the `hex` char.
|
|
||||||
*/
|
|
||||||
private int toHex(string hex) {
|
|
||||||
hex = [0 .. 9].toString() and
|
|
||||||
result = hex.toInt()
|
|
||||||
or
|
|
||||||
result = 10 and hex = ["a", "A"]
|
|
||||||
or
|
|
||||||
result = 11 and hex = ["b", "B"]
|
|
||||||
or
|
|
||||||
result = 12 and hex = ["c", "C"]
|
|
||||||
or
|
|
||||||
result = 13 and hex = ["d", "D"]
|
|
||||||
or
|
|
||||||
result = 14 and hex = ["e", "E"]
|
|
||||||
or
|
|
||||||
result = 15 and hex = ["f", "F"]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ groups:
|
|||||||
dependencies:
|
dependencies:
|
||||||
codeql/python-all: ${workspace}
|
codeql/python-all: ${workspace}
|
||||||
codeql/suite-helpers: ${workspace}
|
codeql/suite-helpers: ${workspace}
|
||||||
codeql/util: ${workspace}
|
|
||||||
suites: codeql-suites
|
suites: codeql-suites
|
||||||
extractor: python
|
extractor: python
|
||||||
defaultSuiteFile: codeql-suites/python-code-scanning.qls
|
defaultSuiteFile: codeql-suites/python-code-scanning.qls
|
||||||
|
|||||||
Reference in New Issue
Block a user