CharacterLiteral.getCodePointValue: fix handling of surrogates

This commit is contained in:
Chris Smowton
2021-11-25 14:07:21 +00:00
parent 609d6011a2
commit db39c0b8be
4 changed files with 101 additions and 1 deletions

View File

@@ -0,0 +1,3 @@
lgtm,codescanning
* `CharacterLiteral`'s `getCodePointValue` predicate now returns the correct value for UTF-16 surrogates.
* The `RangeAnalysis` module and the `java/constant-comparison` query no longer raise false alerts regarding comparisons with Unicode surrogate character literals.

View File

@@ -713,6 +713,18 @@ class DoubleLiteral extends Literal, @doubleliteral {
override string getAPrimaryQlClass() { result = "DoubleLiteral" }
}
// Implementation taken from @p0 at https://github.com/github/codeql/issues/4145
/**
 * Gets the integer value of the hexadecimal string `s`.
 *
 * `s` is upper-cased first, so hex digits in either case are accepted. Each
 * character contributes its index within "0123456789ABCDEF", shifted left by
 * four bits for every character that follows it in the string.
 *
 * NOTE(review): a character outside `0-9A-F` presumably has no `indexOf` result
 * and would be silently left out of the sum rather than rejected - confirm that
 * callers only pass well-formed hex strings.
 */
bindingset[s]
private int fromHex(string s) {
exists(string digits | s.toUpperCase() = digits |
result =
sum(int i |
|
// Digit value of the character at index `i`, weighted by its position from the right.
"0123456789ABCDEF".indexOf(digits.charAt(i)).bitShiftLeft((digits.length() - i - 1) * 4)
)
)
}
/** A character literal. For example, `'\n'`. */
class CharacterLiteral extends Literal, @characterliteral {
override string getAPrimaryQlClass() { result = "CharacterLiteral" }
@@ -731,7 +743,11 @@ class CharacterLiteral extends Literal, @characterliteral {
* this literal. The result is the same as if the Java code had cast
* the character to an `int`.
*/
int getCodePointValue() { result.toUnicode() = this.getValue() }
int getCodePointValue() {
  // A literal spelled exactly as a backslash, one `u` and four hex digits between
  // single quotes is decoded from its source text, so that UTF-16 surrogate code
  // units keep their real numeric value.
  //
  // The pattern uses four `_` wildcards (one character each) rather than `%`:
  // `substring(3, 7)` is only the hex payload when the literal has exactly that
  // shape. Java also accepts repeated `u` characters in an escape (for example
  // `'\uu0041'`); such literals must not be sliced at fixed offsets, so they fall
  // through to the `getValue()`-based branch instead.
  if this.getLiteral().matches("'\\u____'")
  then result = fromHex(this.getLiteral().substring(3, 7))
  else result.toUnicode() = this.getValue()
}
}
/**

View File

@@ -0,0 +1,77 @@
public class CharLiterals { // Query test input: char comparisons with and without a redundant test. Keep line/column layout stable.
public static boolean redundantSurrogateRange(char c) {
if(c >= '\uda00') {
if(c >= '\ud900') { // Redundant: the outer test (>= U+DA00) already implies c >= U+D900.
return true;
}
}
return false;
}
public static boolean goodSurrogateRange(char c) {
if(c >= '\ud900') {
if(c >= '\uda00') { // Not redundant: U+DA00 is a stricter bound than the outer U+D900.
return true;
}
}
return false;
}
public static boolean redundantNonSurrogateRange(char c) {
if(c >= 'b') {
if(c >= 'a') { // Redundant: the outer test (>= 'b') already implies c >= 'a'.
return true;
}
}
return false;
}
public static boolean goodNonSurrogateRange(char c) {
if(c >= 'a') {
if(c >= 'b') { // Not redundant: 'b' is a stricter bound than the outer 'a'.
return true;
}
}
return false;
}
public static boolean redundantSurrogateEquality(char c) {
if(c == '\uda00') {
return true;
}
else if(c == '\uda00') { // Always false: this branch is only reached when c is not U+DA00.
return true;
}
return false;
}
public static boolean goodSurrogateEquality(char c) {
if(c == '\uda00') {
return true;
}
else if(c == '\ud900') { // Can be true: compares against a different value (U+D900) than the first test.
return true;
}
return false;
}
public static boolean redundantNonSurrogateEquality(char c) {
if(c == 'a') {
return true;
}
else if(c == 'a') { // Always false: this branch is only reached when c is not 'a'.
return true;
}
return false;
}
public static boolean goodNonSurrogateEquality(char c) {
if(c == 'a') {
return true;
}
else if(c == 'b') { // Can be true: compares against a different value ('b') than the first test.
return true;
}
return false;
}
}

View File

@@ -15,6 +15,10 @@
| A.java:76:11:76:16 | ... >= ... | Test is always false, because of $@. | A.java:74:13:74:18 | ... >= ... | this condition |
| A.java:84:21:84:30 | ... < ... | Test is always false, because of $@. | A.java:80:12:80:21 | ... > ... | this condition |
| A.java:88:9:88:13 | ... > ... | Test is always false. | A.java:88:9:88:13 | ... > ... | this condition |
| CharLiterals.java:4:10:4:22 | ... >= ... | Test is always true, because of $@. | CharLiterals.java:3:8:3:20 | ... >= ... | this condition |
| CharLiterals.java:22:10:22:17 | ... >= ... | Test is always true, because of $@. | CharLiterals.java:21:8:21:15 | ... >= ... | this condition |
| CharLiterals.java:42:13:42:25 | ... == ... | Test is always false, because of $@. | CharLiterals.java:39:8:39:20 | ... == ... | this condition |
| CharLiterals.java:62:13:62:20 | ... == ... | Test is always false, because of $@. | CharLiterals.java:59:8:59:15 | ... == ... | this condition |
| Test.java:9:7:9:12 | ... >= ... | Test is always true, because of $@. | Test.java:5:7:5:11 | ... < ... | this condition |
| Test.java:10:7:10:12 | ... >= ... | Test is always true, because of $@. | Test.java:5:16:5:20 | ... < ... | this condition |
| Test.java:14:9:14:15 | ... == ... | Test is always false, because of $@. | Test.java:12:8:12:13 | ... < ... | this condition |