Merge pull request #6653 from smowton/smowton/admin/javascript-unpaired-surrogate-test

Java and JS: Add/adapt tests for literals with an unpaired surrogate character
This commit is contained in:
Chris Smowton
2021-09-13 14:53:23 +01:00
committed by GitHub
6 changed files with 21 additions and 14 deletions

View File

@@ -15,6 +15,6 @@
| literals/Literals.java:130:3:130:13 | "\\u1234567" | \u1234567 | \u1234567 |
| literals/Literals.java:131:3:131:18 | "\\uaBcDeF\\u0aB1" | \uabcdeF\u0ab1 | \uabcdeF\u0ab1 |
| literals/Literals.java:132:3:132:16 | "\\uD800\\uDC00" | \ud800\udc00 | \ud800\udc00 |
| literals/Literals.java:134:3:134:10 | "\\uD800" | ? | ? |
| literals/Literals.java:135:3:135:10 | "\\uDC00" | ? | ? |
| literals/Literals.java:136:3:136:31 | "hello\\uD800hello\\uDC00world" | hello?hello?world | hello?hello?world |
| literals/Literals.java:134:3:134:10 | "\\uD800" | \ufffd | \ufffd |
| literals/Literals.java:135:3:135:10 | "\\uDC00" | \ufffd | \ufffd |
| literals/Literals.java:136:3:136:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld |

View File

@@ -258,8 +258,8 @@ class RegExpConstant extends RegExpTerm, @regexp_constant {
class RegExpCharEscape extends RegExpEscape, RegExpConstant, @regexp_char_escape {
override predicate isCharacter() {
not (
// unencodable characters are represented as '?' in the database
getValue() = "?" and
// unencodable characters are represented as '?' or \uFFFD in the database
getValue() = ["?", 65533.toUnicode()] and
exists(string s | s = toString().toLowerCase() |
// only Unicode escapes give rise to unencodable characters
s.matches("\\\\u%") and

View File

@@ -0,0 +1 @@
| test.js:1:9:1:16 | "\\ud800" | \ufffd | "\\ud800" | \ufffd | test.js:1:9:1:16 | "\\ud800" |

View File

@@ -0,0 +1 @@
let s = "\ud800";

View File

@@ -0,0 +1,5 @@
// Test query: for every string literal located in a file whose base name is "test.js",
// report the literal itself alongside the results of getValue(), getRawValue(),
// getStringValue() and getUnderlyingValue(), so the columns can be compared side by side.
import javascript
from StringLiteral sl
where sl.getFile().getBaseName() = "test.js"
select sl, sl.getValue(), sl.getRawValue(), sl.getStringValue(), sl.getUnderlyingValue()

View File

@@ -304,10 +304,10 @@ var bad66 = /^ab(c+)+$/;
// NOT GOOD
var bad67 = /(\d(\s+)*){20}/;
// GOOD - but we spuriously conclude that a rejecting suffix exists.
// GOOD - but we spuriously conclude that a rejecting suffix exists.
var good36 = /(([^/]|X)+)(\/[^]*)*$/;
// GOOD - but we spuriously conclude that a rejecting suffix exists.
// GOOD - but we spuriously conclude that a rejecting suffix exists.
var good37 = /^((x([^Y]+)?)*(Y|$))/;
// NOT GOOD
@@ -331,7 +331,7 @@ var bad72 = /(c?a?)*b/;
// NOT GOOD
var bad73 = /(?:a|a?)+b/;
// NOT GOOD - but not detected.
// NOT GOOD - but not detected.
var bad74 = /(a?b?)*$/;
// NOT GOOD
@@ -357,13 +357,13 @@ var good40 = /(a|b)+/;
var good41 = /(?:[\s;,"'<>(){}|[\]@=+*]|:(?![/\\]))+/;
// NOT GOOD
var bad83 = /^((?:a{|-)|\w\{)+X$/;
var bad84 = /^((?:a{0|-)|\w\{\d)+X$/;
var bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/;
var bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/;
var bad83 = /^((?:a{|-)|\w\{)+X$/;
var bad84 = /^((?:a{0|-)|\w\{\d)+X$/;
var bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/;
var bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/;
// GOOD:
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
// GOOD:
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
// GOOD
var good43 = /("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)/g;