JS: Fix offsets of octal and Unicode escapes

This commit is contained in:
Asger F
2019-11-06 13:27:55 +00:00
parent 4680e3a89a
commit 57a9cad721
4 changed files with 86 additions and 3 deletions

View File

@@ -526,6 +526,10 @@ public class ASTExtractor {
return key;
}
/** Returns true if {@code ch} is one of the octal digits '0' through '7'. */
private boolean isOctalDigit(char ch) {
  return ch >= '0' && ch <= '7';
}
/**
* Builds a translation from offsets in a string value back to its original raw literal text
* (including quotes).
@@ -551,8 +555,14 @@ public class ASTExtractor {
int outputLength = 1; // Number characters the sequence expands to.
char ch = rawLiteral.charAt(pos + 1);
if ('0' <= ch && ch <= '7') {
// Octal escape: \NNN
length = 4;
// Octal escape: \N, \NN, or \NNN
int firstDigit = pos + 1;
int end = firstDigit;
int maxEnd = Math.min(firstDigit + (ch <= '3' ? 3 : 2), rawLiteral.length());
while (end < maxEnd && isOctalDigit(rawLiteral.charAt(end))) {
++end;
}
length = end - pos;
} else if (ch == 'x') {
// Hex escape: \xNN
length = 4;
@@ -562,11 +572,16 @@ public class ASTExtractor {
// Scan for the ending '}'
int firstDigit = pos + 3;
int end = firstDigit;
int leadingZeros = 0;
while (end < rawLiteral.length() && rawLiteral.charAt(end) == '0') {
++end;
++leadingZeros;
}
while (end < rawLiteral.length() && rawLiteral.charAt(end) != '}') {
++end;
}
int numDigits = end - firstDigit;
if (numDigits > 4) {
if (numDigits - leadingZeros > 4) {
outputLength = 2; // Encoded as a surrogate pair
}
++end; // Include '}' character

View File

@@ -0,0 +1,29 @@
| tst.js:1:22:1:22 | . | . |
| tst.js:2:23:2:23 | . | . |
| tst.js:3:24:3:24 | . | . |
| tst.js:4:23:4:23 | . | . |
| tst.js:5:24:5:24 | . | . |
| tst.js:6:24:6:24 | . | . |
| tst.js:7:24:7:24 | . | . |
| tst.js:8:24:8:24 | . | . |
| tst.js:10:24:10:24 | . | . |
| tst.js:11:24:11:24 | . | . |
| tst.js:12:25:12:25 | . | . |
| tst.js:13:25:13:25 | . | . |
| tst.js:14:25:14:25 | . | . |
| tst.js:15:25:15:25 | . | . |
| tst.js:17:25:17:25 | . | . |
| tst.js:18:25:18:25 | . | . |
| tst.js:19:22:19:22 | . | . |
| tst.js:20:23:20:23 | . | . |
| tst.js:21:24:21:24 | . | . |
| tst.js:23:26:23:26 | . | . |
| tst.js:24:27:24:27 | . | . |
| tst.js:25:28:25:28 | . | . |
| tst.js:26:29:26:29 | . | . |
| tst.js:27:30:27:30 | . | . |
| tst.js:28:31:28:31 | . | . |
| tst.js:30:27:30:27 | . | . |
| tst.js:31:28:31:28 | . | . |
| tst.js:32:29:32:29 | . | . |
| tst.js:33:30:33:30 | . | . |

View File

@@ -0,0 +1,6 @@
import javascript
/**
 * For every `.` regular-expression term nested inside a string literal,
 * selects the term together with the character found in the literal's raw
 * text at the term's start-column offset from the start of the literal.
 * The two columns agree (both `.`) when the term's location is computed
 * correctly.
 */
from StringLiteral lit, RegExpDot dot, int offset
where
  lit = dot.getParent*() and
  offset = dot.getLocation().getStartColumn() - lit.getLocation().getStartColumn()
select dot, lit.getRawValue().charAt(offset)

View File

@@ -0,0 +1,33 @@
new RegExp('\0 hello . \0 world \0'); // Octal escapes of one to three digits. Keep comments trailing: the expected results appear to pin each '.' by line and column.
new RegExp('\00 hello . \00 world \00');
new RegExp('\000 hello . \000 world \000');
new RegExp('\12 hello . \12 world \12');
new RegExp('\333 hello . \333 world \333'); // first digit <= 3: all three digits belong to the escape
new RegExp('\444 hello . \444 world \444'); // first digit > 3: only two digits belong to the escape, the third is a literal digit
new RegExp('\555 hello . \555 world \555');
new RegExp('\666 hello . \666 world \666');
new RegExp('\777 hello . \777 world \777'); // octal 77 is '?', so the pattern starts with '?' and presumably fails to parse; no result seems to be expected for this line
new RegExp('\787 hello . \787 world \787'); // one-digit escape followed by literal '87' ('8' is not an octal digit)
new RegExp('\087 hello . \087 world \087'); // one-digit escape followed by literal '87'
new RegExp('\3331 hello . \3331 world \3331'); // same escapes as above, followed by a digit that is not part of the escape
new RegExp('\4441 hello . \4441 world \4441');
new RegExp('\5551 hello . \5551 world \5551');
new RegExp('\6661 hello . \6661 world \6661');
new RegExp('\7771 hello . \7771 world \7771'); // again starts with '?'; presumably no result is expected for this line
new RegExp('\7871 hello . \7871 world \7871');
new RegExp('\0871 hello . \0871 world \0871');
new RegExp('\8 hello . \8 world \8'); // '8' is not an octal digit, so these are not octal escapes
new RegExp('\81 hello . \81 world \81');
new RegExp('\811 hello . \811 world \811');
new RegExp('\u{a0} hello . \u{a0} world \u{a0}'); // braced Unicode escapes for U+00A0 with a growing number of leading zeros
new RegExp('\u{0a0} hello . \u{0a0} world \u{0a0}');
new RegExp('\u{00a0} hello . \u{00a0} world \u{00a0}');
new RegExp('\u{000a0} hello . \u{000a0} world \u{000a0}'); // more than four digits, yet still a single UTF-16 code unit
new RegExp('\u{0000a0} hello . \u{0000a0} world \u{0000a0}');
new RegExp('\u{00000a0} hello . \u{00000a0} world \u{00000a0}');
new RegExp('\u{1a0} hello . \u{1a0} world \u{1a0}'); // braced Unicode escapes of growing magnitude
new RegExp('\u{10a0} hello . \u{10a0} world \u{10a0}');
new RegExp('\u{100a0} hello . \u{100a0} world \u{100a0}'); // above U+FFFF: expands to a surrogate pair (two UTF-16 code units)
new RegExp('\u{1000a0} hello . \u{1000a0} world \u{1000a0}');