mirror of
https://github.com/github/codeql.git
synced 2025-12-21 11:16:30 +01:00
extract regexp literals from string concatenations
This commit is contained in:
@@ -3,6 +3,8 @@ package com.semmle.js.extractor;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Stack;
|
||||
@@ -164,6 +166,9 @@ import com.semmle.util.locations.SourceMap;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
|
||||
import com.semmle.util.files.FileLineOffsetCache;
|
||||
|
||||
|
||||
/** Extractor for AST-based information; invoked by the {@link JSExtractor}. */
|
||||
public class ASTExtractor {
|
||||
private final TrapWriter trapwriter;
|
||||
@@ -567,12 +572,17 @@ public class ASTExtractor {
|
||||
String valueString = nd.getStringValue();
|
||||
|
||||
trapwriter.addTuple("literals", valueString, source, key);
|
||||
Position start = nd.getLoc().getStart();
|
||||
com.semmle.util.locations.Position startPos = new com.semmle.util.locations.Position(start.getLine(), start.getColumn(), start.getOffset());
|
||||
|
||||
if (nd.isRegExp()) {
|
||||
OffsetTranslation offsets = new OffsetTranslation();
|
||||
offsets.set(0, 1); // skip the initial '/'
|
||||
regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), offsets, nd, false);
|
||||
SourceMap sourceMap = SourceMap.legacyWithStartPos(SourceMap.fromString(nd.getRaw()).offsetBy(0, offsets), startPos);
|
||||
regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false);
|
||||
} else if (nd.isStringLiteral() && !c.isInsideType() && nd.getRaw().length() < 1000) {
|
||||
regexpExtractor.extract(valueString, makeStringLiteralOffsets(nd.getRaw()), nd, true);
|
||||
SourceMap sourceMap = SourceMap.legacyWithStartPos(SourceMap.fromString(nd.getRaw()).offsetBy(0, makeStringLiteralOffsets(nd.getRaw())), startPos);
|
||||
regexpExtractor.extract(valueString, sourceMap, nd, true);
|
||||
|
||||
// Scan the string for template tags, if we're in a context where such tags are relevant.
|
||||
if (scopeManager.isInTemplateFile()) {
|
||||
@@ -593,6 +603,48 @@ public class ASTExtractor {
|
||||
return '0' <= ch && ch <= '7';
|
||||
}
|
||||
|
||||
private String getStringConcatResult(Expression exp) {
|
||||
if (exp instanceof BinaryExpression) {
|
||||
BinaryExpression be = (BinaryExpression) exp;
|
||||
if (be.getOperator().equals("+")) {
|
||||
String left = getStringConcatResult(be.getLeft());
|
||||
String right = getStringConcatResult(be.getRight());
|
||||
if (left != null && right != null) {
|
||||
return left + right;
|
||||
}
|
||||
}
|
||||
} else if (exp instanceof Literal) {
|
||||
Literal lit = (Literal) exp;
|
||||
if (!lit.isStringLiteral()) {
|
||||
return null;
|
||||
}
|
||||
return lit.getStringValue();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private OffsetTranslation computeStringConcatOffset(Expression exp) {
|
||||
if (exp instanceof Literal && ((Literal)exp).isStringLiteral()) {
|
||||
String raw = ((Literal) exp).getRaw();
|
||||
return makeStringLiteralOffsets(raw);
|
||||
}
|
||||
|
||||
if (exp instanceof BinaryExpression) {
|
||||
BinaryExpression be = (BinaryExpression) exp;
|
||||
OffsetTranslation left = computeStringConcatOffset(be.getLeft());
|
||||
OffsetTranslation right = computeStringConcatOffset(be.getRight());
|
||||
|
||||
if (left == null || right == null) {
|
||||
return null;
|
||||
}
|
||||
int delta = be.getRight().getLoc().getStart().getOffset() - be.getLeft().getLoc().getStart().getOffset();
|
||||
int offset = getStringConcatResult(be.getLeft()).length();
|
||||
return left.append(right, offset, delta);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a translation from offsets in a string value back to its original raw literal text
|
||||
* (including quotes).
|
||||
@@ -786,11 +838,31 @@ public class ASTExtractor {
|
||||
return key;
|
||||
}
|
||||
|
||||
// set to determine which BinaryExpression has been extracted as regexp
|
||||
private Set<Expression> extractedAsRegexp = new HashSet<>();
|
||||
|
||||
@Override
|
||||
public Label visit(BinaryExpression nd, Context c) {
|
||||
Label key = super.visit(nd, c);
|
||||
extractedAsRegexp.add(nd.getLeft());
|
||||
extractedAsRegexp.add(nd.getRight());
|
||||
visit(nd.getLeft(), key, 0);
|
||||
visit(nd.getRight(), key, 1);
|
||||
if (extractedAsRegexp.contains(nd)) {
|
||||
return key;
|
||||
}
|
||||
String rawString = getStringConcatResult(nd);
|
||||
if (rawString == null) {
|
||||
return key;
|
||||
}
|
||||
if (rawString.length() > 1000 && !rawString.trim().isEmpty()) {
|
||||
return key;
|
||||
}
|
||||
OffsetTranslation offsets = computeStringConcatOffset(nd);
|
||||
Position start = nd.getLoc().getStart();
|
||||
com.semmle.util.locations.Position startPos = new com.semmle.util.locations.Position(start.getLine(), start.getColumn(), start.getOffset());
|
||||
SourceMap sourceMap = SourceMap.legacyWithStartPos(SourceMap.fromString(nd.getLoc().getSource()).offsetBy(0, offsets), startPos);
|
||||
regexpExtractor.extract(rawString, sourceMap, nd, true);
|
||||
return key;
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ public class Main {
|
||||
* A version identifier that should be updated every time the extractor changes in such a way that
|
||||
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
|
||||
*/
|
||||
public static final String EXTRACTOR_VERSION = "2021-10-25";
|
||||
public static final String EXTRACTOR_VERSION = "2021-10-28";
|
||||
|
||||
public static final Pattern NEWLINE = Pattern.compile("\n");
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
|
||||
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
|
||||
import com.semmle.js.parser.RegExpParser;
|
||||
import com.semmle.js.parser.RegExpParser.Result;
|
||||
import com.semmle.util.locations.OffsetTranslation;
|
||||
import com.semmle.util.locations.SourceMap;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
|
||||
@@ -52,8 +52,7 @@ public class RegExpExtractor {
|
||||
private final TrapWriter trapwriter;
|
||||
private final LocationManager locationManager;
|
||||
private final RegExpParser parser = new RegExpParser();
|
||||
private Position literalStart;
|
||||
private OffsetTranslation offsets;
|
||||
private SourceMap sourceMap;
|
||||
|
||||
public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
|
||||
this.trapwriter = trapwriter;
|
||||
@@ -122,17 +121,14 @@ public class RegExpExtractor {
|
||||
}
|
||||
|
||||
public void emitLocation(SourceElement term, Label lbl) {
|
||||
int col = literalStart.getColumn();
|
||||
int sl, sc, el, ec;
|
||||
sl = el = literalStart.getLine();
|
||||
sc = col + offsets.get(term.getLoc().getStart().getColumn());
|
||||
ec = col + offsets.get(term.getLoc().getEnd().getColumn());
|
||||
sc += 1; // convert to 1-based
|
||||
ec += 1; // convert to 1-based
|
||||
ec -= 1; // convert to inclusive
|
||||
int sl = sourceMap.getStart(term.getLoc().getStart().getColumn()).getLine();
|
||||
int sc = sourceMap.getStart(term.getLoc().getStart().getColumn()).getColumn() + 1; // convert to 1-based
|
||||
int el = sourceMap.getEnd(term.getLoc().getEnd().getColumn()).getLine();
|
||||
int ec = sourceMap.getEnd(term.getLoc().getEnd().getColumn()).getColumn() - 1; // convert to inclusive
|
||||
locationManager.emitSnippetLocation(lbl, sl, sc, el, ec);
|
||||
}
|
||||
|
||||
|
||||
private class V implements Visitor {
|
||||
private Label parent;
|
||||
private int idx;
|
||||
@@ -348,16 +344,13 @@ public class RegExpExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
public void extract(
|
||||
String src, OffsetTranslation offsets, Node parent, boolean isSpeculativeParsing) {
|
||||
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) {
|
||||
Result res = parser.parse(src);
|
||||
|
||||
if (isSpeculativeParsing && res.getErrors().size() > 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.literalStart = parent.getLoc().getStart();
|
||||
this.offsets = offsets;
|
||||
this.sourceMap = sourceMap;
|
||||
RegExpTerm ast = res.getAST();
|
||||
new V().visit(ast, trapwriter.localID(parent), 0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user