mirror of
https://github.com/github/codeql.git
synced 2026-05-01 03:35:13 +02:00
JS: Track location information using SourceMaps
This commit is contained in:
@@ -1,6 +1,13 @@
|
||||
package com.semmle.js.ast;
|
||||
|
||||
/** A source position identifying a single character. */
|
||||
/**
|
||||
* A source position identifying a single character.
|
||||
* <p>
|
||||
* Note that this class remains distinct from {@link com.semmle.util.locations.Position},
|
||||
* due to the 1-based line number convention and the tendency for users of this class to provide
|
||||
* dummy offset values. Although the classes are structurally identical, it is not always safe to
|
||||
* convert one into the other.
|
||||
*/
|
||||
public class Position implements Comparable<Position> {
|
||||
private final int line, column, offset;
|
||||
|
||||
@@ -23,6 +30,8 @@ public class Position implements Comparable<Position> {
|
||||
/**
|
||||
* The offset (0-based) of this position from the start of the file, that is, the number of
|
||||
* characters that precede it.
|
||||
* <p>
|
||||
* Note that in some cases, a dummy value is filled in for the offset.
|
||||
*/
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
|
||||
@@ -155,6 +155,9 @@ import com.semmle.ts.ast.TypeofTypeExpr;
|
||||
import com.semmle.ts.ast.UnaryTypeExpr;
|
||||
import com.semmle.ts.ast.UnionTypeExpr;
|
||||
import com.semmle.util.collections.CollectionUtil;
|
||||
import com.semmle.util.data.Pair;
|
||||
import com.semmle.util.locations.OffsetTranslation;
|
||||
import com.semmle.util.locations.SourceMap;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
|
||||
@@ -1165,25 +1168,23 @@ public class ASTExtractor {
|
||||
if (textualExtractor.isSnippet()) {
|
||||
return; // do not create nested snippets
|
||||
}
|
||||
String source = tryGetStringValueFromExpression(expr);
|
||||
if (source == null) {
|
||||
Pair<String, OffsetTranslation> sourceAndOffset = tryGetStringValueFromExpression(expr);
|
||||
if (sourceAndOffset == null) {
|
||||
return;
|
||||
}
|
||||
String source = sourceAndOffset.fst();
|
||||
SourceLocation loc = expr.getLoc();
|
||||
Path originalFile = textualExtractor.getExtractedFile().toPath();
|
||||
Path vfile = originalFile.resolveSibling(originalFile.getFileName().toString() + "." + loc.getStart().getLine() + "." + loc.getStart().getColumn() + ".html");
|
||||
LocationManager innerLocationManager = new LocationManager(
|
||||
locationManager.getSourceFile(),
|
||||
locationManager.getTrapWriter(),
|
||||
locationManager.getFileLabel());
|
||||
innerLocationManager.setStart(loc.getStart().getLine(), loc.getStart().getColumn());
|
||||
SourceMap sourceMap = textualExtractor.getSourceMap().offsetBy(loc.getStart().getOffset(), sourceAndOffset.snd());
|
||||
TextualExtractor innerTextualExtractor = new TextualExtractor(
|
||||
trapwriter,
|
||||
innerLocationManager,
|
||||
locationManager,
|
||||
source,
|
||||
false,
|
||||
getMetrics(),
|
||||
vfile.toFile());
|
||||
vfile.toFile(),
|
||||
sourceMap);
|
||||
HTMLExtractor html = HTMLExtractor.forEmbeddedHtml(config);
|
||||
List<Label> rootNodes = html.extractEx(innerTextualExtractor).fst();
|
||||
int rootNodeIndex = 0;
|
||||
@@ -1195,22 +1196,25 @@ public class ASTExtractor {
|
||||
private String tryGetIdentifierName(Expression e) {
|
||||
return e instanceof Identifier ? ((Identifier)e).getName() : null;
|
||||
}
|
||||
|
||||
private String tryGetStringValueFromExpression(Expression e) {
|
||||
|
||||
private Pair<String, OffsetTranslation> tryGetStringValueFromExpression(Expression e) {
|
||||
if (e instanceof Literal) {
|
||||
Literal lit = (Literal) e;
|
||||
return lit.isStringLiteral() ? (String) lit.getValue() : null;
|
||||
if (!lit.isStringLiteral()) {
|
||||
return null;
|
||||
}
|
||||
return Pair.make((String) lit.getValue(), makeStringLiteralOffsets(lit.getRaw()));
|
||||
}
|
||||
if (e instanceof TemplateLiteral) {
|
||||
TemplateLiteral lit = (TemplateLiteral) e;
|
||||
if (!lit.getExpressions().isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (TemplateElement elm : lit.getQuasis()) {
|
||||
sb.append(elm.getCooked());
|
||||
if (lit.getQuasis().size() != 1) {
|
||||
return null;
|
||||
}
|
||||
return sb.toString();
|
||||
TemplateElement element = lit.getQuasis().get(0);
|
||||
return Pair.make((String) element.getCooked(), makeStringLiteralOffsets("`" + element.getRaw() + "`"));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import com.semmle.util.data.Option;
|
||||
import com.semmle.util.data.Pair;
|
||||
import com.semmle.util.data.StringUtil;
|
||||
import com.semmle.util.io.WholeIO;
|
||||
import com.semmle.util.locations.Position;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
|
||||
@@ -22,7 +23,6 @@ import net.htmlparser.jericho.Attribute;
|
||||
import net.htmlparser.jericho.Attributes;
|
||||
import net.htmlparser.jericho.Element;
|
||||
import net.htmlparser.jericho.HTMLElementName;
|
||||
import net.htmlparser.jericho.RowColumnVector;
|
||||
import net.htmlparser.jericho.Segment;
|
||||
|
||||
/** Extractor for handling HTML and XHTML files. */
|
||||
@@ -73,15 +73,13 @@ public class HTMLExtractor implements IExtractor {
|
||||
*/
|
||||
source = source.replace("<![CDATA[", " ").replace("]]>", " ");
|
||||
if (!source.trim().isEmpty()) {
|
||||
RowColumnVector contentStart = content.getRowColumnVector();
|
||||
extractSnippet(
|
||||
TopLevelKind.inlineScript,
|
||||
config.withSourceType(sourceType),
|
||||
scopeManager,
|
||||
textualExtractor,
|
||||
source,
|
||||
contentStart.getRow(),
|
||||
contentStart.getColumn(),
|
||||
content.getBegin(),
|
||||
isTypeScript,
|
||||
elt,
|
||||
context);
|
||||
@@ -96,7 +94,7 @@ public class HTMLExtractor implements IExtractor {
|
||||
if (attr.getValue() == null || attr.getValue().isEmpty()) continue;
|
||||
|
||||
String source = attr.getValue();
|
||||
RowColumnVector valueStart = attr.getValueSegment().getRowColumnVector();
|
||||
int valueStart = attr.getValueSegment().getBegin();
|
||||
if (JS_ATTRIBUTE.matcher(attr.getName()).matches()) {
|
||||
extractSnippet(
|
||||
TopLevelKind.eventHandler,
|
||||
@@ -104,8 +102,7 @@ public class HTMLExtractor implements IExtractor {
|
||||
scopeManager,
|
||||
textualExtractor,
|
||||
source,
|
||||
valueStart.getRow(),
|
||||
valueStart.getColumn(),
|
||||
valueStart,
|
||||
false /* isTypeScript */,
|
||||
attr,
|
||||
context);
|
||||
@@ -126,8 +123,7 @@ public class HTMLExtractor implements IExtractor {
|
||||
scopeManager,
|
||||
textualExtractor,
|
||||
source,
|
||||
valueStart.getRow(),
|
||||
valueStart.getColumn() + offset,
|
||||
valueStart + offset,
|
||||
false /* isTypeScript */,
|
||||
attr,
|
||||
context);
|
||||
@@ -139,8 +135,7 @@ public class HTMLExtractor implements IExtractor {
|
||||
scopeManager,
|
||||
textualExtractor,
|
||||
source,
|
||||
valueStart.getRow(),
|
||||
valueStart.getColumn() + 11,
|
||||
valueStart + 11,
|
||||
false /* isTypeScript */,
|
||||
attr,
|
||||
context);
|
||||
@@ -201,8 +196,11 @@ public class HTMLExtractor implements IExtractor {
|
||||
textualExtractor.getSource(),
|
||||
textualExtractor.getTrapwriter(),
|
||||
locationManager.getFileLabel());
|
||||
|
||||
extractor.setStartOffset(locationManager.getStartLine() - 1, locationManager.getStartColumn() - 1);
|
||||
|
||||
// For efficiency, avoid building the source map if not needed (i.e. for plain HTML files).
|
||||
if (textualExtractor.hasNonTrivialSourceMap()) {
|
||||
extractor.setSourceMap(textualExtractor.getSourceMap());
|
||||
}
|
||||
|
||||
List<Label> rootNodes = extractor.doit(Option.some(eltHandler));
|
||||
|
||||
@@ -280,24 +278,23 @@ public class HTMLExtractor implements IExtractor {
|
||||
ScopeManager scopeManager,
|
||||
TextualExtractor textualExtractor,
|
||||
String source,
|
||||
int line,
|
||||
int column,
|
||||
int offset,
|
||||
boolean isTypeScript,
|
||||
Segment parentHtmlNode,
|
||||
HtmlPopulator.Context context) {
|
||||
TrapWriter trapWriter = textualExtractor.getTrapwriter();
|
||||
LocationManager locationManager = textualExtractor.getLocationManager();
|
||||
LocationManager scriptLocationManager =
|
||||
new LocationManager(
|
||||
locationManager.getSourceFile(), trapWriter, locationManager.getFileLabel());
|
||||
scriptLocationManager.setStart(line, column);
|
||||
// JavaScript AST extraction does not currently support source maps, so just set
|
||||
// line/column numbers on the location manager.
|
||||
Position pos = textualExtractor.getSourceMap().getStart(offset);
|
||||
LocationManager scriptLocationManager = locationManager.startingAt(pos.getLine(), pos.getColumn());
|
||||
if (isTypeScript) {
|
||||
if (isEmbedded) {
|
||||
return; // Do not extract files from HTML embedded in other files.
|
||||
}
|
||||
Path file = textualExtractor.getExtractedFile().toPath();
|
||||
FileSnippet snippet =
|
||||
new FileSnippet(file, line, column, toplevelKind, config.getSourceType());
|
||||
new FileSnippet(file, pos.getLine(), pos.getColumn(), toplevelKind, config.getSourceType());
|
||||
VirtualSourceRoot vroot = config.getVirtualSourceRoot();
|
||||
// Vue files are special in that they can be imported as modules, and may only
|
||||
// contain one <script> tag.
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
package com.semmle.js.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.semmle.js.ast.Position;
|
||||
import com.semmle.js.ast.SourceElement;
|
||||
import com.semmle.util.files.FileUtil;
|
||||
import com.semmle.util.locations.SourceMap;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
import java.io.File;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* This class handles location information; in particular, it translates locations reported by the
|
||||
@@ -30,6 +32,30 @@ public class LocationManager {
|
||||
this.startColumn = 1;
|
||||
}
|
||||
|
||||
/**
 * Creates a location manager that refers to the same source file, trap writer and file label as
 * {@code other} and has the same starting line and column.
 */
public LocationManager(LocationManager other) {
  // Starting position first, then the file/trap state; the assignments are independent.
  this.startLine = other.startLine;
  this.startColumn = other.startColumn;
  this.sourceFile = other.sourceFile;
  this.trapWriter = other.trapWriter;
  this.fileLabel = other.fileLabel;
}
|
||||
|
||||
/**
|
||||
* Returns a copy of this location manager whose locations are relative to the
|
||||
* given 1-based line and column numbers (which themselves are relative to this location manager's
|
||||
* starting point).
|
||||
*/
|
||||
public LocationManager startingAt(int line, int column) {
|
||||
LocationManager copy = new LocationManager(this);
|
||||
if (line == 1) {
|
||||
copy.startColumn += column - 1;
|
||||
} else {
|
||||
copy.startLine += line - 1;
|
||||
copy.startColumn = column;
|
||||
}
|
||||
return copy;
|
||||
}
|
||||
|
||||
/** Returns the source file held by this location manager. */
public File getSourceFile() {
|
||||
return sourceFile;
|
||||
}
|
||||
@@ -63,6 +89,19 @@ public class LocationManager {
|
||||
this.hasLocation = hasLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a source map adjusted for the line/column offset configured in this location manager.
|
||||
* <p>
|
||||
* Note that the absolute offset returned by the source map will be incorrect and should not be relied upon,
|
||||
* only the line and column numbers are valid.
|
||||
*/
|
||||
public SourceMap adjustSourceMap(final SourceMap map) {
|
||||
// This method is a placeholder for a better solution in which the location manager has a SourceMap of its own.
|
||||
// That solution requires all users of the location manager to track absolute offsets and defer line/column calculations
|
||||
// to the location manager's source map.
|
||||
return SourceMap.legacyWithStartPos(map, new com.semmle.util.locations.Position(startLine, startColumn, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit location information for an AST node. The node's location is translated from the parser's
|
||||
* 0-based column numbering scheme with exclusive offsets into our 1-based scheme with inclusive
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
package com.semmle.js.extractor;
|
||||
|
||||
import com.semmle.util.data.IntList;
|
||||
|
||||
/**
|
||||
* A mapping of some source range into a set of intervals in an output source range.
|
||||
*
|
||||
* <p>The mapping is constructed by adding "anchors": input/output pairs that correspond to the
|
||||
* beginning of an interval, which is assumed to end at the next anchor.
|
||||
*/
|
||||
public class OffsetTranslation {
|
||||
private IntList anchors = IntList.create();
|
||||
private IntList deltas = IntList.create();
|
||||
|
||||
/** Returns the mapping of x. */
|
||||
public int get(int x) {
|
||||
int index = anchors.binarySearch(x);
|
||||
if (index < 0) {
|
||||
// The insertion point is -index - 1.
|
||||
// Get the index immediately before that.
|
||||
index = -index - 2;
|
||||
if (index < 0) {
|
||||
// If queried before the first anchor, use the first anchor anyway.
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
return x + deltas.get(index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps the given input offset to the given output offset.
|
||||
*
|
||||
* <p>This is added as an anchor. Any offset is mapped based on its closest preceding anchor.
|
||||
*/
|
||||
public void set(int from, int to) {
|
||||
anchors.add(from);
|
||||
deltas.add(to - from);
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,8 @@
|
||||
package com.semmle.js.extractor;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.semmle.js.ast.Node;
|
||||
import com.semmle.js.ast.Position;
|
||||
import com.semmle.js.ast.SourceElement;
|
||||
@@ -40,10 +43,9 @@ import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
|
||||
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
|
||||
import com.semmle.js.parser.RegExpParser;
|
||||
import com.semmle.js.parser.RegExpParser.Result;
|
||||
import com.semmle.util.locations.OffsetTranslation;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/** Extractor for populating regular expressions. */
|
||||
public class RegExpExtractor {
|
||||
|
||||
@@ -6,6 +6,7 @@ import java.util.regex.Pattern;
|
||||
|
||||
import com.semmle.js.ast.Position;
|
||||
import com.semmle.js.ast.SourceElement;
|
||||
import com.semmle.util.locations.SourceMap;
|
||||
import com.semmle.util.trap.TrapWriter;
|
||||
import com.semmle.util.trap.TrapWriter.Label;
|
||||
|
||||
@@ -24,6 +25,7 @@ public class TextualExtractor {
|
||||
private final boolean extractLines;
|
||||
private final ExtractionMetrics metrics;
|
||||
private final File extractedFile;
|
||||
private SourceMap sourceMap;
|
||||
|
||||
public TextualExtractor(
|
||||
TrapWriter trapwriter,
|
||||
@@ -32,6 +34,17 @@ public class TextualExtractor {
|
||||
boolean extractLines,
|
||||
ExtractionMetrics metrics,
|
||||
File extractedFile) {
|
||||
this(trapwriter, locationManager, source, extractLines, metrics, extractedFile, null);
|
||||
}
|
||||
|
||||
public TextualExtractor(
|
||||
TrapWriter trapwriter,
|
||||
LocationManager locationManager,
|
||||
String source,
|
||||
boolean extractLines,
|
||||
ExtractionMetrics metrics,
|
||||
File extractedFile,
|
||||
SourceMap sourceMap) {
|
||||
this.trapwriter = trapwriter;
|
||||
this.locationManager = locationManager;
|
||||
this.source = source;
|
||||
@@ -39,6 +52,29 @@ public class TextualExtractor {
|
||||
this.extractLines = extractLines;
|
||||
this.metrics = metrics;
|
||||
this.extractedFile = extractedFile;
|
||||
this.sourceMap = sourceMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the source map mapping the characters of {@link #getSource()} back to the
|
||||
* original file locations.
|
||||
*/
|
||||
public SourceMap getSourceMap() {
|
||||
// The SourceMap should ideally be owned by the location manager, but the location manager does not
|
||||
// have access to the source code. We construct a source map lazily since, at the time of writing,
|
||||
// most code does not operate with source maps.
|
||||
if (sourceMap == null) {
|
||||
sourceMap = locationManager.adjustSourceMap(SourceMap.fromString(source));
|
||||
}
|
||||
return sourceMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the source map that would be returned by {@link #getSourceMap()} might not be a 1:1 mapping
|
||||
* to the original source file.
|
||||
*/
|
||||
public boolean hasNonTrivialSourceMap() {
|
||||
return sourceMap != null || locationManager.getStartLine() != 1 || locationManager.getStartColumn() != 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
package com.semmle.js.extractor.test;
|
||||
|
||||
import com.semmle.js.extractor.OffsetTranslation;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class OffsetTranslationTest {
|
||||
@Test
|
||||
public void testBasic() {
|
||||
OffsetTranslation table = new OffsetTranslation();
|
||||
table.set(0, 10);
|
||||
table.set(100, 250);
|
||||
Assert.assertEquals(10, table.get(0));
|
||||
Assert.assertEquals(15, table.get(5));
|
||||
Assert.assertEquals(85, table.get(75));
|
||||
Assert.assertEquals(109, table.get(99));
|
||||
Assert.assertEquals(250, table.get(100));
|
||||
Assert.assertEquals(251, table.get(101));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLookupBefore() {
|
||||
OffsetTranslation table = new OffsetTranslation();
|
||||
table.set(0, 10);
|
||||
table.set(100, 250);
|
||||
Assert.assertEquals(9, table.get(-1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIdentity() {
|
||||
OffsetTranslation table = new OffsetTranslation();
|
||||
table.set(0, 0);
|
||||
Assert.assertEquals(0, table.get(0));
|
||||
Assert.assertEquals(75, table.get(75));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuplicateAnchor() {
|
||||
OffsetTranslation table = new OffsetTranslation();
|
||||
table.set(0, 0);
|
||||
table.set(10, 100);
|
||||
table.set(10, 100);
|
||||
table.set(20, 150);
|
||||
Assert.assertEquals(1, table.get(1));
|
||||
Assert.assertEquals(100, table.get(10));
|
||||
Assert.assertEquals(101, table.get(11));
|
||||
Assert.assertEquals(150, table.get(20));
|
||||
Assert.assertEquals(151, table.get(21));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user