JS: Track location information using SourceMaps

This commit is contained in:
Asger Feldthaus
2020-12-14 15:10:42 +00:00
parent 898d22d2f4
commit 97f7cb4dc1
8 changed files with 129 additions and 131 deletions

View File

@@ -1,6 +1,13 @@
package com.semmle.js.ast;
/** A source position identifying a single character. */
/**
* A source position identifying a single character.
* <p>
* Note that this class remains distinct from {@link com.semmle.util.locations.Position},
* due to the 1-based line number convention and the tendency for users of this class to provide
* dummy offset values. Although the classes are structurally identical, it is not always safe to
* convert one into the other.
*/
public class Position implements Comparable<Position> {
private final int line, column, offset;
@@ -23,6 +30,8 @@ public class Position implements Comparable<Position> {
/**
* The offset (0-based) of this position from the start of the file, that is, the number of
* characters that precede it.
* <p>
* Note that in some cases, a dummy value is filled in for the offset.
*/
public int getOffset() {
return offset;

View File

@@ -155,6 +155,9 @@ import com.semmle.ts.ast.TypeofTypeExpr;
import com.semmle.ts.ast.UnaryTypeExpr;
import com.semmle.ts.ast.UnionTypeExpr;
import com.semmle.util.collections.CollectionUtil;
import com.semmle.util.data.Pair;
import com.semmle.util.locations.OffsetTranslation;
import com.semmle.util.locations.SourceMap;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
@@ -1165,25 +1168,23 @@ public class ASTExtractor {
if (textualExtractor.isSnippet()) {
return; // do not create nested snippets
}
String source = tryGetStringValueFromExpression(expr);
if (source == null) {
Pair<String, OffsetTranslation> sourceAndOffset = tryGetStringValueFromExpression(expr);
if (sourceAndOffset == null) {
return;
}
String source = sourceAndOffset.fst();
SourceLocation loc = expr.getLoc();
Path originalFile = textualExtractor.getExtractedFile().toPath();
Path vfile = originalFile.resolveSibling(originalFile.getFileName().toString() + "." + loc.getStart().getLine() + "." + loc.getStart().getColumn() + ".html");
LocationManager innerLocationManager = new LocationManager(
locationManager.getSourceFile(),
locationManager.getTrapWriter(),
locationManager.getFileLabel());
innerLocationManager.setStart(loc.getStart().getLine(), loc.getStart().getColumn());
SourceMap sourceMap = textualExtractor.getSourceMap().offsetBy(loc.getStart().getOffset(), sourceAndOffset.snd());
TextualExtractor innerTextualExtractor = new TextualExtractor(
trapwriter,
innerLocationManager,
locationManager,
source,
false,
getMetrics(),
vfile.toFile());
vfile.toFile(),
sourceMap);
HTMLExtractor html = HTMLExtractor.forEmbeddedHtml(config);
List<Label> rootNodes = html.extractEx(innerTextualExtractor).fst();
int rootNodeIndex = 0;
@@ -1195,22 +1196,25 @@ public class ASTExtractor {
/**
 * Returns the name of {@code e} if it is an {@link Identifier}, or {@code null} for any other
 * kind of expression (including {@code null} itself).
 */
private String tryGetIdentifierName(Expression e) {
  if (!(e instanceof Identifier)) {
    return null;
  }
  Identifier identifier = (Identifier) e;
  return identifier.getName();
}
private String tryGetStringValueFromExpression(Expression e) {
private Pair<String, OffsetTranslation> tryGetStringValueFromExpression(Expression e) {
if (e instanceof Literal) {
Literal lit = (Literal) e;
return lit.isStringLiteral() ? (String) lit.getValue() : null;
if (!lit.isStringLiteral()) {
return null;
}
return Pair.make((String) lit.getValue(), makeStringLiteralOffsets(lit.getRaw()));
}
if (e instanceof TemplateLiteral) {
TemplateLiteral lit = (TemplateLiteral) e;
if (!lit.getExpressions().isEmpty()) {
return null;
}
StringBuilder sb = new StringBuilder();
for (TemplateElement elm : lit.getQuasis()) {
sb.append(elm.getCooked());
if (lit.getQuasis().size() != 1) {
return null;
}
return sb.toString();
TemplateElement element = lit.getQuasis().get(0);
return Pair.make((String) element.getCooked(), makeStringLiteralOffsets("`" + element.getRaw() + "`"));
}
return null;
}

View File

@@ -15,6 +15,7 @@ import com.semmle.util.data.Option;
import com.semmle.util.data.Pair;
import com.semmle.util.data.StringUtil;
import com.semmle.util.io.WholeIO;
import com.semmle.util.locations.Position;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
@@ -22,7 +23,6 @@ import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.Attributes;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.RowColumnVector;
import net.htmlparser.jericho.Segment;
/** Extractor for handling HTML and XHTML files. */
@@ -73,15 +73,13 @@ public class HTMLExtractor implements IExtractor {
*/
source = source.replace("<![CDATA[", " ").replace("]]>", " ");
if (!source.trim().isEmpty()) {
RowColumnVector contentStart = content.getRowColumnVector();
extractSnippet(
TopLevelKind.inlineScript,
config.withSourceType(sourceType),
scopeManager,
textualExtractor,
source,
contentStart.getRow(),
contentStart.getColumn(),
content.getBegin(),
isTypeScript,
elt,
context);
@@ -96,7 +94,7 @@ public class HTMLExtractor implements IExtractor {
if (attr.getValue() == null || attr.getValue().isEmpty()) continue;
String source = attr.getValue();
RowColumnVector valueStart = attr.getValueSegment().getRowColumnVector();
int valueStart = attr.getValueSegment().getBegin();
if (JS_ATTRIBUTE.matcher(attr.getName()).matches()) {
extractSnippet(
TopLevelKind.eventHandler,
@@ -104,8 +102,7 @@ public class HTMLExtractor implements IExtractor {
scopeManager,
textualExtractor,
source,
valueStart.getRow(),
valueStart.getColumn(),
valueStart,
false /* isTypeScript */,
attr,
context);
@@ -126,8 +123,7 @@ public class HTMLExtractor implements IExtractor {
scopeManager,
textualExtractor,
source,
valueStart.getRow(),
valueStart.getColumn() + offset,
valueStart + offset,
false /* isTypeScript */,
attr,
context);
@@ -139,8 +135,7 @@ public class HTMLExtractor implements IExtractor {
scopeManager,
textualExtractor,
source,
valueStart.getRow(),
valueStart.getColumn() + 11,
valueStart + 11,
false /* isTypeScript */,
attr,
context);
@@ -201,8 +196,11 @@ public class HTMLExtractor implements IExtractor {
textualExtractor.getSource(),
textualExtractor.getTrapwriter(),
locationManager.getFileLabel());
extractor.setStartOffset(locationManager.getStartLine() - 1, locationManager.getStartColumn() - 1);
// For efficiency, avoid building the source map if not needed (i.e. for plain HTML files).
if (textualExtractor.hasNonTrivialSourceMap()) {
extractor.setSourceMap(textualExtractor.getSourceMap());
}
List<Label> rootNodes = extractor.doit(Option.some(eltHandler));
@@ -280,24 +278,23 @@ public class HTMLExtractor implements IExtractor {
ScopeManager scopeManager,
TextualExtractor textualExtractor,
String source,
int line,
int column,
int offset,
boolean isTypeScript,
Segment parentHtmlNode,
HtmlPopulator.Context context) {
TrapWriter trapWriter = textualExtractor.getTrapwriter();
LocationManager locationManager = textualExtractor.getLocationManager();
LocationManager scriptLocationManager =
new LocationManager(
locationManager.getSourceFile(), trapWriter, locationManager.getFileLabel());
scriptLocationManager.setStart(line, column);
// JavaScript AST extraction does not currently support source maps, so just set
// line/column numbers on the location manager.
Position pos = textualExtractor.getSourceMap().getStart(offset);
LocationManager scriptLocationManager = locationManager.startingAt(pos.getLine(), pos.getColumn());
if (isTypeScript) {
if (isEmbedded) {
return; // Do not extract files from HTML embedded in other files.
}
Path file = textualExtractor.getExtractedFile().toPath();
FileSnippet snippet =
new FileSnippet(file, line, column, toplevelKind, config.getSourceType());
new FileSnippet(file, pos.getLine(), pos.getColumn(), toplevelKind, config.getSourceType());
VirtualSourceRoot vroot = config.getVirtualSourceRoot();
// Vue files are special in that they can be imported as modules, and may only
// contain one <script> tag.

View File

@@ -1,13 +1,15 @@
package com.semmle.js.extractor;
import java.io.File;
import java.util.LinkedHashSet;
import java.util.Set;
import com.semmle.js.ast.Position;
import com.semmle.js.ast.SourceElement;
import com.semmle.util.files.FileUtil;
import com.semmle.util.locations.SourceMap;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
import java.io.File;
import java.util.LinkedHashSet;
import java.util.Set;
/**
* This class handles location information; in particular, it translates locations reported by the
@@ -30,6 +32,30 @@ public class LocationManager {
this.startColumn = 1;
}
/**
 * Copy constructor: creates a location manager that shares the source file, trap writer and
 * file label of {@code other} and starts at the same line and column.
 *
 * <p>NOTE(review): only the five fields assigned below are copied; any other per-instance state
 * of {@code other} is left at its default in the new object. Confirm that this is intended.
 *
 * @param other the location manager to copy; must not be {@code null}
 */
public LocationManager(LocationManager other) {
this.sourceFile = other.sourceFile;
this.trapWriter = other.trapWriter;
this.fileLabel = other.fileLabel;
this.startLine = other.startLine;
this.startColumn = other.startColumn;
}
/**
 * Derives a new location manager whose origin is shifted to the given position.
 *
 * <p>Both {@code line} and {@code column} are 1-based and are interpreted relative to this
 * location manager's own starting point. This instance is not modified; the adjustment is
 * applied to a copy.
 *
 * @param line 1-based line number, relative to this manager's start
 * @param column 1-based column number, relative to this manager's start
 * @return a copy of this location manager starting at the given position
 */
public LocationManager startingAt(int line, int column) {
  LocationManager shifted = new LocationManager(this);
  int lineDelta = line - 1;
  if (lineDelta != 0) {
    // The new origin is on a later line, so the column is no longer relative to our start column.
    shifted.startLine += lineDelta;
    shifted.startColumn = column;
    return shifted;
  }
  // Same line as our own start: columns accumulate.
  shifted.startColumn += column - 1;
  return shifted;
}
public File getSourceFile() {
return sourceFile;
}
@@ -63,6 +89,19 @@ public class LocationManager {
this.hasLocation = hasLocation;
}
/**
 * Wraps the given source map so that it accounts for the start line and start column configured
 * in this location manager.
 *
 * <p>Only the line and column numbers produced by the resulting map are meaningful. The start
 * position is built with a dummy offset of 0, so absolute offsets reported by the map are
 * incorrect and must not be relied upon.
 *
 * @param map the source map to adjust
 * @return a source map shifted by this location manager's start position
 */
public SourceMap adjustSourceMap(final SourceMap map) {
  // Placeholder for a better design in which the location manager owns a SourceMap of its own.
  // That design requires every user of the location manager to track absolute offsets and to
  // defer line/column calculations to the location manager's source map.
  com.semmle.util.locations.Position startPosition =
      new com.semmle.util.locations.Position(startLine, startColumn, 0);
  return SourceMap.legacyWithStartPos(map, startPosition);
}
/**
* Emit location information for an AST node. The node's location is translated from the parser's
* 0-based column numbering scheme with exclusive offsets into our 1-based scheme with inclusive

View File

@@ -1,39 +0,0 @@
package com.semmle.js.extractor;
import com.semmle.util.data.IntList;
/**
 * Translates offsets in an input range to offsets in an output range, piecewise.
 *
 * <p>The translation is described by "anchors": (input, output) pairs that each mark the start of
 * an interval. An interval extends up to the next anchor, and every offset inside it is shifted
 * by the same amount as its anchor.
 */
public class OffsetTranslation {
  /** Input offsets at which an interval begins. */
  private final IntList anchors = IntList.create();

  /** For each anchor, the amount to add to an input offset to obtain the output offset. */
  private final IntList deltas = IntList.create();

  /**
   * Returns the mapping of {@code x}, using the closest anchor at or before {@code x}.
   *
   * <p>If {@code x} lies before the first anchor, the first anchor's shift is used anyway.
   */
  public int get(int x) {
    int hit = anchors.binarySearch(x);
    // A negative result encodes the insertion point as (-hit - 1); the governing anchor is the
    // one just before it, clamped to the first anchor.
    int slot = hit >= 0 ? hit : Math.max(-hit - 2, 0);
    return x + deltas.get(slot);
  }

  /**
   * Records that input offset {@code from} maps to output offset {@code to}.
   *
   * <p>The pair becomes an anchor; any offset is mapped based on its closest preceding anchor.
   */
  public void set(int from, int to) {
    anchors.add(from);
    deltas.add(to - from);
  }
}

View File

@@ -1,5 +1,8 @@
package com.semmle.js.extractor;
import java.util.LinkedHashMap;
import java.util.Map;
import com.semmle.js.ast.Node;
import com.semmle.js.ast.Position;
import com.semmle.js.ast.SourceElement;
@@ -40,10 +43,9 @@ import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
import com.semmle.js.parser.RegExpParser;
import com.semmle.js.parser.RegExpParser.Result;
import com.semmle.util.locations.OffsetTranslation;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
import java.util.LinkedHashMap;
import java.util.Map;
/** Extractor for populating regular expressions. */
public class RegExpExtractor {

View File

@@ -6,6 +6,7 @@ import java.util.regex.Pattern;
import com.semmle.js.ast.Position;
import com.semmle.js.ast.SourceElement;
import com.semmle.util.locations.SourceMap;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
@@ -24,6 +25,7 @@ public class TextualExtractor {
private final boolean extractLines;
private final ExtractionMetrics metrics;
private final File extractedFile;
private SourceMap sourceMap;
public TextualExtractor(
TrapWriter trapwriter,
@@ -32,6 +34,17 @@ public class TextualExtractor {
boolean extractLines,
ExtractionMetrics metrics,
File extractedFile) {
this(trapwriter, locationManager, source, extractLines, metrics, extractedFile, null);
}
public TextualExtractor(
TrapWriter trapwriter,
LocationManager locationManager,
String source,
boolean extractLines,
ExtractionMetrics metrics,
File extractedFile,
SourceMap sourceMap) {
this.trapwriter = trapwriter;
this.locationManager = locationManager;
this.source = source;
@@ -39,6 +52,29 @@ public class TextualExtractor {
this.extractLines = extractLines;
this.metrics = metrics;
this.extractedFile = extractedFile;
this.sourceMap = sourceMap;
}
/**
 * Returns the source map that relates characters of {@link #getSource()} to locations in the
 * original file.
 *
 * <p>If no source map was supplied at construction time, one is built from the source text on
 * first use, adjusted by the location manager, and cached for subsequent calls.
 */
public SourceMap getSourceMap() {
  if (sourceMap != null) {
    return sourceMap;
  }
  // Ideally the location manager would own the SourceMap, but it has no access to the source
  // text. The map is built lazily because, at the time of writing, most code does not operate
  // with source maps.
  SourceMap plainMap = SourceMap.fromString(source);
  sourceMap = locationManager.adjustSourceMap(plainMap);
  return sourceMap;
}
/**
 * Tells whether the map returned by {@link #getSourceMap()} might differ from a 1:1 mapping onto
 * the original source file.
 *
 * <p>This is the case when a source map has already been set or built, or when the location
 * manager does not start at line 1, column 1. Calling this method never builds the source map.
 */
public boolean hasNonTrivialSourceMap() {
  if (sourceMap != null) {
    return true;
  }
  boolean startsAtOrigin =
      locationManager.getStartLine() == 1 && locationManager.getStartColumn() == 1;
  return !startsAtOrigin;
}
/**

View File

@@ -1,50 +0,0 @@
package com.semmle.js.extractor.test;
import com.semmle.js.extractor.OffsetTranslation;
import org.junit.Assert;
import org.junit.Test;
/** Unit tests for the anchor-based lookup performed by {@link OffsetTranslation}. */
public class OffsetTranslationTest {
  /**
   * Builds a translation from a flat list of anchors given as consecutive (from, to) pairs, added
   * in the order listed.
   */
  private static OffsetTranslation translationOf(int... anchorPairs) {
    OffsetTranslation translation = new OffsetTranslation();
    for (int i = 0; i < anchorPairs.length; i += 2) {
      translation.set(anchorPairs[i], anchorPairs[i + 1]);
    }
    return translation;
  }

  /** Offsets are shifted by the delta of the closest preceding anchor. */
  @Test
  public void testBasic() {
    OffsetTranslation translation = translationOf(0, 10, 100, 250);
    Assert.assertEquals(10, translation.get(0));
    Assert.assertEquals(15, translation.get(5));
    Assert.assertEquals(85, translation.get(75));
    Assert.assertEquals(109, translation.get(99));
    Assert.assertEquals(250, translation.get(100));
    Assert.assertEquals(251, translation.get(101));
  }

  /** A lookup before the first anchor still uses the first anchor's delta. */
  @Test
  public void testLookupBefore() {
    OffsetTranslation translation = translationOf(0, 10, 100, 250);
    Assert.assertEquals(9, translation.get(-1));
  }

  /** A single zero-delta anchor behaves as the identity mapping. */
  @Test
  public void testIdentity() {
    OffsetTranslation translation = translationOf(0, 0);
    Assert.assertEquals(0, translation.get(0));
    Assert.assertEquals(75, translation.get(75));
  }

  /** Adding the same anchor twice does not disturb lookups around it. */
  @Test
  public void testDuplicateAnchor() {
    OffsetTranslation translation = translationOf(0, 0, 10, 100, 10, 100, 20, 150);
    Assert.assertEquals(1, translation.get(1));
    Assert.assertEquals(100, translation.get(10));
    Assert.assertEquals(101, translation.get(11));
    Assert.assertEquals(150, translation.get(20));
    Assert.assertEquals(151, translation.get(21));
  }
}