diff --git a/javascript/extractor/src/com/semmle/js/extractor/LocationManager.java b/javascript/extractor/src/com/semmle/js/extractor/LocationManager.java index 0216742b580..66b2919f520 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/LocationManager.java +++ b/javascript/extractor/src/com/semmle/js/extractor/LocationManager.java @@ -65,7 +65,8 @@ public class LocationManager { /** * Emit location information for an AST node. The node's location is translated from the parser's - * 0-based column numbering scheme into our 1-based scheme and then emitted as a snippet location. + * 0-based column numbering scheme with exclusive offsets into our 1-based scheme with inclusive + * end-offsets and then emitted as a snippet location. */ public void emitNodeLocation(SourceElement nd, Label lbl) { int sl = nd.getLoc().getStart().getLine(), @@ -86,7 +87,15 @@ public class LocationManager { emitSnippetLocation(lbl, sl, sc, el, ec); } - /** Emit a relative location in the current snippet. */ + /** + * Emit a relative location in the current snippet. + * + * @param lbl label to associate with the location + * @param sl start line (1-based) + * @param sc start column (1-based, inclusive) + * @param el end line (1-based) + * @param ec end column (1-based, inclusive) + */ public void emitSnippetLocation(Label lbl, int sl, int sc, int el, int ec) { Position start = translatePosition(new Position(sl, sc, -1)); Position end = translatePosition(new Position(el, ec, -1)); diff --git a/javascript/extractor/src/com/semmle/js/extractor/OffsetTranslation.java b/javascript/extractor/src/com/semmle/js/extractor/OffsetTranslation.java new file mode 100644 index 00000000000..88ebc80b9a9 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/OffsetTranslation.java @@ -0,0 +1,39 @@ +package com.semmle.js.extractor; + +import com.semmle.util.data.IntList; + +/** + * A mapping of some source range into a set of intervals in an output source range. + * + *
<p>The mapping is constructed by adding "anchors": input/output pairs that correspond to the + * beginning of an interval, which is assumed to end at the next anchor. + */ +public class OffsetTranslation { + private IntList anchors = IntList.create(); + private IntList deltas = IntList.create(); + + /** Returns x plus the delta of its closest preceding anchor, or of the first anchor if none. */ + public int get(int x) { + int index = anchors.binarySearch(x); + if (index < 0) { + // The insertion point is -index - 1. + // Get the index immediately before that. + index = -index - 2; + if (index < 0) { + // If queried before the first anchor, use the first anchor anyway. + index = 0; + } + } + return x + deltas.get(index); + } + + /** + * Maps the given input offset to the given output offset. + * + *
<p>This is added as an anchor. Any offset is mapped based on its closest preceding anchor. + */ + public void set(int from, int to) { + anchors.add(from); + deltas.add(to - from); + } +} diff --git a/javascript/extractor/src/com/semmle/js/extractor/OffsetTranslationBuilder.java b/javascript/extractor/src/com/semmle/js/extractor/OffsetTranslationBuilder.java new file mode 100644 index 00000000000..3f417eb7b15 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/OffsetTranslationBuilder.java @@ -0,0 +1,37 @@ +package com.semmle.js.extractor; + +import com.semmle.util.data.IntList; + +/** + * A mapping from integers to integers, encoded as a sequence of consecutive intervals and their + * corresponding output intervals. NOTE(review): body duplicates OffsetTranslation; keep both? + */ +public class OffsetTranslationBuilder { + private IntList anchors = IntList.create(); + private IntList deltas = IntList.create(); + + /** Returns x plus the delta of its closest preceding anchor, or of the first anchor if none. */ + public int get(int x) { + int index = anchors.binarySearch(x); + if (index < 0) { + // The insertion point is -index - 1. + // Get the index immediately before that. + index = -index - 2; + if (index < 0) { + // If queried before the first anchor, use the first anchor anyway. + index = 0; + } + } + return x + deltas.get(index); + } + + /** + * Maps the given input offset to the given output offset. + * + *
<p>This is added as an anchor. Any offset is mapped based on its closest preceding anchor. + */ + public void set(int from, int to) { + anchors.add(from); + deltas.add(to - from); + } +} diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 8e07b0e5dc4..239b6feb62d 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -51,6 +51,7 @@ public class RegExpExtractor { private final LocationManager locationManager; private final RegExpParser parser = new RegExpParser(); private Position literalStart; + private OffsetTranslation offsets; public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { this.trapwriter = trapwriter; @@ -121,8 +122,12 @@ public class RegExpExtractor { public void emitLocation(SourceElement term, Label lbl) { int sl, sc, el, ec; sl = el = literalStart.getLine(); - sc = literalStart.getColumn() + 2 + term.getLoc().getStart().getColumn(); - ec = literalStart.getColumn() + 1 + term.getLoc().getEnd().getColumn(); + // the offset table accounts for the literal's column and for skipping the leading '/' or quote + sc = offsets.get(term.getLoc().getStart().getColumn()); + ec = offsets.get(term.getLoc().getEnd().getColumn()); + sc += 1; // convert to 1-based + ec += 1; // convert to 1-based + ec -= 1; // convert to inclusive locationManager.emitSnippetLocation(lbl, sl, sc, el, ec); } @@ -349,6 +354,8 @@ public class RegExpExtractor { } this.literalStart = parent.getLoc().getStart(); + offsets = new OffsetTranslation(); + offsets.set(0, literalStart.getColumn() + 1); // add 1 to skip the leading '/' or quote RegExpTerm ast = res.getAST(); new V().visit(ast, trapwriter.localID(parent), 0); diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/OffsetTranslationTest.java 
b/javascript/extractor/src/com/semmle/js/extractor/test/OffsetTranslationTest.java new file mode 100644 index 00000000000..8b8a65aa14d --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/test/OffsetTranslationTest.java @@ -0,0 +1,50 @@ +package com.semmle.js.extractor.test; + +import com.semmle.js.extractor.OffsetTranslation; +import org.junit.Assert; +import org.junit.Test; + +public class OffsetTranslationTest { + @Test + public void testBasic() { + OffsetTranslation table = new OffsetTranslation(); + table.set(0, 10); + table.set(100, 250); + Assert.assertEquals(10, table.get(0)); + Assert.assertEquals(15, table.get(5)); + Assert.assertEquals(85, table.get(75)); + Assert.assertEquals(109, table.get(99)); + Assert.assertEquals(250, table.get(100)); + Assert.assertEquals(251, table.get(101)); + } + + @Test + public void testLookupBefore() { + OffsetTranslation table = new OffsetTranslation(); + table.set(0, 10); + table.set(100, 250); + Assert.assertEquals(9, table.get(-1)); + } + + @Test + public void testIdentity() { + OffsetTranslation table = new OffsetTranslation(); + table.set(0, 0); + Assert.assertEquals(0, table.get(0)); + Assert.assertEquals(75, table.get(75)); + } + + @Test + public void testDuplicateAnchor() { + OffsetTranslation table = new OffsetTranslation(); + table.set(0, 0); + table.set(10, 100); + table.set(10, 100); + table.set(20, 150); + Assert.assertEquals(1, table.get(1)); + Assert.assertEquals(100, table.get(10)); + Assert.assertEquals(101, table.get(11)); + Assert.assertEquals(150, table.get(20)); + Assert.assertEquals(151, table.get(21)); + } +}