Added support for the `--` subtraction operator.

This commit is contained in:
Napalys
2025-03-02 19:46:56 +01:00
parent ee83c42b71
commit 3664d50772
5 changed files with 180 additions and 198 deletions

View File

@@ -0,0 +1,22 @@
package com.semmle.js.ast.regexp;
import com.semmle.js.ast.SourceLocation;
import java.util.List;
/**
 * Regular-expression AST node holding the operand terms of a character class
 * subtraction, i.e. the terms that appear around {@code --} inside a character
 * class such as {@code [[abc]--[cbd]]}.
 *
 * <p>The node registers itself under the type name
 * {@code "CharacterClassSubtraction"}.
 */
public class CharacterClassSubtraction extends RegExpTerm {
  /** Operand terms, stored exactly as handed to the constructor (no copy is made). */
  private final List<RegExpTerm> operands;

  /**
   * Creates a subtraction node.
   *
   * @param loc source location passed on to the {@link RegExpTerm} constructor
   * @param subtraction the operand terms of the subtraction
   */
  public CharacterClassSubtraction(SourceLocation loc, List<RegExpTerm> subtraction) {
    super(loc, "CharacterClassSubtraction");
    this.operands = subtraction;
  }

  /**
   * Returns the operand terms of this subtraction.
   *
   * @return the same list instance that was supplied at construction time
   */
  public List<RegExpTerm> getSubtraction() {
    return operands;
  }

  /** Dispatches to the visitor overload that takes a {@code CharacterClassSubtraction}. */
  @Override
  public void accept(Visitor v) {
    v.visit(this);
  }
}

View File

@@ -65,4 +65,6 @@ public interface Visitor {
public void visit(CharacterClassQuotedString nd);
public void visit(CharacterClassIntersection nd);
public void visit(CharacterClassSubtraction nd);
}

View File

@@ -12,6 +12,7 @@ import com.semmle.js.ast.regexp.CharacterClass;
import com.semmle.js.ast.regexp.CharacterClassEscape;
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
import com.semmle.js.ast.regexp.CharacterClassRange;
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
import com.semmle.js.ast.regexp.Constant;
import com.semmle.js.ast.regexp.ControlEscape;
import com.semmle.js.ast.regexp.ControlLetter;
@@ -96,6 +97,7 @@ public class RegExpExtractor {
termkinds.put("UnicodePropertyEscape", 27);
termkinds.put("CharacterClassQuotedString", 28);
termkinds.put("CharacterClassIntersection", 29);
termkinds.put("CharacterClassSubtraction", 30);
}
private static final String[] errmsgs =
@@ -362,6 +364,14 @@ public class RegExpExtractor {
for (RegExpTerm element : nd.getIntersections())
visit(element, lbl, i++);
}
@Override
public void visit(CharacterClassSubtraction nd) {
  // Emit the subtraction node itself first; the label it yields becomes the
  // parent passed along with every operand visited below.
  Label subtractionLabel = extractTerm(nd, parent, idx);

  // Visit the operands in list order, handing each one its zero-based position.
  int childIndex = 0;
  for (RegExpTerm operand : nd.getSubtraction()) {
    visit(operand, subtractionLabel, childIndex);
    childIndex++;
  }
}
}
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {

View File

@@ -8,6 +8,7 @@ import com.semmle.js.ast.regexp.CharacterClass;
import com.semmle.js.ast.regexp.CharacterClassEscape;
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
import com.semmle.js.ast.regexp.CharacterClassRange;
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
import com.semmle.js.ast.regexp.Constant;
import com.semmle.js.ast.regexp.ControlEscape;
import com.semmle.js.ast.regexp.ControlLetter;
@@ -566,6 +567,7 @@ public class RegExpParser {
// Records which kind of set expression was recognised while parsing the body of
// a character class; the parser uses it to decide how to wrap the collected elements.
private enum CharacterClassType {
// No set operator was seen: the elements become the class's direct children.
STANDARD,
// An `&&` operator was seen: the elements are wrapped in a CharacterClassIntersection.
INTERSECTION,
// A `--` operator was seen: the elements are wrapped in a CharacterClassSubtraction.
SUBTRACTION,
}
// ECMA 2024 `v` flag allows nested character classes.
@@ -588,6 +590,10 @@ public class RegExpParser {
this.match("&&");
classType = CharacterClassType.INTERSECTION;
}
else if (lookahead("--")) {
this.match("--");
classType = CharacterClassType.SUBTRACTION;
}
else {
elements.add(this.parseCharacterClassElement());
}
@@ -597,6 +603,8 @@ public class RegExpParser {
switch (classType) {
case INTERSECTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted));
case SUBTRACTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted));
case STANDARD:
default:
return this.finishTerm(new CharacterClass(loc, elements, inverted));
@@ -614,7 +622,7 @@ public class RegExpParser {
return atom;
}
}
if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape))
if (!this.lookahead("-]") && !this.lookahead("--") && this.match("-") && !(atom instanceof CharacterClassEscape))
return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
return atom;
}

View File

@@ -83,246 +83,186 @@ regexpterm(#20025,23,#20024,0,"[\p{Script_Extensions=Greek}--\p{Letter}]")
locations_default(#20026,#10000,1,2,1,42)
hasLocation(#20025,#20026)
#20027=*
regexpterm(#20027,24,#20025,0,"\p{Script_Extensions=Greek}--")
#20028=@"loc,{#10000},1,3,1,31"
locations_default(#20028,#10000,1,3,1,31)
hasLocation(#20027,#20028)
#20029=*
regexpterm(#20029,27,#20027,0,"\p{Script_Extensions=Greek}")
#20030=@"loc,{#10000},1,3,1,29"
locations_default(#20030,#10000,1,3,1,29)
hasLocation(#20029,#20030)
unicode_property_escapename(#20029,"Script_Extensions")
unicode_property_escapevalue(#20029,"Greek")
#20031=*
regexpterm(#20031,14,#20027,1,"-")
#20032=@"loc,{#10000},1,31,1,31"
locations_default(#20032,#10000,1,31,1,31)
hasLocation(#20031,#20032)
regexp_const_value(#20031,"-")
regexpterm(#20027,30,#20025,0,"[\p{Script_Extensions=Greek}--\p{Letter}]")
hasLocation(#20027,#20026)
#20028=*
regexpterm(#20028,27,#20027,0,"\p{Script_Extensions=Greek}")
#20029=@"loc,{#10000},1,3,1,29"
locations_default(#20029,#10000,1,3,1,29)
hasLocation(#20028,#20029)
unicode_property_escapename(#20028,"Script_Extensions")
unicode_property_escapevalue(#20028,"Greek")
#20030=*
regexpterm(#20030,27,#20027,1,"\p{Letter}")
#20031=@"loc,{#10000},1,32,1,41"
locations_default(#20031,#10000,1,32,1,41)
hasLocation(#20030,#20031)
unicode_property_escapename(#20030,"Letter")
#20032=*
stmts(#20032,2,#20001,1,"/[[abc]--[cbd]]/v;")
hasLocation(#20032,#20005)
stmt_containers(#20032,#20001)
#20033=*
regexpterm(#20033,27,#20025,1,"\p{Letter}")
#20034=@"loc,{#10000},1,32,1,41"
locations_default(#20034,#10000,1,32,1,41)
hasLocation(#20033,#20034)
unicode_property_escapename(#20033,"Letter")
#20035=*
stmts(#20035,2,#20001,1,"/[[abc]--[cbd]]/v;")
hasLocation(#20035,#20005)
stmt_containers(#20035,#20001)
exprs(#20033,5,#20032,0,"/[[abc]--[cbd]]/v")
hasLocation(#20033,#20013)
enclosing_stmt(#20033,#20032)
expr_containers(#20033,#20001)
literals("/[[abc]--[cbd]]/v","/[[abc]--[cbd]]/v",#20033)
#20034=*
regexpterm(#20034,23,#20033,0,"[[abc]--[cbd]]")
#20035=@"loc,{#10000},2,2,2,15"
locations_default(#20035,#10000,2,2,2,15)
hasLocation(#20034,#20035)
#20036=*
exprs(#20036,5,#20035,0,"/[[abc]--[cbd]]/v")
hasLocation(#20036,#20013)
enclosing_stmt(#20036,#20035)
expr_containers(#20036,#20001)
literals("/[[abc]--[cbd]]/v","/[[abc]--[cbd]]/v",#20036)
regexpterm(#20036,30,#20034,0,"[[abc]--[cbd]]")
hasLocation(#20036,#20035)
#20037=*
regexpterm(#20037,1,#20036,0,"[[abc]--[cbd]]")
#20038=@"loc,{#10000},2,2,2,15"
locations_default(#20038,#10000,2,2,2,15)
regexpterm(#20037,23,#20036,0,"[abc]")
#20038=@"loc,{#10000},2,3,2,7"
locations_default(#20038,#10000,2,3,2,7)
hasLocation(#20037,#20038)
#20039=*
regexpterm(#20039,23,#20037,0,"[[abc]--[cbd]")
#20040=@"loc,{#10000},2,2,2,14"
locations_default(#20040,#10000,2,2,2,14)
regexpterm(#20039,14,#20037,0,"a")
#20040=@"loc,{#10000},2,4,2,4"
locations_default(#20040,#10000,2,4,2,4)
hasLocation(#20039,#20040)
regexp_const_value(#20039,"a")
#20041=*
regexpterm(#20041,23,#20039,0,"[abc]")
#20042=@"loc,{#10000},2,3,2,7"
locations_default(#20042,#10000,2,3,2,7)
regexpterm(#20041,14,#20037,1,"b")
#20042=@"loc,{#10000},2,5,2,5"
locations_default(#20042,#10000,2,5,2,5)
hasLocation(#20041,#20042)
regexp_const_value(#20041,"b")
#20043=*
regexpterm(#20043,14,#20041,0,"a")
#20044=@"loc,{#10000},2,4,2,4"
locations_default(#20044,#10000,2,4,2,4)
regexpterm(#20043,14,#20037,2,"c")
#20044=@"loc,{#10000},2,6,2,6"
locations_default(#20044,#10000,2,6,2,6)
hasLocation(#20043,#20044)
regexp_const_value(#20043,"a")
regexp_const_value(#20043,"c")
#20045=*
regexpterm(#20045,14,#20041,1,"b")
#20046=@"loc,{#10000},2,5,2,5"
locations_default(#20046,#10000,2,5,2,5)
regexpterm(#20045,23,#20036,1,"[cbd]")
#20046=@"loc,{#10000},2,10,2,14"
locations_default(#20046,#10000,2,10,2,14)
hasLocation(#20045,#20046)
regexp_const_value(#20045,"b")
#20047=*
regexpterm(#20047,14,#20041,2,"c")
#20048=@"loc,{#10000},2,6,2,6"
locations_default(#20048,#10000,2,6,2,6)
regexpterm(#20047,14,#20045,0,"c")
#20048=@"loc,{#10000},2,11,2,11"
locations_default(#20048,#10000,2,11,2,11)
hasLocation(#20047,#20048)
regexp_const_value(#20047,"c")
#20049=*
regexpterm(#20049,24,#20039,1,"--[")
#20050=@"loc,{#10000},2,8,2,10"
locations_default(#20050,#10000,2,8,2,10)
regexpterm(#20049,14,#20045,1,"b")
#20050=@"loc,{#10000},2,12,2,12"
locations_default(#20050,#10000,2,12,2,12)
hasLocation(#20049,#20050)
regexp_const_value(#20049,"b")
#20051=*
regexpterm(#20051,14,#20049,0,"-")
#20052=@"loc,{#10000},2,8,2,8"
locations_default(#20052,#10000,2,8,2,8)
regexpterm(#20051,14,#20045,2,"d")
#20052=@"loc,{#10000},2,13,2,13"
locations_default(#20052,#10000,2,13,2,13)
hasLocation(#20051,#20052)
regexp_const_value(#20051,"-")
regexp_const_value(#20051,"d")
#20053=*
regexpterm(#20053,14,#20049,1,"[")
#20054=@"loc,{#10000},2,10,2,10"
locations_default(#20054,#10000,2,10,2,10)
hasLocation(#20053,#20054)
regexp_const_value(#20053,"[")
stmts(#20053,2,#20001,2,"/[[abc] ... de]]/v;")
hasLocation(#20053,#20007)
stmt_containers(#20053,#20001)
#20054=*
exprs(#20054,5,#20053,0,"/[[abc] ... bde]]/v")
hasLocation(#20054,#20017)
enclosing_stmt(#20054,#20053)
expr_containers(#20054,#20001)
literals("/[[abc]--[cbd]--[bde]]/v","/[[abc]--[cbd]--[bde]]/v",#20054)
#20055=*
regexpterm(#20055,14,#20039,2,"c")
#20056=@"loc,{#10000},2,11,2,11"
locations_default(#20056,#10000,2,11,2,11)
regexpterm(#20055,23,#20054,0,"[[abc]--[cbd]--[bde]]")
#20056=@"loc,{#10000},3,2,3,22"
locations_default(#20056,#10000,3,2,3,22)
hasLocation(#20055,#20056)
regexp_const_value(#20055,"c")
#20057=*
regexpterm(#20057,14,#20039,3,"b")
#20058=@"loc,{#10000},2,12,2,12"
locations_default(#20058,#10000,2,12,2,12)
hasLocation(#20057,#20058)
regexp_const_value(#20057,"b")
#20059=*
regexpterm(#20059,14,#20039,4,"d")
#20060=@"loc,{#10000},2,13,2,13"
locations_default(#20060,#10000,2,13,2,13)
hasLocation(#20059,#20060)
regexp_const_value(#20059,"d")
#20061=*
regexpterm(#20061,14,#20037,1,"]")
#20062=@"loc,{#10000},2,15,2,15"
locations_default(#20062,#10000,2,15,2,15)
hasLocation(#20061,#20062)
regexp_const_value(#20061,"]")
#20063=*
regexp_parse_errors(#20063,#20037,"unexpected character")
hasLocation(#20063,#20062)
regexpterm(#20057,30,#20055,0,"[[abc]--[cbd]--[bde]]")
hasLocation(#20057,#20056)
#20058=*
regexpterm(#20058,23,#20057,0,"[abc]")
#20059=@"loc,{#10000},3,3,3,7"
locations_default(#20059,#10000,3,3,3,7)
hasLocation(#20058,#20059)
#20060=*
regexpterm(#20060,14,#20058,0,"a")
#20061=@"loc,{#10000},3,4,3,4"
locations_default(#20061,#10000,3,4,3,4)
hasLocation(#20060,#20061)
regexp_const_value(#20060,"a")
#20062=*
regexpterm(#20062,14,#20058,1,"b")
#20063=@"loc,{#10000},3,5,3,5"
locations_default(#20063,#10000,3,5,3,5)
hasLocation(#20062,#20063)
regexp_const_value(#20062,"b")
#20064=*
stmts(#20064,2,#20001,2,"/[[abc] ... de]]/v;")
hasLocation(#20064,#20007)
stmt_containers(#20064,#20001)
#20065=*
exprs(#20065,5,#20064,0,"/[[abc] ... bde]]/v")
hasLocation(#20065,#20017)
enclosing_stmt(#20065,#20064)
expr_containers(#20065,#20001)
literals("/[[abc]--[cbd]--[bde]]/v","/[[abc]--[cbd]--[bde]]/v",#20065)
regexpterm(#20064,14,#20058,2,"c")
#20065=@"loc,{#10000},3,6,3,6"
locations_default(#20065,#10000,3,6,3,6)
hasLocation(#20064,#20065)
regexp_const_value(#20064,"c")
#20066=*
regexpterm(#20066,1,#20065,0,"[[abc]--[cbd]--[bde]]")
#20067=@"loc,{#10000},3,2,3,22"
locations_default(#20067,#10000,3,2,3,22)
regexpterm(#20066,23,#20057,1,"[cbd]")
#20067=@"loc,{#10000},3,10,3,14"
locations_default(#20067,#10000,3,10,3,14)
hasLocation(#20066,#20067)
#20068=*
regexpterm(#20068,23,#20066,0,"[[abc]--[cbd]")
#20069=@"loc,{#10000},3,2,3,14"
locations_default(#20069,#10000,3,2,3,14)
regexpterm(#20068,14,#20066,0,"c")
#20069=@"loc,{#10000},3,11,3,11"
locations_default(#20069,#10000,3,11,3,11)
hasLocation(#20068,#20069)
regexp_const_value(#20068,"c")
#20070=*
regexpterm(#20070,23,#20068,0,"[abc]")
#20071=@"loc,{#10000},3,3,3,7"
locations_default(#20071,#10000,3,3,3,7)
regexpterm(#20070,14,#20066,1,"b")
#20071=@"loc,{#10000},3,12,3,12"
locations_default(#20071,#10000,3,12,3,12)
hasLocation(#20070,#20071)
regexp_const_value(#20070,"b")
#20072=*
regexpterm(#20072,14,#20070,0,"a")
#20073=@"loc,{#10000},3,4,3,4"
locations_default(#20073,#10000,3,4,3,4)
regexpterm(#20072,14,#20066,2,"d")
#20073=@"loc,{#10000},3,13,3,13"
locations_default(#20073,#10000,3,13,3,13)
hasLocation(#20072,#20073)
regexp_const_value(#20072,"a")
regexp_const_value(#20072,"d")
#20074=*
regexpterm(#20074,14,#20070,1,"b")
#20075=@"loc,{#10000},3,5,3,5"
locations_default(#20075,#10000,3,5,3,5)
regexpterm(#20074,23,#20057,2,"[bde]")
#20075=@"loc,{#10000},3,17,3,21"
locations_default(#20075,#10000,3,17,3,21)
hasLocation(#20074,#20075)
regexp_const_value(#20074,"b")
#20076=*
regexpterm(#20076,14,#20070,2,"c")
#20077=@"loc,{#10000},3,6,3,6"
locations_default(#20077,#10000,3,6,3,6)
regexpterm(#20076,14,#20074,0,"b")
#20077=@"loc,{#10000},3,18,3,18"
locations_default(#20077,#10000,3,18,3,18)
hasLocation(#20076,#20077)
regexp_const_value(#20076,"c")
regexp_const_value(#20076,"b")
#20078=*
regexpterm(#20078,24,#20068,1,"--[")
#20079=@"loc,{#10000},3,8,3,10"
locations_default(#20079,#10000,3,8,3,10)
regexpterm(#20078,14,#20074,1,"d")
#20079=@"loc,{#10000},3,19,3,19"
locations_default(#20079,#10000,3,19,3,19)
hasLocation(#20078,#20079)
regexp_const_value(#20078,"d")
#20080=*
regexpterm(#20080,14,#20078,0,"-")
#20081=@"loc,{#10000},3,8,3,8"
locations_default(#20081,#10000,3,8,3,8)
regexpterm(#20080,14,#20074,2,"e")
#20081=@"loc,{#10000},3,20,3,20"
locations_default(#20081,#10000,3,20,3,20)
hasLocation(#20080,#20081)
regexp_const_value(#20080,"-")
regexp_const_value(#20080,"e")
#20082=*
regexpterm(#20082,14,#20078,1,"[")
#20083=@"loc,{#10000},3,10,3,10"
locations_default(#20083,#10000,3,10,3,10)
entry_cfg_node(#20082,#20001)
#20083=@"loc,{#10000},1,1,1,0"
locations_default(#20083,#10000,1,1,1,0)
hasLocation(#20082,#20083)
regexp_const_value(#20082,"[")
#20084=*
regexpterm(#20084,14,#20068,2,"c")
#20085=@"loc,{#10000},3,11,3,11"
locations_default(#20085,#10000,3,11,3,11)
hasLocation(#20084,#20085)
regexp_const_value(#20084,"c")
#20086=*
regexpterm(#20086,14,#20068,3,"b")
#20087=@"loc,{#10000},3,12,3,12"
locations_default(#20087,#10000,3,12,3,12)
hasLocation(#20086,#20087)
regexp_const_value(#20086,"b")
#20088=*
regexpterm(#20088,14,#20068,4,"d")
#20089=@"loc,{#10000},3,13,3,13"
locations_default(#20089,#10000,3,13,3,13)
hasLocation(#20088,#20089)
regexp_const_value(#20088,"d")
#20090=*
regexpterm(#20090,14,#20066,1,"--")
#20091=@"loc,{#10000},3,15,3,16"
locations_default(#20091,#10000,3,15,3,16)
hasLocation(#20090,#20091)
regexp_const_value(#20090,"--")
#20092=*
regexpterm(#20092,23,#20066,2,"[bde]")
#20093=@"loc,{#10000},3,17,3,21"
locations_default(#20093,#10000,3,17,3,21)
hasLocation(#20092,#20093)
#20094=*
regexpterm(#20094,14,#20092,0,"b")
#20095=@"loc,{#10000},3,18,3,18"
locations_default(#20095,#10000,3,18,3,18)
hasLocation(#20094,#20095)
regexp_const_value(#20094,"b")
#20096=*
regexpterm(#20096,14,#20092,1,"d")
#20097=@"loc,{#10000},3,19,3,19"
locations_default(#20097,#10000,3,19,3,19)
hasLocation(#20096,#20097)
regexp_const_value(#20096,"d")
#20098=*
regexpterm(#20098,14,#20092,2,"e")
#20099=@"loc,{#10000},3,20,3,20"
locations_default(#20099,#10000,3,20,3,20)
hasLocation(#20098,#20099)
regexp_const_value(#20098,"e")
#20100=*
regexpterm(#20100,14,#20066,3,"]")
#20101=@"loc,{#10000},3,22,3,22"
locations_default(#20101,#10000,3,22,3,22)
hasLocation(#20100,#20101)
regexp_const_value(#20100,"]")
#20102=*
regexp_parse_errors(#20102,#20066,"unexpected character")
hasLocation(#20102,#20101)
#20103=*
entry_cfg_node(#20103,#20001)
#20104=@"loc,{#10000},1,1,1,0"
locations_default(#20104,#10000,1,1,1,0)
hasLocation(#20103,#20104)
#20105=*
exit_cfg_node(#20105,#20001)
hasLocation(#20105,#20021)
successor(#20064,#20065)
successor(#20065,#20105)
successor(#20035,#20036)
successor(#20036,#20064)
exit_cfg_node(#20084,#20001)
hasLocation(#20084,#20021)
successor(#20053,#20054)
successor(#20054,#20084)
successor(#20032,#20033)
successor(#20033,#20053)
successor(#20023,#20024)
successor(#20024,#20035)
successor(#20103,#20023)
successor(#20024,#20032)
successor(#20082,#20023)
numlines(#10000,3,3,0)
filetype(#10000,"javascript")