Files
codeql/python/ql/test/library-tests/regex/test.py
Taus Brock-Nannestad e812eb777d Python: Port URL sanitisation queries to API graphs
Really, this boils down to "Port `re` library model to use API graphs
instead of points-to", which is what this PR actually does.

Instead of using points-to to track flags, we use a type tracker. To
handle multiple flags at the same time, we add additional flow from

`x` to `x | y` and `y | x`

and, as an added bonus, the above with `+` instead of `|`, neatly
fixing https://github.com/github/codeql/issues/4707

I had to modify the `Qualified.ql` test slightly, as it now had a
result stemming from the standard library (in `warnings.py`) that
points-to previously ignored.

It might be possible to implement this as a type tracker on
`LocalSourceNode`s, but with the added steps for the above operations,
this was not obvious to me, and so I opted for the simpler
"`smallstep`" variant.
2021-02-23 22:02:35 +01:00

73 lines
1.7 KiB
Python

import re  # Static regex fixture; avoid inserting lines -- expected results presumably reference line numbers (TODO confirm)
#            0123456789ABCDEF  <- column ruler: hex offsets into the pattern on the next line
re.compile(r'012345678')
re.compile(r'(\033|~{)')  # octal escape, and a '{' that does not start a quantifier
re.compile(r'\A[+-]?\d+')  # start-of-string anchor, then a class holding literal '+' and '-'
re.compile(r'(?P<name>[\w]+)|')  # named group followed by an empty alternative
re.compile(r'\|\[\][123]|\{\}')  # escaped metacharacters next to a real character class
re.compile(r'^.$')
re.compile(r'[^A-Z]')  # negated character class with a range
#       0123456789ABCDEF  <- column ruler: hex offsets into the pattern on the next line
re.sub('(?m)^(?!$)', indent*' ', s)  # inline (?m) flag; `indent` and `s` are not defined in the visible source
re.compile("(?:(?:\n\r?)|^)( *)\S")  # non-raw string: \n and \r are real control characters here
re.compile("[]]")  # ']' directly after '[' is a literal member of the class
re.compile("[^]]")  # same, for a negated class
re.compile("[^-]")  # negated class holding a literal '-'
# Negative lookbehind group
re.compile(r'x|(?<!\w)l')
# Braces that are not a quantifier
re.compile(r"x{Not qual}")
# Multiple carets and dollars
re.compile("^(^y|^z)(u$|v$)$")
# Bounded repetition: {m}, {,n}, {m,} and {m,n}
re.compile("ax{3}")
re.compile("ax{,3}")
re.compile("ax{3,}")
re.compile("ax{01,3}")  # lower bound written with a leading zero
# Negative lookahead
re.compile(r'(?!not-this)^[A-Z_]+$')
# Negative lookbehind
re.compile(r'^[A-Z_]+$(?<!not-this)')
# OK -- ODASA-ODASA-3968
re.compile('(?:[^%]|^)?%\((\w*)\)[a-z]')
# ODASA-3985
# Half surrogate pairs
re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')
# Code points outside the Basic Multilingual Plane (BMP)
re.compile(u'[\U00010000-\U0010ffff]')
# Modes (flags), passed positionally, by keyword, and combined with `|`
re.compile("", re.VERBOSE)
re.compile("", flags=re.VERBOSE)
re.compile("", re.VERBOSE|re.DOTALL)
re.compile("", flags=re.VERBOSE|re.IGNORECASE)
re.search("", None, re.UNICODE)  # flags given as the third positional argument
x = re.search("", flags=re.UNICODE)
# using addition for flags was reported as FP in https://github.com/github/codeql/issues/4707
re.compile("", re.VERBOSE+re.DOTALL)
# re.X is an alias for re.VERBOSE
re.compile("", re.X)
# Empty alternative at the start / at the end of the pattern
re.compile(r'|x')
re.compile(r'x|')
# Named group with caret and empty choice.
re.compile(r'(?:(?P<n1>^(?:|x)))')
# Misparsed on LGTM
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)")
re.compile("", re.M) # ODASA-8056
# FP reported in https://github.com/github/codeql/issues/3712
# This does not define a regex (but could be used by other code to do so)
escaped = re.escape("https://www.humblebundle.com/home/library")