Python: Fix bad join in StrConst::isUnicode

Also fixes a bug ("`B`" was not recognised as a bytestring prefix).

The basic idea behind this fix is that the set of possible prefixes is
fairly small, so it's easier just to precompute them, and then join
them with the entire prefix of the string in question (rather than
look at each string in isolation, get its prefix, and _then_ check
whether it looks like it's a unicode string prefix, which essentially
is what the code did before).
This commit is contained in:
Taus Brock-Nannestad
2020-11-05 16:45:27 +01:00
parent 1251bc57f5
commit bae4acabb1

View File

@@ -584,18 +584,40 @@ class Slice extends Slice_ {
}
}
/**
 * Gets a prefix of some string literal (`Str_`) in the database that contains
 * a `u` or `U` character, i.e. a prefix that explicitly marks a Unicode string.
 *
 * Helper predicate for `StrConst::isUnicode`.
 */
pragma[nomagic]
private string unicode_prefix() {
  exists(Str_ str | result = str.getPrefix()) and
  exists(string ch | ch = result.charAt(_) | ch = "u" or ch = "U")
}
/**
 * Gets a prefix of some string literal (`Str_`) in the database that contains
 * neither a `b` nor a `B` character, i.e. a prefix that does _not_ explicitly
 * mark a bytestring.
 *
 * Helper predicate for `StrConst::isUnicode`.
 */
pragma[nomagic]
private string non_byte_prefix() {
  exists(Str_ str | result = str.getPrefix()) and
  not exists(string ch | ch = result.charAt(_) | ch = "b" or ch = "B")
}
/** A string constant. */
class StrConst extends Str_, ImmutableLiteral {
/* syntax: "hello" */
/**
 * Holds if this string constant is a Unicode string.
 *
 * This is the case when either:
 * - its prefix is one of the prefixes explicitly marked as Unicode
 *   (contains `u` or `U`, see `unicode_prefix`), or
 * - its prefix is not marked as a bytestring (contains neither `b` nor `B`,
 *   see `non_byte_prefix`) and either `major_version()` is 3 or the enclosing
 *   module satisfies `hasFromFuture("unicode_literals")`.
 */
predicate isUnicode() {
  // Compare against the precomputed prefix sets instead of inspecting the
  // characters of each string's prefix individually; this also makes the
  // check recognise the upper-case `B` bytestring prefix.
  this.getPrefix() = unicode_prefix()
  or
  this.getPrefix() = non_byte_prefix() and
  (
    major_version() = 3
    or
    this.getEnclosingModule().hasFromFuture("unicode_literals")
  )
}
/** Gets the result of `getS()` for this string constant. This predicate is deprecated. */
deprecated override string strValue() { this.getS() = result }