C#: Extractor support for UTF-8 encoded strings.

This commit is contained in:
Michael Nebel
2023-01-19 12:22:36 +01:00
parent 72fa3bd905
commit e8eedb7b4d
6 changed files with 26 additions and 5 deletions

View File

@@ -2,7 +2,6 @@ using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Semmle.Extraction.CSharp.Populators;
using Semmle.Extraction.Entities;
using Semmle.Extraction.Kinds;
namespace Semmle.Extraction.CSharp.Entities
@@ -107,6 +106,11 @@ namespace Semmle.Extraction.CSharp.Entities
return Expression.ValueAsString(val);
}
if (TryGetStringValueFromUtf8Literal(out var s))
{
return s;
}
return null;
}
}
@@ -181,6 +185,17 @@ namespace Semmle.Extraction.CSharp.Entities
return isTrue || isFalse;
}
/// <summary>
/// Attempts to read the text of <c>Node</c> when it is a UTF-8 string literal.
/// </summary>
/// <param name="value">
/// Receives the literal token's <c>ValueText</c> on success; <c>null</c> otherwise.
/// </param>
/// <returns>
/// <c>true</c> when <c>Node</c> has kind <c>Utf8StringLiteralExpression</c> and is a
/// <c>LiteralExpressionSyntax</c>; <c>false</c> in every other case.
/// </returns>
private bool TryGetStringValueFromUtf8Literal(out string? value)
{
    // Bail out early unless the node is both of the UTF-8 literal kind and a literal expression.
    if (!Node.IsKind(SyntaxKind.Utf8StringLiteralExpression) || !(Node is LiteralExpressionSyntax utf8Literal))
    {
        value = null;
        return false;
    }

    value = utf8Literal.Token.ValueText;
    return true;
}
public bool IsBoolLiteral()
{
return TryGetBoolValueFromLiteral(out var _);

View File

@@ -47,6 +47,7 @@ namespace Semmle.Extraction.CSharp.Entities.Expressions
case SyntaxKind.FalseLiteralExpression:
case SyntaxKind.TrueLiteralExpression:
case SyntaxKind.StringLiteralExpression:
case SyntaxKind.Utf8StringLiteralExpression:
case SyntaxKind.NullLiteralExpression:
case SyntaxKind.NumericLiteralExpression:
case SyntaxKind.CharacterLiteralExpression:

View File

@@ -27,7 +27,7 @@ namespace Semmle.Extraction.CSharp.Entities.Expressions
case SyntaxKind.InterpolatedStringText:
// Create a string literal
var interpolatedText = (InterpolatedStringTextSyntax)c;
new Expression(new ExpressionInfo(Context, Type, Context.CreateLocation(c.GetLocation()), ExprKind.STRING_LITERAL, this, child++, false, interpolatedText.TextToken.ValueText));
new Expression(new ExpressionInfo(Context, Type, Context.CreateLocation(c.GetLocation()), ExprKind.UTF16_STRING_LITERAL, this, child++, false, interpolatedText.TextToken.ValueText));
break;
default:
throw new InternalError(c, $"Unhandled interpolation kind {c.Kind()}");

View File

@@ -20,6 +20,8 @@ namespace Semmle.Extraction.CSharp.Entities.Expressions
{
case SyntaxKind.DefaultLiteralExpression:
return ExprKind.DEFAULT;
case SyntaxKind.Utf8StringLiteralExpression:
return ExprKind.UTF8_STRING_LITERAL;
case SyntaxKind.NullLiteralExpression:
info.SetType(null); // Don't use converted type.
return ExprKind.NULL_LITERAL;
@@ -63,7 +65,7 @@ namespace Semmle.Extraction.CSharp.Entities.Expressions
return ExprKind.FLOAT_LITERAL;
case SpecialType.System_String:
return ExprKind.STRING_LITERAL;
return ExprKind.UTF16_STRING_LITERAL;
case SpecialType.System_UInt16:
case SpecialType.System_UInt32:

View File

@@ -15,7 +15,7 @@ namespace Semmle.Extraction.Kinds
ULONG_LITERAL = 7,
FLOAT_LITERAL = 8,
DOUBLE_LITERAL = 9,
STRING_LITERAL = 10,
UTF16_STRING_LITERAL = 10,
NULL_LITERAL = 11,
THIS_ACCESS = 12,
BASE_ACCESS = 13,
@@ -129,6 +129,7 @@ namespace Semmle.Extraction.Kinds
SLICE_PATTERN = 132,
URSHIFT = 133,
ASSIGN_URSHIFT = 134,
UTF8_STRING_LITERAL = 135,
DEFINE_SYMBOL = 999,
}
}

View File

@@ -1005,7 +1005,7 @@ case @expr.kind of
| 7 = @ulong_literal_expr
| 8 = @float_literal_expr
| 9 = @double_literal_expr
| 10 = @string_literal_expr
| 10 = @utf16_string_literal_expr
| 11 = @null_literal_expr
/* primary & unary */
| 12 = @this_access_expr
@@ -1139,6 +1139,7 @@ case @expr.kind of
| 132 = @slice_pattern_expr
| 133 = @urshift_expr
| 134 = @assign_urshift_expr
| 135 = @utf8_string_literal_expr
/* Preprocessor */
| 999 = @define_symbol_expr
;
@@ -1152,6 +1153,7 @@ case @expr.kind of
@integer_literal_expr = @int_literal_expr | @long_literal_expr | @uint_literal_expr | @ulong_literal_expr;
@real_literal_expr = @float_literal_expr | @double_literal_expr | @decimal_literal_expr;
@string_literal_expr = @utf16_string_literal_expr | @utf8_string_literal_expr;
@literal_expr = @bool_literal_expr | @char_literal_expr | @integer_literal_expr | @real_literal_expr
| @string_literal_expr | @null_literal_expr;