mirror of
https://github.com/github/codeql.git
synced 2026-06-03 04:40:14 +02:00
Generalize endpoint tokenization to work correctly across multiple lines
This commit is contained in:
@@ -226,24 +226,38 @@ class CodexPrompt extends EndpointFeature, TCodexPrompt {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the location of `node` contains the location of `token`:
|
||||
* both are on the same single line of code and
|
||||
* the column range of `node` equals or contains
|
||||
* the column range of `token`.
|
||||
* Holds if the location of `node` contains the location of `token`.
|
||||
*/
|
||||
cached
|
||||
predicate containsToken(AstNode node, Token token) {
|
||||
exists(string file, int line, int sc, int ec, int tsc, int tec |
|
||||
node.getLocation().hasLocationInfo(file, line, sc, line, ec) and
|
||||
token.getLocation().hasLocationInfo(file, line, tsc, line, tec) and
|
||||
sc <= tsc and
|
||||
tec <= ec
|
||||
exists(
|
||||
string file, int node_start_line, int node_start_column, int node_end_line,
|
||||
int node_end_column, int token_start_line, int token_start_column, int token_end_line,
|
||||
int token_end_column
|
||||
|
|
||||
node.getLocation()
|
||||
.hasLocationInfo(file, node_start_line, node_start_column, node_end_line, node_end_column) and
|
||||
token
|
||||
.getLocation()
|
||||
.hasLocationInfo(file, token_start_line, token_start_column, token_end_line,
|
||||
token_end_column) and
|
||||
(
|
||||
node_start_line < token_start_line
|
||||
or
|
||||
node_start_line = token_start_line and
|
||||
node_start_column <= token_start_column
|
||||
) and
|
||||
(
|
||||
node_end_line > token_end_line
|
||||
or
|
||||
node_end_line = token_end_line and
|
||||
node_end_column >= token_end_column
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the reconstructed source code text for `node`,
|
||||
* assuming it is on a single line of code.
|
||||
* Gets the reconstructed source code text for `node`.
|
||||
*/
|
||||
string tokenise(DataFlow::Node node) {
|
||||
result =
|
||||
@@ -254,6 +268,7 @@ class CodexPrompt extends EndpointFeature, TCodexPrompt {
|
||||
// Use space as the separator, since that is most likely.
|
||||
// May not be an exact reconstruction, e.g. if the code
|
||||
// had newlines between successive tokens.
|
||||
// TODO: Don't add a space if the current or previous token is a period.
|
||||
" "
|
||||
order by
|
||||
token.getLocation().getStartLine(), token.getLocation().getStartColumn()
|
||||
|
||||
Reference in New Issue
Block a user