From 2f1cfa816fd7eefb3bab86dae9ed1344479b8607 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Thu, 7 Jul 2022 19:23:06 +0000 Subject: [PATCH 01/22] Add annotate arguments as sqli sink --- .../security/cwe-089/ActiveRecordInjection.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb b/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb index 9c314a82b34..4c8dfcca10d 100644 --- a/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb +++ b/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb @@ -137,3 +137,17 @@ class BazController < BarController Admin.delete_by(params[:admin_condition]) end end + +class AnnotatedController < ActionController::Base + def index + name = params[:user_name] + # GOOD: string literal arguments not controlled by user are safe for annotations + users = User.annotate("this is a safe annotation").find_by(user_name: name) + end + + def unsafe_action + name = params[:user_name] + # BAD: user input passed into annotations are vulnerable to SQLi + users = User.annotate("this is an unsafe annotation:#{params[:comment]}").find_by(user_name: name) + end +end From b4869158f2970dc462c78e2ebafcf90ea4b83f33 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Thu, 7 Jul 2022 19:23:57 +0000 Subject: [PATCH 02/22] expand query tests for cwe-089 --- ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll | 5 +++++ .../test/query-tests/security/cwe-089/SqlInjection.expected | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index cc63e64a083..b56304a657b 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -133,6 +133,11 @@ private Expr sqlFragmentArgument(MethodCall call) { or methodName = "reload" and result = call.getKeywordArgument("lock") + or + // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to + // SQLi if user supplied input is passed in as an argument. + methodName = "annotate" and + result = call.getArgument(_) ) ) } diff --git a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected index 6a9f5f771fb..dc814977cae 100644 --- a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected +++ b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected @@ -31,6 +31,8 @@ edges | ActiveRecordInjection.rb:99:11:99:17 | ...[...] : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | | ActiveRecordInjection.rb:137:21:137:26 | call to params : | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | ActiveRecordInjection.rb:20:22:20:30 | condition : | +| ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | +| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | nodes | ActiveRecordInjection.rb:8:25:8:28 | name : | semmle.label | name : | | ActiveRecordInjection.rb:8:31:8:34 | pass : | semmle.label | pass : | @@ -80,6 +82,9 @@ nodes | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | semmle.label | ... + ... | | ActiveRecordInjection.rb:137:21:137:26 | call to params : | semmle.label | call to params : | | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | semmle.label | ...[...] : | +| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | semmle.label | "this is an unsafe annotation:..." | +| ActiveRecordInjection.rb:151:59:151:64 | call to params : | semmle.label | call to params : | +| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | semmle.label | ...[...] : | subpaths #select | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | ActiveRecordInjection.rb:70:23:70:28 | call to params : | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | This SQL query depends on $@. | ActiveRecordInjection.rb:70:23:70:28 | call to params | a user-provided value | @@ -99,3 +104,4 @@ subpaths | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | ActiveRecordInjection.rb:88:18:88:23 | call to params : | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:88:18:88:23 | call to params | a user-provided value | | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | ActiveRecordInjection.rb:92:21:92:26 | call to params : | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:92:21:92:26 | call to params | a user-provided value | | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | ActiveRecordInjection.rb:98:10:98:15 | call to params : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | This SQL query depends on $@. | ActiveRecordInjection.rb:98:10:98:15 | call to params | a user-provided value | +| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | This SQL query depends on $@. | ActiveRecordInjection.rb:151:59:151:64 | call to params | a user-provided value | From 940254d2516be52e2aace4a27083d407b9bcb83d Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Thu, 7 Jul 2022 19:39:59 +0000 Subject: [PATCH 03/22] update framework tests --- .../library-tests/frameworks/ActiveRecord.expected | 4 ++++ ruby/ql/test/library-tests/frameworks/ActiveRecord.rb | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected b/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected index d8509f6a218..b416d853440 100644 --- a/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected +++ b/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected @@ -22,6 +22,7 @@ activeRecordSqlExecutionRanges | ActiveRecord.rb:46:20:46:32 | ... + ... | | ActiveRecord.rb:52:16:52:28 | "name #{...}" | | ActiveRecord.rb:56:20:56:39 | "username = #{...}" | +| ActiveRecord.rb:78:27:78:76 | "this is an unsafe annotation:..." | activeRecordModelClassMethodCalls | ActiveRecord.rb:2:3:2:17 | call to has_many | | ActiveRecord.rb:6:3:6:24 | call to belongs_to | @@ -44,6 +45,8 @@ activeRecordModelClassMethodCalls | ActiveRecord.rb:60:5:60:33 | call to find_by | | ActiveRecord.rb:62:5:62:34 | call to find | | ActiveRecord.rb:68:5:68:45 | call to delete_by | +| ActiveRecord.rb:74:13:74:54 | call to annotate | +| ActiveRecord.rb:78:13:78:77 | call to annotate | potentiallyUnsafeSqlExecutingMethodCall | ActiveRecord.rb:9:5:9:68 | call to find | | ActiveRecord.rb:19:5:19:25 | call to destroy_by | @@ -55,6 +58,7 @@ potentiallyUnsafeSqlExecutingMethodCall | ActiveRecord.rb:46:5:46:33 | call to delete_by | | ActiveRecord.rb:52:5:52:29 | call to order | | ActiveRecord.rb:56:7:56:40 | call to find_by | +| ActiveRecord.rb:78:13:78:77 | call to annotate | activeRecordModelInstantiations | ActiveRecord.rb:9:5:9:68 | call to find | ActiveRecord.rb:5:1:15:3 | User | | ActiveRecord.rb:13:5:13:40 | call to find_by | ActiveRecord.rb:1:1:3:3 | UserGroup | diff --git a/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb b/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb index 5045a5c2264..d25cbf901c3 100644 --- a/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb +++ b/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb @@ -68,3 +68,13 @@ class BazController < BarController Admin.delete_by(params[:admin_condition]) end end + +class AnnotatedController < ActionController::Base + def index + users = User.annotate("this is a safe annotation") + end + + def unsafe_action + users = User.annotate("this is an unsafe annotation:#{params[:comment]}") + end +end From 11e39aa0303d7207f5a03981fe69c651232828e6 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Thu, 7 Jul 2022 21:40:16 +0000 Subject: [PATCH 04/22] Add changelog --- ruby/ql/lib/change-notes/released/0.3.1.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 ruby/ql/lib/change-notes/released/0.3.1.md diff --git a/ruby/ql/lib/change-notes/released/0.3.1.md b/ruby/ql/lib/change-notes/released/0.3.1.md new file mode 100644 index 00000000000..64efa15884a --- /dev/null +++ b/ruby/ql/lib/change-notes/released/0.3.1.md @@ -0,0 +1,5 @@ +## 0.3.1 + +### Minor Analysis Improvements + +- Calls to `ActiveRecord::Relation#annotate` have now been added to `ActiveRecordModelClass#sqlFragmentArgument` so that it can be used as a sink for queries like rb/sql-injection. \ No newline at end of file From bd50fd7f1e5413d1bd92ee95f43f15a46e63f7b0 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Fri, 8 Jul 2022 17:20:41 +0000 Subject: [PATCH 05/22] format fix --- ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index b56304a657b..142d1455ce4 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -135,7 +135,7 @@ private Expr sqlFragmentArgument(MethodCall call) { result = call.getKeywordArgument("lock") or // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to - // SQLi if user supplied input is passed in as an argument. + // SQLi if user supplied input is passed in as an argument. methodName = "annotate" and result = call.getArgument(_) ) From e8e8da1b3189adba08fe944db5df482287b52a49 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Fri, 8 Jul 2022 19:01:01 +0000 Subject: [PATCH 06/22] fix lib test expect for ActionController --- .../test/library-tests/frameworks/ActionController.expected | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ruby/ql/test/library-tests/frameworks/ActionController.expected b/ruby/ql/test/library-tests/frameworks/ActionController.expected index d306f09b64b..52ab15995c7 100644 --- a/ruby/ql/test/library-tests/frameworks/ActionController.expected +++ b/ruby/ql/test/library-tests/frameworks/ActionController.expected @@ -2,6 +2,7 @@ actionControllerControllerClasses | ActiveRecord.rb:23:1:39:3 | FooController | | ActiveRecord.rb:41:1:64:3 | BarController | | ActiveRecord.rb:66:1:70:3 | BazController | +| ActiveRecord.rb:72:1:80:3 | AnnotatedController | | app/controllers/comments_controller.rb:1:1:7:3 | CommentsController | | app/controllers/foo/bars_controller.rb:3:1:39:3 | BarsController | | app/controllers/photos_controller.rb:1:1:4:3 | PhotosController | @@ -12,6 +13,8 @@ actionControllerActionMethods | ActiveRecord.rb:42:3:47:5 | some_other_request_handler | | ActiveRecord.rb:49:3:63:5 | safe_paths | | ActiveRecord.rb:67:3:69:5 | yet_another_handler | +| ActiveRecord.rb:73:3:75:5 | index | +| ActiveRecord.rb:77:3:79:5 | unsafe_action | | app/controllers/comments_controller.rb:2:3:3:5 | index | | app/controllers/comments_controller.rb:5:3:6:5 | show | | app/controllers/foo/bars_controller.rb:5:3:7:5 | index | @@ -38,6 +41,7 @@ paramsCalls | ActiveRecord.rb:59:12:59:17 | call to params | | ActiveRecord.rb:62:15:62:20 | call to params | | ActiveRecord.rb:68:21:68:26 | call to params | +| ActiveRecord.rb:78:59:78:64 | call to params | | app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params | | app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params | | app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params | @@ -57,6 +61,7 @@ paramsSources | ActiveRecord.rb:59:12:59:17 | call to params | | ActiveRecord.rb:62:15:62:20 | call to params | | ActiveRecord.rb:68:21:68:26 | call to params | +| ActiveRecord.rb:78:59:78:64 | call to params | | app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params | | app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params | | app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params | From 8ca7d7d775cf8540acbb7539fad7830c04eb3ed2 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Thu, 14 Jul 2022 00:22:38 +0000 Subject: [PATCH 07/22] update change note --- ruby/ql/lib/change-notes/released/0.3.1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruby/ql/lib/change-notes/released/0.3.1.md b/ruby/ql/lib/change-notes/released/0.3.1.md index 64efa15884a..392aa6f0b27 100644 --- a/ruby/ql/lib/change-notes/released/0.3.1.md +++ b/ruby/ql/lib/change-notes/released/0.3.1.md @@ -2,4 +2,4 @@ ### Minor Analysis Improvements -- Calls to `ActiveRecord::Relation#annotate` have now been added to `ActiveRecordModelClass#sqlFragmentArgument` so that it can be used as a sink for queries like rb/sql-injection. \ No newline at end of file +- Calls to `ActiveRecord::Relation#annotate` are now recognized as`SqlExecution`s so that it will be considered as a sink for queries like rb/sql-injection. \ No newline at end of file From 39fb714ad17e91cefbb3fa9a7160836ef8f3a303 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:54:35 +0100 Subject: [PATCH 08/22] Swift: Add test with substring declared differently. --- .../CWE-135/StringLengthConflation.expected | 4 ++ .../CWE-135/StringLengthConflation2.swift | 42 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift diff --git a/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected index 1af416dad2f..e5191b6446d 100644 --- a/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected +++ b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected @@ -1,4 +1,5 @@ edges +| StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | | StringLengthConflation.swift:93:28:93:31 | .length : | StringLengthConflation.swift:93:28:93:40 | ... call to -(_:_:) ... | @@ -15,6 +16,8 @@ edges | StringLengthConflation.swift:135:36:135:38 | .count : | StringLengthConflation.swift:135:36:135:46 | ... call to -(_:_:) ... | | StringLengthConflation.swift:141:28:141:30 | .count : | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | nodes +| StringLengthConflation2.swift:37:34:37:36 | .count : | semmle.label | .count : | +| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... | | StringLengthConflation.swift:53:43:53:46 | .length | semmle.label | .length | | StringLengthConflation.swift:60:47:60:50 | .length : | semmle.label | .length : | | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | semmle.label | ... call to /(_:_:) ... | @@ -50,6 +53,7 @@ nodes | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... | subpaths #select +| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | This String length is used in an NSString, but it may not be equivalent. | | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. | diff --git a/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift new file mode 100644 index 00000000000..39b1456e604 --- /dev/null +++ b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift @@ -0,0 +1,42 @@ +// this test is in a separate file, because we want to test with a slightly different library +// implementation. In this version, some of the functions of `NSString` are in fact implemented +// in a base class `NSStringBase`. + +// --- stubs --- + +func print(_ items: Any...) {} + +typealias unichar = UInt16 + +class NSObject +{ +} + +class NSStringBase : NSObject +{ + func substring(from: Int) -> String { return "" } +} + +class NSString : NSStringBase +{ + init(string: String) { length = string.count } + + func substring(to: Int) -> String { return "" } + + private(set) var length: Int +} + +// --- tests --- + +func test(s: String) { + let ns = NSString(string: s) + + let nstr1 = ns.substring(from: ns.length - 1) // GOOD + let nstr2 = ns.substring(from: s.count - 1) // BAD: String length used in NSString [NOT DETECTED] + let nstr3 = ns.substring(to: ns.length - 1) // GOOD + let nstr4 = ns.substring(to: s.count - 1) // BAD: String length used in NSString + print("substrings '\(nstr1)' '\(nstr2)' / '\(nstr3)' '\(nstr4)'") +} + +// `begin :thumbsup: end`, with thumbs up emoji and skin tone modifier +test(s: "begin \u{0001F44D}\u{0001F3FF} end") From 4854679a4010f415e80151c4f797afff3e1f2ed0 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:04:20 +0100 Subject: [PATCH 09/22] Swift: Clean up isSink (1 - move common variables to an outer exists). --- .../CWE-135/StringLengthConflation.ql | 163 +++++++++--------- 1 file changed, 81 insertions(+), 82 deletions(-) diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index 398c48f01e5..fa5c5f25e9d 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -41,88 +41,87 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { } override predicate isSink(DataFlow::Node node, string flowstate) { - // arguments to method calls... - exists( - string className, string methodName, string paramName, ClassDecl c, AbstractFunctionDecl f, - CallExpr call, int arg - | - ( - // `NSRange.init` - className = "NSRange" and - methodName = "init(location:length:)" and - paramName = ["location", "length"] - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - methodName = "character(at:)" and - paramName = "at" - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - methodName = "substring(from:)" and - paramName = "from" - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - methodName = "substring(to:)" and - paramName = "to" - or - // `NSMutableString.insert` - className = "NSMutableString" and - methodName = "insert(_:at:)" and - paramName = "at" - ) and - c.getName() = className and - c.getAMember() = f and // TODO: will this even work if its defined in a parent class? - call.getFunction().(ApplyExpr).getStaticTarget() = f and - f.getName() = methodName and - f.getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - ) - or - // arguments to function calls... - exists(string funcName, string paramName, CallExpr call, int arg | - // `NSMakeRange` - funcName = "NSMakeRange(_:_:)" and - paramName = ["loc", "len"] and - call.getStaticTarget().getName() = funcName and - call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - ) - or - // arguments to function calls... - exists(string funcName, string paramName, CallExpr call, int arg | - ( - // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` - funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and - paramName = "k" - or - // `String.prefix`, `String.suffix` - funcName = ["prefix(_:)", "suffix(_:)"] and - paramName = "maxLength" - or - // `String.Index.init` - funcName = "init(encodedOffset:)" and - paramName = "offset" - or - // `String.index` - funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and - paramName = "n" - or - // `String.formIndex` - funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and - paramName = "distance" - ) and - call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and - call.getFunction() - .(ApplyExpr) - .getStaticTarget() - .getParam(pragma[only_bind_into](arg)) - .getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "NSString" // `NSString` length flowing into `String` + exists(CallExpr call, string paramName, int arg | + // arguments to method calls... + exists(string className, string methodName, ClassDecl c, AbstractFunctionDecl f | + ( + // `NSRange.init` + className = "NSRange" and + methodName = "init(location:length:)" and + paramName = ["location", "length"] + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + methodName = "character(at:)" and + paramName = "at" + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + methodName = "substring(from:)" and + paramName = "from" + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + methodName = "substring(to:)" and + paramName = "to" + or + // `NSMutableString.insert` + className = "NSMutableString" and + methodName = "insert(_:at:)" and + paramName = "at" + ) and + c.getName() = className and + c.getAMember() = f and // TODO: will this even work if its defined in a parent class? + call.getFunction().(ApplyExpr).getStaticTarget() = f and + f.getName() = methodName and + f.getParam(pragma[only_bind_into](arg)).getName() = paramName and + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and + flowstate = "String" // `String` length flowing into `NSString` + ) + or + // arguments to function calls... + exists(string funcName | + // `NSMakeRange` + funcName = "NSMakeRange(_:_:)" and + paramName = ["loc", "len"] and + call.getStaticTarget().getName() = funcName and + call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and + flowstate = "String" // `String` length flowing into `NSString` + ) + or + // arguments to function calls... + exists(string funcName | + ( + // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` + funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and + paramName = "k" + or + // `String.prefix`, `String.suffix` + funcName = ["prefix(_:)", "suffix(_:)"] and + paramName = "maxLength" + or + // `String.Index.init` + funcName = "init(encodedOffset:)" and + paramName = "offset" + or + // `String.index` + funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and + paramName = "n" + or + // `String.formIndex` + funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and + paramName = "distance" + ) and + call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and + call.getFunction() + .(ApplyExpr) + .getStaticTarget() + .getParam(pragma[only_bind_into](arg)) + .getName() = paramName and + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and + flowstate = "NSString" // `NSString` length flowing into `String` + ) ) } From 0bd94a630761b0fdf103832f8f864407755b5790 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:04:57 +0100 Subject: [PATCH 10/22] Swift: Clean up isSink (2 - rename methodName -> funcName and move that out as well). --- .../CWE-135/StringLengthConflation.ql | 92 +++++++++---------- 1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index fa5c5f25e9d..131e2b72ceb 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -41,87 +41,83 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { } override predicate isSink(DataFlow::Node node, string flowstate) { - exists(CallExpr call, string paramName, int arg | + exists(CallExpr call, string funcName, string paramName, int arg | // arguments to method calls... - exists(string className, string methodName, ClassDecl c, AbstractFunctionDecl f | + exists(string className, ClassDecl c, AbstractFunctionDecl f | ( // `NSRange.init` className = "NSRange" and - methodName = "init(location:length:)" and + funcName = "init(location:length:)" and paramName = ["location", "length"] or // `NSString.character` className = ["NSString", "NSMutableString"] and - methodName = "character(at:)" and + funcName = "character(at:)" and paramName = "at" or // `NSString.character` className = ["NSString", "NSMutableString"] and - methodName = "substring(from:)" and + funcName = "substring(from:)" and paramName = "from" or // `NSString.character` className = ["NSString", "NSMutableString"] and - methodName = "substring(to:)" and + funcName = "substring(to:)" and paramName = "to" or // `NSMutableString.insert` className = "NSMutableString" and - methodName = "insert(_:at:)" and + funcName = "insert(_:at:)" and paramName = "at" ) and c.getName() = className and c.getAMember() = f and // TODO: will this even work if its defined in a parent class? call.getFunction().(ApplyExpr).getStaticTarget() = f and - f.getName() = methodName and + f.getName() = funcName and f.getParam(pragma[only_bind_into](arg)).getName() = paramName and call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and flowstate = "String" // `String` length flowing into `NSString` ) or // arguments to function calls... - exists(string funcName | - // `NSMakeRange` - funcName = "NSMakeRange(_:_:)" and - paramName = ["loc", "len"] and - call.getStaticTarget().getName() = funcName and - call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - ) + // `NSMakeRange` + funcName = "NSMakeRange(_:_:)" and + paramName = ["loc", "len"] and + call.getStaticTarget().getName() = funcName and + call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and + flowstate = "String" // `String` length flowing into `NSString` or // arguments to function calls... - exists(string funcName | - ( - // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` - funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and - paramName = "k" - or - // `String.prefix`, `String.suffix` - funcName = ["prefix(_:)", "suffix(_:)"] and - paramName = "maxLength" - or - // `String.Index.init` - funcName = "init(encodedOffset:)" and - paramName = "offset" - or - // `String.index` - funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and - paramName = "n" - or - // `String.formIndex` - funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and - paramName = "distance" - ) and - call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and - call.getFunction() - .(ApplyExpr) - .getStaticTarget() - .getParam(pragma[only_bind_into](arg)) - .getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "NSString" // `NSString` length flowing into `String` - ) + ( + // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` + funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and + paramName = "k" + or + // `String.prefix`, `String.suffix` + funcName = ["prefix(_:)", "suffix(_:)"] and + paramName = "maxLength" + or + // `String.Index.init` + funcName = "init(encodedOffset:)" and + paramName = "offset" + or + // `String.index` + funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and + paramName = "n" + or + // `String.formIndex` + funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and + paramName = "distance" + ) and + call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and + call.getFunction() + .(ApplyExpr) + .getStaticTarget() + .getParam(pragma[only_bind_into](arg)) + .getName() = paramName and + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and + flowstate = "NSString" // `NSString` length flowing into `String` ) } From b136790efd53ba3488fc3a3c4b4c0b0f9749dbe7 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:06:32 +0100 Subject: [PATCH 11/22] Swift: Clean up isSink (3 - rename f -> funcDecl and move that out as well; in the other two cases this variable didn't exist, now it does). --- .../CWE-135/StringLengthConflation.ql | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index 131e2b72ceb..117cbac1e27 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -41,9 +41,11 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { } override predicate isSink(DataFlow::Node node, string flowstate) { - exists(CallExpr call, string funcName, string paramName, int arg | + exists( + AbstractFunctionDecl funcDecl, CallExpr call, string funcName, string paramName, int arg + | // arguments to method calls... - exists(string className, ClassDecl c, AbstractFunctionDecl f | + exists(string className, ClassDecl c | ( // `NSRange.init` className = "NSRange" and @@ -71,10 +73,10 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { paramName = "at" ) and c.getName() = className and - c.getAMember() = f and // TODO: will this even work if its defined in a parent class? - call.getFunction().(ApplyExpr).getStaticTarget() = f and - f.getName() = funcName and - f.getParam(pragma[only_bind_into](arg)).getName() = paramName and + c.getAMember() = funcDecl and // TODO: will this even work if its defined in a parent class? + call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and + funcDecl.getName() = funcName and + funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and flowstate = "String" // `String` length flowing into `NSString` ) @@ -83,8 +85,9 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { // `NSMakeRange` funcName = "NSMakeRange(_:_:)" and paramName = ["loc", "len"] and - call.getStaticTarget().getName() = funcName and - call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and + call.getStaticTarget() = funcDecl and + funcDecl.getName() = funcName and + funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and flowstate = "String" // `String` length flowing into `NSString` or @@ -110,12 +113,9 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and paramName = "distance" ) and - call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and - call.getFunction() - .(ApplyExpr) - .getStaticTarget() - .getParam(pragma[only_bind_into](arg)) - .getName() = paramName and + call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and + funcDecl.getName() = funcName and + funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and flowstate = "NSString" // `NSString` length flowing into `String` ) From 9474e63faf23cda600ef3b1006dda2d4dfb31a15 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:08:31 +0100 Subject: [PATCH 12/22] Swift: Clean up isSink (4 - move common code out). --- .../CWE-135/StringLengthConflation.ql | 132 +++++++++--------- 1 file changed, 64 insertions(+), 68 deletions(-) diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index 117cbac1e27..62e166487ec 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -44,80 +44,76 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { exists( AbstractFunctionDecl funcDecl, CallExpr call, string funcName, string paramName, int arg | - // arguments to method calls... - exists(string className, ClassDecl c | - ( - // `NSRange.init` - className = "NSRange" and - funcName = "init(location:length:)" and - paramName = ["location", "length"] - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - funcName = "character(at:)" and - paramName = "at" - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - funcName = "substring(from:)" and - paramName = "from" - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - funcName = "substring(to:)" and - paramName = "to" - or - // `NSMutableString.insert` - className = "NSMutableString" and - funcName = "insert(_:at:)" and - paramName = "at" - ) and - c.getName() = className and - c.getAMember() = funcDecl and // TODO: will this even work if its defined in a parent class? - call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and - funcDecl.getName() = funcName and - funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - ) - or - // arguments to function calls... - // `NSMakeRange` - funcName = "NSMakeRange(_:_:)" and - paramName = ["loc", "len"] and - call.getStaticTarget() = funcDecl and - funcDecl.getName() = funcName and - funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - or - // arguments to function calls... ( - // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` - funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and - paramName = "k" + // arguments to method calls... + exists(string className, ClassDecl c | + ( + // `NSRange.init` + className = "NSRange" and + funcName = "init(location:length:)" and + paramName = ["location", "length"] + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + funcName = "character(at:)" and + paramName = "at" + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + funcName = "substring(from:)" and + paramName = "from" + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + funcName = "substring(to:)" and + paramName = "to" + or + // `NSMutableString.insert` + className = "NSMutableString" and + funcName = "insert(_:at:)" and + paramName = "at" + ) and + c.getName() = className and + c.getAMember() = funcDecl and // TODO: will this even work if its defined in a parent class? + call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and + flowstate = "String" // `String` length flowing into `NSString` + ) or - // `String.prefix`, `String.suffix` - funcName = ["prefix(_:)", "suffix(_:)"] and - paramName = "maxLength" + // arguments to function calls... + // `NSMakeRange` + funcName = "NSMakeRange(_:_:)" and + paramName = ["loc", "len"] and + call.getStaticTarget() = funcDecl and + flowstate = "String" // `String` length flowing into `NSString` or - // `String.Index.init` - funcName = "init(encodedOffset:)" and - paramName = "offset" - or - // `String.index` - funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and - paramName = "n" - or - // `String.formIndex` - funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and - paramName = "distance" + // arguments to function calls... + ( + // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` + funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and + paramName = "k" + or + // `String.prefix`, `String.suffix` + funcName = ["prefix(_:)", "suffix(_:)"] and + paramName = "maxLength" + or + // `String.Index.init` + funcName = "init(encodedOffset:)" and + paramName = "offset" + or + // `String.index` + funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and + paramName = "n" + or + // `String.formIndex` + funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and + paramName = "distance" + ) and + call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and + flowstate = "NSString" // `NSString` length flowing into `String` ) and - call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and funcDecl.getName() = funcName and funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "NSString" // `NSString` length flowing into `String` + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() ) } From 336548f746dedabeb5813821e7a4b05f7b8ce76b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:10:18 +0100 Subject: [PATCH 13/22] Swift: Improve comments. --- .../ql/src/queries/Security/CWE-135/StringLengthConflation.ql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index 62e166487ec..50001985055 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -86,7 +86,7 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { call.getStaticTarget() = funcDecl and flowstate = "String" // `String` length flowing into `NSString` or - // arguments to function calls... + // arguments to method calls... ( // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and @@ -111,6 +111,7 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and flowstate = "NSString" // `NSString` length flowing into `String` ) and + // match up `funcName`, `paramName`, `arg`, `node`. funcDecl.getName() = funcName and funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() From 541df9b5505eca66b5afe8aa1c91487d77672157 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 18 Jul 2022 14:26:08 +0100 Subject: [PATCH 14/22] Swift: Remove TODO comment. We have a test for this problem now. --- swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index 50001985055..4e78d56f7d2 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -74,7 +74,7 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { paramName = "at" ) and c.getName() = className and - c.getAMember() = funcDecl and // TODO: will this even work if its defined in a parent class? + c.getAMember() = funcDecl and call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and flowstate = "String" // `String` length flowing into `NSString` ) From 962155fd61596271a83003d05ae25cdc365b0488 Mon Sep 17 00:00:00 2001 From: thiggy1342 Date: Tue, 19 Jul 2022 00:33:04 +0000 Subject: [PATCH 15/22] fix changenotes --- ...=> 2022-07-18-sqli-in-activerecord-relation-annotate.md} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename ruby/ql/lib/change-notes/{released/0.3.1.md => 2022-07-18-sqli-in-activerecord-relation-annotate.md} (64%) diff --git a/ruby/ql/lib/change-notes/released/0.3.1.md b/ruby/ql/lib/change-notes/2022-07-18-sqli-in-activerecord-relation-annotate.md similarity index 64% rename from ruby/ql/lib/change-notes/released/0.3.1.md rename to ruby/ql/lib/change-notes/2022-07-18-sqli-in-activerecord-relation-annotate.md index 392aa6f0b27..60ab137f8b2 100644 --- a/ruby/ql/lib/change-notes/released/0.3.1.md +++ b/ruby/ql/lib/change-notes/2022-07-18-sqli-in-activerecord-relation-annotate.md @@ -1,5 +1,5 @@ -## 0.3.1 - -### Minor Analysis Improvements +--- +category: minorAnalysis +--- - Calls to `ActiveRecord::Relation#annotate` are now recognized as`SqlExecution`s so that it will be considered as a sink for queries like rb/sql-injection. \ No newline at end of file From 7620a6f6539dcbdf4efab6bd9a08335023c4d95a Mon Sep 17 00:00:00 2001 From: Aditya Sharad Date: Thu, 14 Jul 2022 12:08:22 -0700 Subject: [PATCH 16/22] Docs: Update supported languages page with links to CLI and pack information Include links to the CLI changelog, CLI releases, bundle releases, pack changelogs, and pack source. Clarify that this support information applies to the current version of the CLI, bundle, query packs, and library packs. --- .../supported-languages-and-frameworks.rst | 7 ++++-- docs/codeql/support/reusables/frameworks.rst | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst b/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst index 4353f9402b7..be66125846a 100644 --- a/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst +++ b/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst @@ -11,14 +11,17 @@ CodeQL. Languages and compilers ####################### -CodeQL supports the following languages and compilers. +The current versions of the CodeQL CLI (`changelog `__, `releases `__), +CodeQL library packs (`source `__), +and CodeQL bundle (`releases `__) +support the following languages and compilers. .. include:: ../support/reusables/versions-compilers.rst Frameworks and libraries ######################## -The libraries and queries in the current version of CodeQL have been explicitly checked against the libraries and frameworks listed below. +The current versions of the CodeQL library and query packs (`source `__) have been explicitly checked against the libraries and frameworks listed below. .. pull-quote:: diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index 12bcd5af8e6..fc5410648cf 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -1,6 +1,10 @@ C and C++ built-in support ================================ +Provided by the current versions of the +CodeQL query pack ``codeql/cpp-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/cpp-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -14,6 +18,10 @@ C and C++ built-in support C# built-in support ================================ +Provided by the current versions of the +CodeQL query pack ``codeql/csharp-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/csharp-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -33,6 +41,10 @@ C# built-in support Go built-in support ================================ +Provided by the current versions of the +CodeQL query pack ``codeql/go-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/go-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -84,6 +96,10 @@ Go built-in support Java built-in support ================================== +Provided by the current versions of the +CodeQL query pack ``codeql/java-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/java-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -113,6 +129,10 @@ Java built-in support JavaScript and TypeScript built-in support ======================================================= +Provided by the current versions of the +CodeQL query pack ``codeql/javascript-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/javascript-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -156,6 +176,10 @@ JavaScript and TypeScript built-in support Python built-in support ==================================== +Provided by the current versions of the +CodeQL query pack ``codeql/python-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/python-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable From e9e5d948b33e8ba3f84f417aedaeb7cc3445b72a Mon Sep 17 00:00:00 2001 From: Cornelius Riemenschneider Date: Thu, 9 Sep 2021 17:21:08 +0000 Subject: [PATCH 17/22] C#: Implement proper `dotnet build` handling in the Lua tracing config. For proper C# tracing, `dotnet build` needs the parameter /p:UseSharedCompilation=false. However, we can't pass that to the other subcommands of `dotnet`, therefore we need to figure out which subcommand of `dotnet` is being invoked. --- csharp/tools/tracing-config.lua | 61 ++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/csharp/tools/tracing-config.lua b/csharp/tools/tracing-config.lua index b4ff0206b02..8aafc63e7e5 100644 --- a/csharp/tools/tracing-config.lua +++ b/csharp/tools/tracing-config.lua @@ -2,7 +2,54 @@ function RegisterExtractorPack(id) local extractor = GetPlatformToolsDirectory() .. 'Semmle.Extraction.CSharp.Driver' if OperatingSystem == 'windows' then extractor = extractor .. '.exe' end + + function DotnetMatcherBuild(compilerName, compilerPath, compilerArguments, + _languageId) + if compilerName ~= 'dotnet' and compilerName ~= 'dotnet.exe' then + return nil + end + + -- The dotnet CLI has the following usage instructions: + -- dotnet [sdk-options] [command] [command-options] [arguments] + -- we are interested in dotnet build, which has the following usage instructions: + -- dotnet [options] build [...] + -- For now, parse the command line as follows: + -- Everything that starts with `-` (or `/`) will be ignored. + -- The first non-option argument is treated as the command. + -- if that's `build`, we append `/p:UseSharedCompilation=false` to the command line, + -- otherwise we do nothing. + local match = false + local argv = compilerArguments.argv + if OperatingSystem == 'windows' then + -- let's hope that this split matches the escaping rules `dotnet` applies to command line arguments + -- or, at least, that it is close enough + argv = + NativeArgumentsToArgv(compilerArguments.nativeArgumentPointer) + end + for i, arg in ipairs(argv) do + -- dotnet options start with either - or / (both are legal) + local firstCharacter = string.sub(arg, 1, 1) + if not (firstCharacter == '-') and not (firstCharacter == '/') then + Log(1, 'Dotnet subcommand detected: %s', arg) + if arg == 'build' then match = true end + break + end + end + if match then + return { + order = ORDER_REPLACE, + invocation = BuildExtractorInvocation(id, compilerPath, + compilerPath, + compilerArguments, nil, { + '/p:UseSharedCompilation=false' + }) + } + end + return nil + end + local windowsMatchers = { + DotnetMatcherBuild, CreatePatternMatcher({'^dotnet%.exe$'}, MatchCompilerName, extractor, { prepend = {'--dotnetexec', '--cil'}, order = ORDER_BEFORE @@ -10,22 +57,21 @@ function RegisterExtractorPack(id) CreatePatternMatcher({'^csc.*%.exe$'}, MatchCompilerName, extractor, { prepend = {'--compiler', '"${compiler}"', '--cil'}, order = ORDER_BEFORE - }), CreatePatternMatcher({'^fakes.*%.exe$', 'moles.*%.exe'}, MatchCompilerName, nil, {trace = false}) } local posixMatchers = { - CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName, - extractor, { - prepend = {'--compiler', '"${compiler}"', '--cil'}, - order = ORDER_BEFORE - - }), + DotnetMatcherBuild, CreatePatternMatcher({'^mono', '^dotnet$'}, MatchCompilerName, extractor, { prepend = {'--dotnetexec', '--cil'}, order = ORDER_BEFORE + }), + CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName, + extractor, { + prepend = {'--compiler', '"${compiler}"', '--cil'}, + order = ORDER_BEFORE }), function(compilerName, compilerPath, compilerArguments, _languageId) if MatchCompilerName('^msbuild$', compilerName, compilerPath, compilerArguments) or @@ -49,7 +95,6 @@ function RegisterExtractorPack(id) else return posixMatchers end - end -- Return a list of minimum supported versions of the configuration file format From 694d6395d581a4f2a876754c49046606c519f368 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Wed, 20 Jul 2022 16:25:33 +0200 Subject: [PATCH 18/22] C++: Fix join-order problem in `cpp/command-line-injection` Before on Abseil Linux: ``` Evaluated relational algebra for predicate ExecTainted::ExecState#class#91000ffb#fff@41084cm7 with tuple counts: 40879811 ~0% {2} r1 = SCAN DataFlowUtil::Node::getLocation#dispred#f0820431#ff OUTPUT In.1, In.0 40879811 ~0% {2} r2 = JOIN r1 WITH Location::Location::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.1, Rhs.1 7527 ~3% {3} r3 = JOIN r2 WITH ExecTainted::interestingConcatenation#91000ffb#ff_10#join_rhs ON FIRST 1 OUTPUT Rhs.1, Lhs.1, Lhs.0 7527 ~0% {4} r4 = JOIN r3 WITH DataFlowUtil::Node::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.2, Lhs.1, Lhs.0, Rhs.1 7527 ~0% {5} r5 = JOIN r4 WITH DataFlowUtil::Node::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.2, Lhs.1, Lhs.0, Lhs.3, Rhs.1 7527 ~0% {6} r6 = JOIN r5 WITH DataFlowUtil::Node::getLocation#dispred#f0820431#ff ON FIRST 1 OUTPUT Rhs.1, Lhs.1, Lhs.2, Lhs.0, Lhs.3, Lhs.4 7527 ~0% {3} r7 = JOIN r6 WITH Location::Location::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT ((((((("ExecState (" ++ Rhs.1) ++ " | ") ++ Lhs.4) ++ ", ") ++ Lhs.1) ++ " | ") ++ Lhs.5 ++ ")"), Lhs.3, Lhs.2 return r7 ``` After: ``` Evaluated relational algebra for predicate ExecTainted::ExecState#class#91000ffb#fff@1ffe61ps with tuple counts: 7527 ~0% {3} r1 = JOIN ExecTainted::interestingConcatenation#91000ffb#ff WITH DataFlowUtil::Node::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.1, Lhs.0, Rhs.1 7527 ~0% {4} r2 = JOIN r1 WITH DataFlowUtil::Node::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.0, Lhs.1, Lhs.2, Rhs.1 7527 ~1% {5} r3 = JOIN r2 WITH DataFlowUtil::Node::getLocation#dispred#f0820431#ff ON FIRST 1 OUTPUT Rhs.1, Lhs.1, Lhs.0, Lhs.2, Lhs.3 7527 ~0% {5} r4 = JOIN r3 WITH Location::Location::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.1, Lhs.2, Lhs.3, Lhs.4, Rhs.1 7527 ~4% {6} r5 = JOIN r4 WITH DataFlowUtil::Node::getLocation#dispred#f0820431#ff ON FIRST 1 OUTPUT Rhs.1, Lhs.0, Lhs.1, Lhs.2, Lhs.3, Lhs.4 7527 ~0% {3} r6 = JOIN r5 WITH Location::Location::toString#dispred#f0820431#ff ON FIRST 1 OUTPUT ((((((("ExecState (" ++ Rhs.1) ++ " | ") ++ Lhs.3) ++ ", ") ++ Lhs.5) ++ " | ") ++ Lhs.4 ++ ")"), Lhs.1, Lhs.2 return r6 ``` --- cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql b/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql index 73fcf034096..7a3877f638c 100644 --- a/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql @@ -77,7 +77,7 @@ class ExecState extends DataFlow::FlowState { ExecState() { this = "ExecState (" + fst.getLocation() + " | " + fst + ", " + snd.getLocation() + " | " + snd + ")" and - interestingConcatenation(fst, snd) + interestingConcatenation(pragma[only_bind_into](fst), pragma[only_bind_into](snd)) } DataFlow::Node getFstNode() { result = fst } From 8d80e0332efafa59f80632b233de2821e5993691 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Wed, 20 Jul 2022 16:13:46 +0200 Subject: [PATCH 19/22] Ruby: update tree-sitter-ruby --- ruby/Cargo.lock | Bin 15139 -> 15139 bytes ruby/extractor/Cargo.toml | 2 +- ruby/generator/Cargo.toml | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ruby/Cargo.lock b/ruby/Cargo.lock index 1be66824a3e4184f0dabbebc3fec55848f3da549..3d7e6eb5713079c870275af07d906cca3d12cdca 100644 GIT binary patch delta 102 zcmZ2nwzzDAw^?ASxoL`liHU(pvSmt&xnW|8nT3U^L5gXznYmGtk&#)7X{wo}Nt&@T Uu?i Date: Mon, 1 Nov 2021 16:24:28 +0000 Subject: [PATCH 20/22] Ruby: separate trap-writer into its own module --- ruby/extractor/src/extractor.rs | 454 +++++++++----------------------- ruby/extractor/src/main.rs | 66 +---- ruby/extractor/src/trap.rs | 272 +++++++++++++++++++ 3 files changed, 410 insertions(+), 382 deletions(-) create mode 100644 ruby/extractor/src/trap.rs diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 8cdaff1b738..6c462e11bb4 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -1,161 +1,112 @@ +use crate::trap; use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName}; -use std::borrow::Cow; use std::collections::BTreeMap as Map; use std::collections::BTreeSet as Set; use std::fmt; -use std::io::Write; use std::path::Path; use tracing::{error, info, span, Level}; use tree_sitter::{Language, Node, Parser, Range, Tree}; -pub struct TrapWriter { - /// The accumulated trap entries - trap_output: Vec, - /// A counter for generating fresh labels - counter: u32, - /// cache of global keys - global_keys: std::collections::HashMap, +pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label { + let (file_label, fresh) = + writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path))); + if fresh { + writer.add_tuple( + "files", + vec![ + trap::Arg::Label(file_label), + trap::Arg::String(normalize_path(absolute_path)), + ], + ); + populate_parent_folders(writer, file_label, absolute_path.parent()); + } + file_label } -pub fn new_trap_writer() -> TrapWriter { - TrapWriter { - counter: 0, - trap_output: Vec::new(), - global_keys: std::collections::HashMap::new(), +fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label { + let (file_label, fresh) = writer.global_id("empty;sourcefile"); + if fresh { + writer.add_tuple( + "files", + vec![ + trap::Arg::Label(file_label), + trap::Arg::String("".to_string()), + ], + ); } + file_label } -impl TrapWriter { - /// Gets a label that will hold the unique ID of the passed string at import time. - /// This can be used for incrementally importable TRAP files -- use globally unique - /// strings to compute a unique ID for table tuples. - /// - /// Note: You probably want to make sure that the key strings that you use are disjoint - /// for disjoint column types; the standard way of doing this is to prefix (or append) - /// the column type name to the ID. Thus, you might identify methods in Java by the - /// full ID "methods_com.method.package.DeclaringClass.method(argumentList)". +pub fn populate_empty_location(writer: &mut trap::Writer) { + let file_label = populate_empty_file(writer); + location(writer, file_label, 0, 0, 0, 0); +} - fn fresh_id(&mut self) -> Label { - let label = Label(self.counter); - self.counter += 1; - self.trap_output.push(TrapEntry::FreshId(label)); - label - } - - fn global_id(&mut self, key: &str) -> (Label, bool) { - if let Some(label) = self.global_keys.get(key) { - return (*label, false); - } - let label = Label(self.counter); - self.counter += 1; - self.global_keys.insert(key.to_owned(), label); - self.trap_output - .push(TrapEntry::MapLabelToKey(label, key.to_owned())); - (label, true) - } - - fn add_tuple(&mut self, table_name: &str, args: Vec) { - self.trap_output - .push(TrapEntry::GenericTuple(table_name.to_owned(), args)) - } - - fn populate_file(&mut self, absolute_path: &Path) -> Label { - let (file_label, fresh) = self.global_id(&full_id_for_file(absolute_path)); - if fresh { - self.add_tuple( - "files", - vec![ - Arg::Label(file_label), - Arg::String(normalize_path(absolute_path)), - ], - ); - self.populate_parent_folders(file_label, absolute_path.parent()); - } - file_label - } - - fn populate_empty_file(&mut self) -> Label { - let (file_label, fresh) = self.global_id("empty;sourcefile"); - if fresh { - self.add_tuple( - "files", - vec![Arg::Label(file_label), Arg::String("".to_string())], - ); - } - file_label - } - - pub fn populate_empty_location(&mut self) { - let file_label = self.populate_empty_file(); - self.location(file_label, 0, 0, 0, 0); - } - - fn populate_parent_folders(&mut self, child_label: Label, path: Option<&Path>) { - let mut path = path; - let mut child_label = child_label; - loop { - match path { - None => break, - Some(folder) => { - let (folder_label, fresh) = self.global_id(&full_id_for_folder(folder)); - self.add_tuple( - "containerparent", - vec![Arg::Label(folder_label), Arg::Label(child_label)], +pub fn populate_parent_folders( + writer: &mut trap::Writer, + child_label: trap::Label, + path: Option<&Path>, +) { + let mut path = path; + let mut child_label = child_label; + loop { + match path { + None => break, + Some(folder) => { + let (folder_label, fresh) = + writer.global_id(&trap::full_id_for_folder(&normalize_path(folder))); + writer.add_tuple( + "containerparent", + vec![ + trap::Arg::Label(folder_label), + trap::Arg::Label(child_label), + ], + ); + if fresh { + writer.add_tuple( + "folders", + vec![ + trap::Arg::Label(folder_label), + trap::Arg::String(normalize_path(folder)), + ], ); - if fresh { - self.add_tuple( - "folders", - vec![ - Arg::Label(folder_label), - Arg::String(normalize_path(folder)), - ], - ); - path = folder.parent(); - child_label = folder_label; - } else { - break; - } + path = folder.parent(); + child_label = folder_label; + } else { + break; } } } } +} - fn location( - &mut self, - file_label: Label, - start_line: usize, - start_column: usize, - end_line: usize, - end_column: usize, - ) -> Label { - let (loc_label, fresh) = self.global_id(&format!( - "loc,{{{}}},{},{},{},{}", - file_label, start_line, start_column, end_line, end_column - )); - if fresh { - self.add_tuple( - "locations_default", - vec![ - Arg::Label(loc_label), - Arg::Label(file_label), - Arg::Int(start_line), - Arg::Int(start_column), - Arg::Int(end_line), - Arg::Int(end_column), - ], - ); - } - loc_label - } - - fn comment(&mut self, text: String) { - self.trap_output.push(TrapEntry::Comment(text)); - } - - pub fn output(self, writer: &mut dyn Write) -> std::io::Result<()> { - write!(writer, "{}", Program(self.trap_output)) +fn location( + writer: &mut trap::Writer, + file_label: trap::Label, + start_line: usize, + start_column: usize, + end_line: usize, + end_column: usize, +) -> trap::Label { + let (loc_label, fresh) = writer.global_id(&format!( + "loc,{{{}}},{},{},{},{}", + file_label, start_line, start_column, end_line, end_column + )); + if fresh { + writer.add_tuple( + "locations_default", + vec![ + trap::Arg::Label(loc_label), + trap::Arg::Label(file_label), + trap::Arg::Int(start_line), + trap::Arg::Int(start_column), + trap::Arg::Int(end_line), + trap::Arg::Int(end_column), + ], + ); } + loc_label } /// Extracts the source file at `path`, which is assumed to be canonicalized. @@ -163,7 +114,7 @@ pub fn extract( language: Language, language_prefix: &str, schema: &NodeTypeMap, - trap_writer: &mut TrapWriter, + trap_writer: &mut trap::Writer, path: &Path, source: &[u8], ranges: &[Range], @@ -183,13 +134,13 @@ pub fn extract( parser.set_included_ranges(ranges).unwrap(); let tree = parser.parse(&source, None).expect("Failed to parse file"); trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display())); - let file_label = &trap_writer.populate_file(path); + let file_label = populate_file(trap_writer, path); let mut visitor = Visitor { source, trap_writer, // TODO: should we handle path strings that are not valid UTF8 better? path: format!("{}", path.display()), - file_label: *file_label, + file_label, toplevel_child_counter: 0, stack: Vec::new(), language_prefix, @@ -201,33 +152,6 @@ pub fn extract( Ok(()) } -/// Escapes a string for use in a TRAP key, by replacing special characters with -/// HTML entities. -fn escape_key<'a, S: Into>>(key: S) -> Cow<'a, str> { - fn needs_escaping(c: char) -> bool { - matches!(c, '&' | '{' | '}' | '"' | '@' | '#') - } - - let key = key.into(); - if key.contains(needs_escaping) { - let mut escaped = String::with_capacity(2 * key.len()); - for c in key.chars() { - match c { - '&' => escaped.push_str("&"), - '{' => escaped.push_str("{"), - '}' => escaped.push_str("}"), - '"' => escaped.push_str("""), - '@' => escaped.push_str("@"), - '#' => escaped.push_str("#"), - _ => escaped.push(c), - } - } - Cow::Owned(escaped) - } else { - key - } -} - /// Normalizes the path according the common CodeQL specification. Assumes that /// `path` has already been canonicalized using `std::fs::canonicalize`. fn normalize_path(path: &Path) -> String { @@ -267,17 +191,9 @@ fn normalize_path(path: &Path) -> String { } } -fn full_id_for_file(path: &Path) -> String { - format!("{};sourcefile", escape_key(&normalize_path(path))) -} - -fn full_id_for_folder(path: &Path) -> String { - format!("{};folder", escape_key(&normalize_path(path))) -} - struct ChildNode { field_name: Option<&'static str>, - label: Label, + label: trap::Label, type_name: TypeName, } @@ -286,11 +202,11 @@ struct Visitor<'a> { path: String, /// The label to use whenever we need to refer to the `@file` entity of this /// source file. - file_label: Label, + file_label: trap::Label, /// The source code as a UTF-8 byte array source: &'a [u8], - /// A TrapWriter to accumulate trap entries - trap_writer: &'a mut TrapWriter, + /// A trap::Writer to accumulate trap entries + trap_writer: &'a mut trap::Writer, /// A counter for top-level child nodes toplevel_child_counter: usize, /// Language prefix @@ -303,7 +219,7 @@ struct Visitor<'a> { /// node the list containing the child data is popped from the stack and /// matched against the dbscheme for the node. If the expectations are met /// the corresponding row definitions are added to the trap_output. - stack: Vec<(Label, usize, Vec)>, + stack: Vec<(trap::Label, usize, Vec)>, } impl Visitor<'_> { @@ -311,19 +227,19 @@ impl Visitor<'_> { &mut self, error_message: String, full_error_message: String, - loc: Label, + loc: trap::Label, ) { error!("{}", full_error_message); let id = self.trap_writer.fresh_id(); self.trap_writer.add_tuple( "diagnostics", vec![ - Arg::Label(id), - Arg::Int(40), // severity 40 = error - Arg::String("parse_error".to_string()), - Arg::String(error_message), - Arg::String(full_error_message), - Arg::Label(loc), + trap::Arg::Label(id), + trap::Arg::Int(40), // severity 40 = error + trap::Arg::String("parse_error".to_string()), + trap::Arg::String(error_message), + trap::Arg::String(full_error_message), + trap::Arg::Label(loc), ], ); } @@ -335,7 +251,8 @@ impl Visitor<'_> { node: Node, ) { let (start_line, start_column, end_line, end_column) = location_for(self.source, node); - let loc = self.trap_writer.location( + let loc = location( + self.trap_writer, self.file_label, start_line, start_column, @@ -374,7 +291,8 @@ impl Visitor<'_> { } let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack"); let (start_line, start_column, end_line, end_column) = location_for(self.source, node); - let loc = self.trap_writer.location( + let loc = location( + self.trap_writer, self.file_label, start_line, start_column, @@ -404,18 +322,19 @@ impl Visitor<'_> { self.trap_writer.add_tuple( &format!("{}_ast_node_info", self.language_prefix), vec![ - Arg::Label(id), - Arg::Label(parent_id), - Arg::Int(parent_index), - Arg::Label(loc), + trap::Arg::Label(id), + trap::Arg::Label(parent_id), + trap::Arg::Int(parent_index), + trap::Arg::Label(loc), ], ); self.trap_writer.add_tuple( &format!("{}_tokeninfo", self.language_prefix), vec![ - Arg::Label(id), - Arg::Int(*kind_id), + trap::Arg::Label(id), + trap::Arg::Int(*kind_id), sliced_source_arg(self.source, node), + trap::Arg::Label(loc), ], ); } @@ -427,13 +346,13 @@ impl Visitor<'_> { self.trap_writer.add_tuple( &format!("{}_ast_node_info", self.language_prefix), vec![ - Arg::Label(id), - Arg::Label(parent_id), - Arg::Int(parent_index), - Arg::Label(loc), + trap::Arg::Label(id), + trap::Arg::Label(parent_id), + trap::Arg::Int(parent_index), + trap::Arg::Label(loc), ], ); - let mut all_args = vec![Arg::Label(id)]; + let mut all_args = vec![trap::Arg::Label(id)]; all_args.extend(args); self.trap_writer.add_tuple(table_name, all_args); } @@ -472,9 +391,9 @@ impl Visitor<'_> { node: &Node, fields: &[Field], child_nodes: &[ChildNode], - parent_id: Label, - ) -> Option> { - let mut map: Map<&Option, (&Field, Vec)> = Map::new(); + parent_id: trap::Label, + ) -> Option> { + let mut map: Map<&Option, (&Field, Vec)> = Map::new(); for field in fields { map.insert(&field.name, (field, Vec::new())); } @@ -488,9 +407,9 @@ impl Visitor<'_> { { // We can safely unwrap because type_matches checks the key is in the map. let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap(); - values.push(Arg::Int(*int_value)); + values.push(trap::Arg::Int(*int_value)); } else { - values.push(Arg::Label(child_node.label)); + values.push(trap::Arg::Label(child_node.label)); } } else if field.name.is_some() { let error_message = format!( @@ -569,9 +488,9 @@ impl Visitor<'_> { ); break; } - let mut args = vec![Arg::Label(parent_id)]; + let mut args = vec![trap::Arg::Label(parent_id)]; if *has_index { - args.push(Arg::Int(index)) + args.push(trap::Arg::Int(index)) } args.push(child_value.clone()); self.trap_writer.add_tuple(table_name, args); @@ -625,9 +544,9 @@ impl Visitor<'_> { } // Emit a slice of a source file as an Arg. -fn sliced_source_arg(source: &[u8], n: Node) -> Arg { +fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg { let range = n.byte_range(); - Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) + trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) } // Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated. @@ -699,59 +618,6 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) { } } -pub struct Program(Vec); - -impl fmt::Display for Program { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut text = String::new(); - for trap_entry in &self.0 { - text.push_str(&format!("{}\n", trap_entry)); - } - write!(f, "{}", text) - } -} - -enum TrapEntry { - /// Maps the label to a fresh id, e.g. `#123=*`. - FreshId(Label), - /// Maps the label to a key, e.g. `#7=@"foo"`. - MapLabelToKey(Label, String), - /// foo_bar(arg*) - GenericTuple(String, Vec), - Comment(String), -} -impl fmt::Display for TrapEntry { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - TrapEntry::FreshId(label) => write!(f, "{}=*", label), - TrapEntry::MapLabelToKey(label, key) => { - write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\"")) - } - TrapEntry::GenericTuple(name, args) => { - write!(f, "{}(", name)?; - for (index, arg) in args.iter().enumerate() { - if index > 0 { - write!(f, ",")?; - } - write!(f, "{}", arg)?; - } - write!(f, ")") - } - TrapEntry::Comment(line) => write!(f, "// {}", line), - } - } -} - -#[derive(Debug, Copy, Clone)] -// Identifiers of the form #0, #1... -struct Label(u32); - -impl fmt::Display for Label { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "#{:x}", self.0) - } -} - // Numeric indices. #[derive(Debug, Copy, Clone)] struct Index(usize); @@ -761,69 +627,3 @@ impl fmt::Display for Index { write!(f, "{}", self.0) } } - -// Some untyped argument to a TrapEntry. -#[derive(Debug, Clone)] -enum Arg { - Label(Label), - Int(usize), - String(String), -} - -const MAX_STRLEN: usize = 1048576; - -impl fmt::Display for Arg { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Arg::Label(x) => write!(f, "{}", x), - Arg::Int(x) => write!(f, "{}", x), - Arg::String(x) => write!( - f, - "\"{}\"", - limit_string(x, MAX_STRLEN).replace("\"", "\"\"") - ), - } - } -} - -/// Limit the length (in bytes) of a string. If the string's length in bytes is -/// less than or equal to the limit then the entire string is returned. Otherwise -/// the string is sliced at the provided limit. If there is a multi-byte character -/// at the limit then the returned slice will be slightly shorter than the limit to -/// avoid splitting that multi-byte character. -fn limit_string(string: &str, max_size: usize) -> &str { - if string.len() <= max_size { - return string; - } - let p = string.as_bytes(); - let mut index = max_size; - // We want to clip the string at [max_size]; however, the character at that position - // may span several bytes. We need to find the first byte of the character. In UTF-8 - // encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte. - // Therefore we decrement the index as long as there are bytes matching this pattern. - // This ensures we cut the string at the border between one character and another. - while index > 0 && (p[index] & 0b11000000) == 0b10000000 { - index -= 1; - } - &string[0..index] -} - -#[test] -fn limit_string_test() { - assert_eq!("hello", limit_string(&"hello world".to_owned(), 5)); - assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6)); - assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5)); -} - -#[test] -fn escape_key_test() { - assert_eq!("foo!", escape_key("foo!")); - assert_eq!("foo{}", escape_key("foo{}")); - assert_eq!("{}", escape_key("{}")); - assert_eq!("", escape_key("")); - assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb")); - assert_eq!( - "/path/to/foo&{}"@#.rb", - escape_key("/path/to/foo&{}\"@#.rb") - ); -} diff --git a/ruby/extractor/src/main.rs b/ruby/extractor/src/main.rs index 7e4aa973518..4fc886a50ad 100644 --- a/ruby/extractor/src/main.rs +++ b/ruby/extractor/src/main.rs @@ -1,51 +1,15 @@ mod extractor; +mod trap; extern crate num_cpus; use clap::arg; -use flate2::write::GzEncoder; use rayon::prelude::*; use std::fs; -use std::io::{BufRead, BufWriter}; +use std::io::BufRead; use std::path::{Path, PathBuf}; use tree_sitter::{Language, Parser, Range}; -enum TrapCompression { - None, - Gzip, -} - -impl TrapCompression { - fn from_env() -> TrapCompression { - match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") { - Ok(method) => match TrapCompression::from_string(&method) { - Some(c) => c, - None => { - tracing::error!("Unknown compression method '{}'; using gzip.", &method); - TrapCompression::Gzip - } - }, - // Default compression method if the env var isn't set: - Err(_) => TrapCompression::Gzip, - } - } - - fn from_string(s: &str) -> Option { - match s.to_lowercase().as_ref() { - "none" => Some(TrapCompression::None), - "gzip" => Some(TrapCompression::Gzip), - _ => None, - } - } - - fn extension(&self) -> &str { - match self { - TrapCompression::None => "trap", - TrapCompression::Gzip => "trap.gz", - } - } -} - /** * Gets the number of threads the extractor should use, by reading the * CODEQL_THREADS environment variable and using it as described in the @@ -118,7 +82,7 @@ fn main() -> std::io::Result<()> { .value_of("output-dir") .expect("missing --output-dir"); let trap_dir = PathBuf::from(trap_dir); - let trap_compression = TrapCompression::from_env(); + let trap_compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION"); let file_list = matches.value_of("file-list").expect("missing --file-list"); let file_list = fs::File::open(file_list)?; @@ -141,7 +105,7 @@ fn main() -> std::io::Result<()> { let src_archive_file = path_for(&src_archive_dir, &path, ""); let mut source = std::fs::read(&path)?; let code_ranges; - let mut trap_writer = extractor::new_trap_writer(); + let mut trap_writer = trap::Writer::new(); if path.extension().map_or(false, |x| x == "erb") { tracing::info!("scanning: {}", path.display()); extractor::extract( @@ -181,33 +145,25 @@ fn main() -> std::io::Result<()> { )?; std::fs::create_dir_all(&src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?; - write_trap(&trap_dir, path, trap_writer, &trap_compression) + write_trap(&trap_dir, path, &trap_writer, trap_compression) }) .expect("failed to extract files"); let path = PathBuf::from("extras"); - let mut trap_writer = extractor::new_trap_writer(); - trap_writer.populate_empty_location(); - write_trap(&trap_dir, path, trap_writer, &trap_compression) + let mut trap_writer = trap::Writer::new(); + extractor::populate_empty_location(&mut trap_writer); + write_trap(&trap_dir, path, &trap_writer, trap_compression) } fn write_trap( trap_dir: &Path, path: PathBuf, - trap_writer: extractor::TrapWriter, - trap_compression: &TrapCompression, + trap_writer: &trap::Writer, + trap_compression: trap::Compression, ) -> std::io::Result<()> { let trap_file = path_for(trap_dir, &path, trap_compression.extension()); std::fs::create_dir_all(&trap_file.parent().unwrap())?; - let trap_file = std::fs::File::create(&trap_file)?; - let mut trap_file = BufWriter::new(trap_file); - match trap_compression { - TrapCompression::None => trap_writer.output(&mut trap_file), - TrapCompression::Gzip => { - let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast()); - trap_writer.output(&mut compressed_writer) - } - } + trap_writer.write_to_file(&trap_file, trap_compression) } fn scan_erb( diff --git a/ruby/extractor/src/trap.rs b/ruby/extractor/src/trap.rs new file mode 100644 index 00000000000..d64c520c4cc --- /dev/null +++ b/ruby/extractor/src/trap.rs @@ -0,0 +1,272 @@ +use std::borrow::Cow; +use std::fmt; +use std::io::{BufWriter, Write}; +use std::path::Path; + +use flate2::write::GzEncoder; + +pub struct Writer { + /// The accumulated trap entries + trap_output: Vec, + /// A counter for generating fresh labels + counter: u32, + /// cache of global keys + global_keys: std::collections::HashMap, +} + +impl Writer { + pub fn new() -> Writer { + Writer { + counter: 0, + trap_output: Vec::new(), + global_keys: std::collections::HashMap::new(), + } + } + + pub fn fresh_id(&mut self) -> Label { + let label = Label(self.counter); + self.counter += 1; + self.trap_output.push(Entry::FreshId(label)); + label + } + + /// Gets a label that will hold the unique ID of the passed string at import time. + /// This can be used for incrementally importable TRAP files -- use globally unique + /// strings to compute a unique ID for table tuples. + /// + /// Note: You probably want to make sure that the key strings that you use are disjoint + /// for disjoint column types; the standard way of doing this is to prefix (or append) + /// the column type name to the ID. Thus, you might identify methods in Java by the + /// full ID "methods_com.method.package.DeclaringClass.method(argumentList)". + pub fn global_id(&mut self, key: &str) -> (Label, bool) { + if let Some(label) = self.global_keys.get(key) { + return (*label, false); + } + let label = Label(self.counter); + self.counter += 1; + self.global_keys.insert(key.to_owned(), label); + self.trap_output + .push(Entry::MapLabelToKey(label, key.to_owned())); + (label, true) + } + + pub fn add_tuple(&mut self, table_name: &str, args: Vec) { + self.trap_output + .push(Entry::GenericTuple(table_name.to_owned(), args)) + } + + pub fn comment(&mut self, text: String) { + self.trap_output.push(Entry::Comment(text)); + } + + pub fn write_to_file(&self, path: &Path, compression: Compression) -> std::io::Result<()> { + let trap_file = std::fs::File::create(path)?; + let mut trap_file = BufWriter::new(trap_file); + match compression { + Compression::None => self.write_trap_entries(&mut trap_file), + Compression::Gzip => { + let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast()); + self.write_trap_entries(&mut compressed_writer) + } + } + } + + fn write_trap_entries(&self, file: &mut W) -> std::io::Result<()> { + for trap_entry in &self.trap_output { + writeln!(file, "{}", trap_entry)?; + } + std::io::Result::Ok(()) + } +} + +pub enum Entry { + /// Maps the label to a fresh id, e.g. `#123=*`. + FreshId(Label), + /// Maps the label to a key, e.g. `#7=@"foo"`. + MapLabelToKey(Label, String), + /// foo_bar(arg*) + GenericTuple(String, Vec), + Comment(String), +} + +impl fmt::Display for Entry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Entry::FreshId(label) => write!(f, "{}=*", label), + Entry::MapLabelToKey(label, key) => { + write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\"")) + } + Entry::GenericTuple(name, args) => { + write!(f, "{}(", name)?; + for (index, arg) in args.iter().enumerate() { + if index > 0 { + write!(f, ",")?; + } + write!(f, "{}", arg)?; + } + write!(f, ")") + } + Entry::Comment(line) => write!(f, "// {}", line), + } + } +} + +#[derive(Debug, Copy, Clone)] +// Identifiers of the form #0, #1... +pub struct Label(u32); + +impl fmt::Display for Label { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#{:x}", self.0) + } +} + +// Some untyped argument to a TrapEntry. +#[derive(Debug, Clone)] +pub enum Arg { + Label(Label), + Int(usize), + String(String), +} + +const MAX_STRLEN: usize = 1048576; + +impl fmt::Display for Arg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Arg::Label(x) => write!(f, "{}", x), + Arg::Int(x) => write!(f, "{}", x), + Arg::String(x) => write!( + f, + "\"{}\"", + limit_string(x, MAX_STRLEN).replace("\"", "\"\"") + ), + } + } +} + +pub struct Program(Vec); + +impl fmt::Display for Program { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut text = String::new(); + for trap_entry in &self.0 { + text.push_str(&format!("{}\n", trap_entry)); + } + write!(f, "{}", text) + } +} + +pub fn full_id_for_file(normalized_path: &str) -> String { + format!("{};sourcefile", escape_key(normalized_path)) +} + +pub fn full_id_for_folder(normalized_path: &str) -> String { + format!("{};folder", escape_key(normalized_path)) +} + +/// Escapes a string for use in a TRAP key, by replacing special characters with +/// HTML entities. +fn escape_key<'a, S: Into>>(key: S) -> Cow<'a, str> { + fn needs_escaping(c: char) -> bool { + matches!(c, '&' | '{' | '}' | '"' | '@' | '#') + } + + let key = key.into(); + if key.contains(needs_escaping) { + let mut escaped = String::with_capacity(2 * key.len()); + for c in key.chars() { + match c { + '&' => escaped.push_str("&"), + '{' => escaped.push_str("{"), + '}' => escaped.push_str("}"), + '"' => escaped.push_str("""), + '@' => escaped.push_str("@"), + '#' => escaped.push_str("#"), + _ => escaped.push(c), + } + } + Cow::Owned(escaped) + } else { + key + } +} + +/// Limit the length (in bytes) of a string. If the string's length in bytes is +/// less than or equal to the limit then the entire string is returned. Otherwise +/// the string is sliced at the provided limit. If there is a multi-byte character +/// at the limit then the returned slice will be slightly shorter than the limit to +/// avoid splitting that multi-byte character. +fn limit_string(string: &str, max_size: usize) -> &str { + if string.len() <= max_size { + return string; + } + let p = string.as_bytes(); + let mut index = max_size; + // We want to clip the string at [max_size]; however, the character at that position + // may span several bytes. We need to find the first byte of the character. In UTF-8 + // encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte. + // Therefore we decrement the index as long as there are bytes matching this pattern. + // This ensures we cut the string at the border between one character and another. + while index > 0 && (p[index] & 0b11000000) == 0b10000000 { + index -= 1; + } + &string[0..index] +} + +#[derive(Clone, Copy)] +pub enum Compression { + None, + Gzip, +} + +impl Compression { + pub fn from_env(var_name: &str) -> Compression { + match std::env::var(var_name) { + Ok(method) => match Compression::from_string(&method) { + Some(c) => c, + None => { + tracing::error!("Unknown compression method '{}'; using gzip.", &method); + Compression::Gzip + } + }, + // Default compression method if the env var isn't set: + Err(_) => Compression::Gzip, + } + } + + pub fn from_string(s: &str) -> Option { + match s.to_lowercase().as_ref() { + "none" => Some(Compression::None), + "gzip" => Some(Compression::Gzip), + _ => None, + } + } + + pub fn extension(&self) -> &str { + match self { + Compression::None => "trap", + Compression::Gzip => "trap.gz", + } + } +} + +#[test] +fn limit_string_test() { + assert_eq!("hello", limit_string(&"hello world".to_owned(), 5)); + assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6)); + assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5)); +} + +#[test] +fn escape_key_test() { + assert_eq!("foo!", escape_key("foo!")); + assert_eq!("foo{}", escape_key("foo{}")); + assert_eq!("{}", escape_key("{}")); + assert_eq!("", escape_key("")); + assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb")); + assert_eq!( + "/path/to/foo&{}"@#.rb", + escape_key("/path/to/foo&{}\"@#.rb") + ); +} From 0a8ecd3cf73296779e92966682fced6793cdce33 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Mon, 1 Nov 2021 16:31:30 +0000 Subject: [PATCH 21/22] Ruby: compute path string only once --- ruby/extractor/src/extractor.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 6c462e11bb4..74a8db28030 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -119,27 +119,28 @@ pub fn extract( source: &[u8], ranges: &[Range], ) -> std::io::Result<()> { + let path_str = format!("{}", path.display()); let span = span!( Level::TRACE, "extract", - file = %path.display() + file = %path_str ); let _enter = span.enter(); - info!("extracting: {}", path.display()); + info!("extracting: {}", path_str); let mut parser = Parser::new(); parser.set_language(language).unwrap(); parser.set_included_ranges(ranges).unwrap(); let tree = parser.parse(&source, None).expect("Failed to parse file"); - trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display())); + trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str)); let file_label = populate_file(trap_writer, path); let mut visitor = Visitor { source, trap_writer, // TODO: should we handle path strings that are not valid UTF8 better? - path: format!("{}", path.display()), + path: &path_str, file_label, toplevel_child_counter: 0, stack: Vec::new(), @@ -199,7 +200,7 @@ struct ChildNode { struct Visitor<'a> { /// The file path of the source code (as string) - path: String, + path: &'a str, /// The label to use whenever we need to refer to the `@file` entity of this /// source file. file_label: trap::Label, From 8dae85e1b1a249f9a2c0836eb184df26f4fadce5 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Mon, 1 Nov 2021 17:19:26 +0000 Subject: [PATCH 22/22] Ruby: avoid repeated construction of table name strings --- ruby/extractor/src/extractor.rs | 44 ++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 74a8db28030..db280634ae5 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -136,17 +136,15 @@ pub fn extract( let tree = parser.parse(&source, None).expect("Failed to parse file"); trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str)); let file_label = populate_file(trap_writer, path); - let mut visitor = Visitor { + let mut visitor = Visitor::new( source, trap_writer, // TODO: should we handle path strings that are not valid UTF8 better? - path: &path_str, + &path_str, file_label, - toplevel_child_counter: 0, - stack: Vec::new(), language_prefix, schema, - }; + ); traverse(&tree, &mut visitor); parser.reset(); @@ -210,8 +208,10 @@ struct Visitor<'a> { trap_writer: &'a mut trap::Writer, /// A counter for top-level child nodes toplevel_child_counter: usize, - /// Language prefix - language_prefix: &'a str, + /// Language-specific name of the AST info table + ast_node_info_table_name: String, + /// Language-specific name of the tokeninfo table + tokeninfo_table_name: String, /// A lookup table from type name to node types schema: &'a NodeTypeMap, /// A stack for gathering information from child nodes. Whenever a node is @@ -223,7 +223,28 @@ struct Visitor<'a> { stack: Vec<(trap::Label, usize, Vec)>, } -impl Visitor<'_> { +impl<'a> Visitor<'a> { + fn new( + source: &'a [u8], + trap_writer: &'a mut trap::Writer, + path: &'a str, + file_label: trap::Label, + language_prefix: &str, + schema: &'a NodeTypeMap, + ) -> Visitor<'a> { + Visitor { + path, + file_label, + source, + trap_writer, + toplevel_child_counter: 0, + ast_node_info_table_name: format!("{}_ast_node_info", language_prefix), + tokeninfo_table_name: format!("{}_tokeninfo", language_prefix), + schema, + stack: Vec::new(), + } + } + fn record_parse_error( &mut self, error_message: String, @@ -321,7 +342,7 @@ impl Visitor<'_> { match &table.kind { EntryKind::Token { kind_id, .. } => { self.trap_writer.add_tuple( - &format!("{}_ast_node_info", self.language_prefix), + &self.ast_node_info_table_name, vec![ trap::Arg::Label(id), trap::Arg::Label(parent_id), @@ -330,12 +351,11 @@ impl Visitor<'_> { ], ); self.trap_writer.add_tuple( - &format!("{}_tokeninfo", self.language_prefix), + &self.tokeninfo_table_name, vec![ trap::Arg::Label(id), trap::Arg::Int(*kind_id), sliced_source_arg(self.source, node), - trap::Arg::Label(loc), ], ); } @@ -345,7 +365,7 @@ impl Visitor<'_> { } => { if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) { self.trap_writer.add_tuple( - &format!("{}_ast_node_info", self.language_prefix), + &self.ast_node_info_table_name, vec![ trap::Arg::Label(id), trap::Arg::Label(parent_id),