From 32be2296e6a5c445efa22eba7d092b6a2da3607d Mon Sep 17 00:00:00 2001 From: Ian Lynagh Date: Thu, 12 Sep 2024 14:57:29 +0100 Subject: [PATCH] Java/Kotlin: Add some dbscheme comments --- java/ql/lib/config/semmlecode.dbscheme | 194 ++++++++++++++++++++++++- 1 file changed, 190 insertions(+), 4 deletions(-) diff --git a/java/ql/lib/config/semmlecode.dbscheme b/java/ql/lib/config/semmlecode.dbscheme index 876cabc76c5..2dd13e7d6fb 100644 --- a/java/ql/lib/config/semmlecode.dbscheme +++ b/java/ql/lib/config/semmlecode.dbscheme @@ -22,15 +22,30 @@ compilations( string name : string ref ); +/** + * Indicates whether this was a Java or Kotlin compilation. + */ case @compilation.kind of 1 = @javacompilation | 2 = @kotlincompilation ; +/** + * This indicates that extraction has started for this compilation. + * There will be a corresponding compilation_finished row once + * extraction has completed. If there is no compilation_finished row + * then the extractor did not terminate properly. + */ compilation_started( int id : @compilation ref ) +/** + * This provides ad-hoc information about a compilation. For example, + * key "Annotation processors enabled" and value "true", or + * key "Used annotation processor" and value + * "lombok.launch.AnnotationProcessorHider$AnnotationProcessor". + */ compilation_info( int id : @compilation ref, string info_key: string ref, @@ -166,6 +181,9 @@ compilation_finished( int result : int ref ); +/** + * An error or warning generated by the extractor. + */ diagnostics( unique int id: @diagnostic, string generated_by: string ref, // TODO: Sync this with the other languages? @@ -177,7 +195,8 @@ diagnostics( ); /** - * An error or warning generated by the extractor. + * Where a diagnostic was generated. A diagnostic may not have a row + * here if it can't be connected to a particular file. 
* The diagnostic message `diagnostic` was generated during compiler * invocation `compilation`, and is the `file_number_diagnostic_number`th * message generated while extracting the `file_number`th file of that @@ -191,10 +210,12 @@ diagnostic_for( int file_number_diagnostic_number : int ref ); -/* - * External artifacts +/** + * External data, loaded from CSV files during database creation. + * The `path` is the filename of the CSV file, and each row in the file + * has its own `id`. Within that row, `column` is the (0-based) index + * of the column, and `value` is the contents of that cell. */ - externalData( int id : @externalDataElement, string path : string ref, @@ -202,6 +223,10 @@ externalData( string value : string ref ); +/** + * The path to the source code. + * Anything outside this path is considered to be 3rd party code. + */ sourceLocationPrefix( string prefix : string ref ); @@ -240,6 +265,13 @@ smap_lines( @location = @location_default ; +/** + * The location of an element. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `file`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ locations_default( unique int id: @location_default, int file: @file ref, @@ -249,6 +281,9 @@ locations_default( int endColumn: int ref ); +/** + * The element `locatableid` has location `id`. + */ hasLocation( int locatableid: @locatable ref, int id: @location ref @@ -256,6 +291,14 @@ hasLocation( @sourceline = @locatable ; +/** + * The total number of lines (`num_lines`), + * number of lines containing code (`num_code`) and + * number of lines containing comments (`num_comment`) + * in the element `element_id`. + * In a modern database, this element will be a source + * file, class or method. 
+ */ #keyset[element_id] numlines( int element_id: @sourceline ref, @@ -264,11 +307,17 @@ numlines( int num_comment: int ref ); +/** + * Associates the filepath `name` with the ID `id`. + */ files( unique int id: @file, string name: string ref ); +/** + * Associates the folder path `name` with the ID `id`. + */ folders( unique int id: @folder, string name: string ref @@ -276,6 +325,13 @@ folders( @container = @folder | @file +/** + * Gives the parent of a container (file or folder). + * Note that unintuitive parent relationships are possible; + * for example, we might have the parent of the folder + * `/usr/share/java/junit4.jar/junit` be the file + * `/usr/share/java/junit4.jar`. + */ containerparent( int parent: @container ref, unique int child: @container ref @@ -285,11 +341,19 @@ containerparent( * Java */ +/** + * Associates a compilation unit (`.java`, `.kt` or `.class` file) + * with its declared package. + */ cupackage( unique int id: @file ref, int packageid: @package ref ); +/** + * Gives the key/value pairs from the main section of the JAR manifest + * file `fileid`. + */ #keyset[fileid,keyName] jarManifestMain( int fileid: @file ref, @@ -297,6 +361,10 @@ jarManifestMain( string value: string ref ); +/** + * Gives the key/value pairs from the section named `entryName` of the + * JAR manifest file `fileid`. + */ #keyset[fileid,entryName,keyName] jarManifestEntries( int fileid: @file ref, @@ -305,16 +373,26 @@ jarManifestEntries( string value: string ref ); +/** + * Gives the ID for the package name `nodeName`. + */ packages( unique int id: @package, string nodeName: string ref ); +/** + * Gives the ID for the primitive type (e.g. `void`, `int`, or + * `<nulltype>`) `nodeName`. + */ primitives( unique int id: @primitive, string nodeName: string ref ); +/** + * Gives the ID for the modifier (e.g. `public` or `static`) `nodeName`. 
+ */ modifiers( unique int id: @modifier, string nodeName: string ref @@ -328,6 +406,21 @@ error_type( unique int id: @errortype ); +/** + * A class or interface `nodeName`, in the package `parentid`. + * The `nodeName` will be of the form `Foo` for the source + * class, and `sourceid` will equal `id`. + * For non-generic classes this same `id` will be the class used in + * types. + * For generic classes, there will be separate `id`s for the raw type + * (with `nodeName` `Foo<>`), and instantiations (with `nodeName` + * `Foo<String>`). In both cases, the `sourceid` will be the `id` of + * the corresponding source class. + * For nested classes, `nodeName` is only the name of the nested + * class itself. To get the full name, including the name of the + * outer class, the outer class needs to be looked up via the + * `enclInReftype` relation. + */ classes_or_interfaces( unique int id: @classorinterface, string nodeName: string ref, @@ -335,15 +428,33 @@ classes_or_interfaces( int sourceid: @classorinterface ref ); +/** + * This holds for classes that are Kotlin file classes, i.e. classes + * that are generated to contain top-level Kotlin declarations such as + * functions that are not otherwise within a JVM class. + */ file_class( int id: @classorinterface ref ); +/** + * For a Kotlin object declaration `object MyObject { ... }`, a JVM class + * is generated that has a `public static final MyObject INSTANCE` + * field. This table links the class to the field, and thus + * identifies the class as originating from a Kotlin object declaration. + */ class_object( unique int id: @classorinterface ref, unique int instance: @field ref ); +/** + * For a Kotlin `companion object`, this links the `id` of the class + * containing the companion object to the class `companion_object` that + * is generated to implement the companion, and the static field + * `instance` that is generated in the containing class to refer to the + * companion object. 
+ */ type_companion_object( unique int id: @classorinterface ref, unique int instance: @field ref, @@ -368,19 +479,32 @@ kt_type_alias( @kt_type = @kt_nullable_type | @kt_notnull_type +/** + * This holds if `id` is an `interface`, rather than a `class`. + */ isInterface( unique int id: @classorinterface ref ); +/** + * This holds if `id` is a Java `record` class. + */ isRecord( unique int id: @classorinterface ref ); +/** + * This holds if `id` is a field declaration in `parentid`. + * In Java, a single declaration may declare multiple fields. + */ fielddecls( unique int id: @fielddecl, int parentid: @reftype ref ); +/** + * The `pos`th (0-based) field declared in `fieldDeclId` is `fieldId`. + */ #keyset[fieldId] #keyset[fieldDeclId,pos] fieldDeclaredIn( int fieldId: @field ref, @@ -388,6 +512,10 @@ fieldDeclaredIn( int pos: int ref ); +/** + * Defines `id` to be a field with name `nodeName` and type `typeid` + * in class `parentid`. + */ fields( unique int id: @field, string nodeName: string ref, @@ -395,11 +523,29 @@ fields( int parentid: @reftype ref ); +/** + * The Kotlin type of field `id` is `kttypeid`. + */ fieldsKotlinType( unique int id: @field ref, int kttypeid: @kt_type ref ); +/** + * A constructor `nodeName` for the reftype `parentid`. + * The `nodeName` will be of the form `Foo` for the source + * constructor, and `sourceid` will equal `id`. + * For constructors of non-generic classes this same `id` will be the + * constructor used by callers. + * For generic classes, there will be separate `id`s for the raw type's + * constructor (with `nodeName` `Foo<>`), and instantiations (with + * `nodeName` `Foo<String>`). In both cases, the `sourceid` will be the + * `id` of the corresponding source constructor. + * The `typeid` is the return type, which is always `void`. + * The `signature` gives a string representation of the constructor's + * signature in which all types are fully qualified, e.g. + * `Constr(java.lang.Object,java.lang.String,int)`. 
+ */ constrs( unique int id: @constructor, string nodeName: string ref, @@ -414,6 +560,21 @@ constrsKotlinType( int kttypeid: @kt_type ref ); +/** + * A method `nodeName` that is a member of the reftype `parentid`. + * The `nodeName` will be the method's name, e.g. `foo`. + * For source methods, `sourceid` will equal `id`. + * For methods that are themselves generic (`public <X> void foo(X x)`) + * there is still only a source method. + * For methods in generic classes, there will be separate `id`s for the + * methods in different instantiations. These will all have the same + * `nodeName` (e.g. `foo`), and the `sourceid` will be the `id` of the + * corresponding source method. + * The `typeid` is the return type. + * The `signature` gives a string representation of the method's + * signature in which all types are fully qualified, e.g. + * `foo(java.lang.Object,java.lang.String,int)`. + */ methods( unique int id: @method, string nodeName: string ref, @@ -428,6 +589,13 @@ methodsKotlinType( int kttypeid: @kt_type ref ); +/** + * The `pos`th (0-based) parameter of the callable `parentid`, with + * type `typeid`. + * If the callable is a source callable, then `sourceid` will equal `id`. + * If the callable is not a source callable, then `sourceid` will be + * the id of the corresponding parameter in its source callable. + */ #keyset[parentid,pos] params( unique int id: @param, @@ -442,11 +610,29 @@ paramsKotlinType( int kttypeid: @kt_type ref ); +/** + * The parameter `id` has name `nodeName`. + * There may be no row for a particular parameter, for example + * because the parameter belongs to an external declaration that we + * don't have the source code for, or it is a parameter of a method in + * an instantiation of a generic class. + */ paramName( unique int id: @param ref, string nodeName: string ref ); +/** + * Holds if `param` is a var args parameter, such as `bar` in + * ``` + * void myMethod(String foo, String... 
bar) {} + * ``` + * in Java code, or in + * ``` + * fun myMethod(foo: String, vararg bar: String) {} + * ``` + * in Kotlin code. + */ isVarargsParam( int param: @param ref );