mirror of
https://github.com/github/codeql.git
synced 2025-12-21 11:16:30 +01:00
Rework and simplify comment extraction
This commit is contained in:
@@ -153,7 +153,7 @@ fun <T> fakeLabel(): Label<T> {
|
||||
|
||||
class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val file: IrFile) {
|
||||
|
||||
private val commentExtractor: CommentExtractor = CommentExtractor(logger, tw, file)
|
||||
private val commentExtractor: CommentExtractor = CommentExtractor(logger, tw, file, this)
|
||||
|
||||
val fileClass by lazy {
|
||||
extractFileClass(file)
|
||||
@@ -165,7 +165,6 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
tw.writeCupackage(id, pkgId)
|
||||
file.declarations.map { extractDeclaration(it, Optional.empty()) }
|
||||
commentExtractor.extract()
|
||||
commentExtractor.bindCommentsToElement()
|
||||
}
|
||||
|
||||
|
||||
@@ -261,7 +260,27 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
}
|
||||
|
||||
fun getTypeParameterLabel(param: IrTypeParameter): String {
|
||||
/**
 * Maps an IR element to the label string used for it, or `null` when no label can be produced.
 *
 * Files, classes, type parameters, functions, value parameters and properties delegate to their
 * dedicated label helpers. Bodies and expressions yield the marker `"*"`. Fields and all other
 * element kinds are not handled yet and yield `null`.
 */
fun getLabel(element: IrElement): String? =
    when (element) {
        is IrFile -> "@\"${element.path};sourcefile\"" // todo: remove copy-pasted code
        is IrClass -> getClassLabel(element)
        is IrTypeParameter -> getTypeParameterLabel(element)
        is IrFunction -> getFunctionLabel(element)
        is IrValueParameter -> getValueParameterLabel(element)
        is IrProperty -> getPropertyLabel(element)

        // Fresh entities:
        is IrBody, is IrExpression -> "*"

        // todo:
        is IrField -> null
        // todo add others:
        else -> null
    }
|
||||
|
||||
/**
 * Builds the label string for a type parameter from the label of its declaring parent
 * (embedded in braces) and the parameter's name.
 */
private fun getTypeParameterLabel(param: IrTypeParameter): String =
    useDeclarationParent(param.parent).let { parentLabel ->
        "@\"typevar;{$parentLabel};${param.name}\""
    }
|
||||
@@ -270,7 +289,7 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
return tw.getLabelFor(getTypeParameterLabel(param))
|
||||
}
|
||||
|
||||
fun getClassLabel(c: IrClass): String {
|
||||
private fun getClassLabel(c: IrClass): String {
|
||||
val pkg = c.packageFqName?.asString() ?: ""
|
||||
val cls = c.name.asString()
|
||||
val qualClassName = if (pkg.isEmpty()) cls else "$pkg.$cls"
|
||||
@@ -294,7 +313,6 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
|
||||
fun extractClass(c: IrClass): Label<out DbClassorinterface> {
|
||||
commentExtractor.addPossibleCommentOwner(c)
|
||||
val id = addClassLabel(c)
|
||||
val locId = tw.getLocation(c)
|
||||
val pkg = c.packageFqName?.asString() ?: ""
|
||||
@@ -364,20 +382,30 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
return t
|
||||
}
|
||||
|
||||
fun useFunction(f: IrFunction): Label<out DbMethod> {
|
||||
/**
 * Builds the label string for a function.
 *
 * The string combines the parent's label, the function name, the erased value-parameter types
 * (each wrapped in braces and joined with the default ", " separator) and the erased return type.
 */
private fun getFunctionLabel(f: IrFunction): String {
    // Resolution order is kept as parameters, return type, parent, since useType and
    // useDeclarationParent may have side effects that are not visible from here.
    val paramTypeIds = f.valueParameters.joinToString { param -> "{${useType(erase(param.type))}}" }
    val returnTypeId = useType(erase(f.returnType))
    val parentId = useDeclarationParent(f.parent)
    return "@\"callable;{$parentId}.${f.name.asString()}($paramTypeIds){$returnTypeId}\""
}
|
||||
|
||||
/**
 * Returns the method label the trap writer associates with the label string of [f].
 */
fun useFunction(f: IrFunction): Label<out DbMethod> =
    tw.getLabelFor<DbMethod>(getFunctionLabel(f))
|
||||
|
||||
fun useValueParameter(vp: IrValueParameter): Label<out DbParam> {
|
||||
/**
 * Builds the label string for a value parameter from its parent's label (embedded in braces)
 * and the parameter's index.
 */
private fun getValueParameterLabel(vp: IrValueParameter): String {
    // The parent of a value parameter is treated as a method here, hence the unchecked cast.
    @Suppress("UNCHECKED_CAST")
    val parentId = useDeclarationParent(vp.parent) as Label<out DbMethod>
    return "@\"params;{$parentId};${vp.index}\""
}
|
||||
|
||||
/**
 * Returns the parameter label the trap writer associates with the label string of [vp].
 */
fun useValueParameter(vp: IrValueParameter): Label<out DbParam> =
    tw.getLabelFor<DbParam>(getValueParameterLabel(vp))
|
||||
@@ -392,7 +420,6 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
|
||||
fun extractFunction(f: IrFunction, parentid: Label<out DbReftype>) {
|
||||
commentExtractor.addPossibleCommentOwner(f)
|
||||
val id = useFunction(f)
|
||||
val locId = tw.getLocation(f)
|
||||
val signature = "TODO"
|
||||
@@ -408,15 +435,19 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
}
|
||||
|
||||
fun useProperty(p: IrProperty): Label<out DbField> {
|
||||
/**
 * Builds the label string for a property from its parent's label (embedded in braces)
 * and the property's name.
 */
private fun getPropertyLabel(p: IrProperty): String {
    val parentId = useDeclarationParent(p.parent)
    return "@\"field;{$parentId};${p.name.asString()}\""
}
|
||||
|
||||
/**
 * Returns the field label the trap writer associates with the label string of [p].
 *
 * The label string is passed straight to the trap writer, so no mutable local is needed.
 */
fun useProperty(p: IrProperty): Label<out DbField> =
    tw.getLabelFor<DbField>(getPropertyLabel(p))
|
||||
|
||||
fun extractProperty(p: IrProperty, parentid: Label<out DbReftype>) {
|
||||
commentExtractor.addPossibleCommentOwner(p)
|
||||
val bf = p.backingField
|
||||
if(bf == null) {
|
||||
logger.warnElement(Severity.ErrorSevere, "IrProperty without backing field", p)
|
||||
@@ -430,7 +461,6 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
|
||||
fun extractBody(b: IrBody, callable: Label<out DbCallable>) {
|
||||
commentExtractor.addPossibleCommentOwner(b)
|
||||
when(b) {
|
||||
is IrBlockBody -> extractBlockBody(b, callable, callable, 0)
|
||||
else -> logger.warnElement(Severity.ErrorSevere, "Unrecognised IrBody: " + b.javaClass, b)
|
||||
@@ -467,7 +497,6 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
|
||||
fun extractStatement(s: IrStatement, callable: Label<out DbCallable>, parent: Label<out DbStmtparent>, idx: Int) {
|
||||
commentExtractor.addPossibleCommentOwner(s)
|
||||
when(s) {
|
||||
is IrExpression -> {
|
||||
extractExpression(s, callable, parent, idx)
|
||||
@@ -593,7 +622,6 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
|
||||
}
|
||||
|
||||
fun extractExpression(e: IrExpression, callable: Label<out DbCallable>, parent: Label<out DbExprparent>, idx: Int) {
|
||||
commentExtractor.addPossibleCommentOwner(e)
|
||||
when(e) {
|
||||
is IrCall -> extractCall(e, callable, parent, idx)
|
||||
is IrConst<*> -> {
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
package com.github.codeql
|
||||
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
|
||||
data class Location(val startOffset: Int, val endOffset: Int){
|
||||
fun contains(location: Location) : Boolean {
|
||||
return this.startOffset <= location.startOffset &&
|
||||
this.endOffset >= location.endOffset
|
||||
}
|
||||
}
|
||||
|
||||
fun IrElement.getLocation() : Location {
|
||||
return Location(this.startOffset, this.endOffset)
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.codeql.comments
|
||||
|
||||
import com.github.codeql.Location
|
||||
import utils.Location
|
||||
|
||||
|
||||
data class Comment(val rawText: String, val startOffset: Int, val endOffset: Int, val type: CommentType){
|
||||
fun getLocation() : Location {
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
package com.github.codeql.comments
|
||||
|
||||
enum class CommentBinding { // from C#
|
||||
Parent, // The parent element of a comment
|
||||
Best, // The most likely element associated with a comment
|
||||
Before, // The element before the comment
|
||||
After // The element after the comment
|
||||
}
|
||||
@@ -1,130 +1,29 @@
|
||||
package com.github.codeql.comments
|
||||
|
||||
import com.github.codeql.FileLogger
|
||||
import com.github.codeql.Logger
|
||||
import com.github.codeql.Severity
|
||||
import com.github.codeql.TrapWriter
|
||||
import com.github.codeql.*
|
||||
import com.github.codeql.utils.IrVisitorLookup
|
||||
import com.intellij.psi.PsiComment
|
||||
import com.intellij.psi.PsiElement
|
||||
import org.jetbrains.kotlin.backend.common.psi.PsiSourceManager
|
||||
import org.jetbrains.kotlin.backend.jvm.ir.getKtFile
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import org.jetbrains.kotlin.ir.declarations.IrFile
|
||||
import org.jetbrains.kotlin.ir.declarations.path
|
||||
import org.jetbrains.kotlin.ir.util.dump
|
||||
import org.jetbrains.kotlin.kdoc.psi.api.KDoc
|
||||
import org.jetbrains.kotlin.lexer.KtTokens
|
||||
import org.jetbrains.kotlin.psi.KtDeclaration
|
||||
import org.jetbrains.kotlin.psi.KtVisitor
|
||||
import org.jetbrains.kotlin.psi.findDocComment.findDocComment
|
||||
import org.jetbrains.kotlin.psi.psiUtil.endOffset
|
||||
import org.jetbrains.kotlin.psi.psiUtil.startOffset
|
||||
import org.jetbrains.kotlin.utils.addToStdlib.cast
|
||||
|
||||
class CommentExtractor(private val logger: FileLogger, private val tw: TrapWriter, private val file: IrFile) {
|
||||
class CommentExtractor(private val logger: FileLogger, private val tw: FileTrapWriter, private val file: IrFile, private val fileExtractor: KotlinFileExtractor) {
|
||||
private val ktFile = file.getKtFile()
|
||||
|
||||
private val comments = mutableListOf<Comment>()
|
||||
private val elements = mutableListOf<IrElement>()
|
||||
|
||||
init {
    // extract() only visits ktFile when it is non-null, so report up front that this
    // file's comments will not be processed.
    if (ktFile == null) logger.warn(Severity.Warn, "Comments are not being processed in ${file.path}.")
}
|
||||
|
||||
fun addPossibleCommentOwner(elem: IrElement) {
|
||||
if (ktFile == null) {
|
||||
return
|
||||
}
|
||||
|
||||
if (elem.startOffset == -1 || elem.endOffset == -1) {
|
||||
logger.info("Skipping element with negative offsets: ${elem.dump()}")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
val psiElement = PsiSourceManager.findPsiElement(elem, file)
|
||||
if (psiElement != null) {
|
||||
println("PSI: $psiElement for ${elem.dump()}")
|
||||
if (psiElement is KtDeclaration) {
|
||||
val docComment = findDocComment(psiElement)
|
||||
if (docComment != null) {
|
||||
println("doc comment: ${docComment.text}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
elements.add(elem)
|
||||
}
|
||||
|
||||
/**
|
||||
* Match comments to program elements.
|
||||
*/
|
||||
fun bindCommentsToElement() {
|
||||
if (comments.isEmpty()) {
|
||||
return
|
||||
}
|
||||
|
||||
comments.sortBy { it.startOffset }
|
||||
elements.sortBy { it.startOffset }
|
||||
|
||||
var commentIndex: Int = 0
|
||||
var elementIndex: Int = 0
|
||||
val elementStack: ElementStack = ElementStack()
|
||||
|
||||
while (elementIndex < elements.size) {
|
||||
val nextElement = elements[elementIndex]
|
||||
val commentsForElement = mutableListOf<Comment>()
|
||||
while (commentIndex < comments.size &&
|
||||
comments[commentIndex].endOffset < nextElement.startOffset) {
|
||||
|
||||
commentsForElement.add(comments[commentIndex])
|
||||
commentIndex++
|
||||
}
|
||||
|
||||
bindCommentsToElements(commentsForElement, elementStack, nextElement)
|
||||
|
||||
elementStack.push(nextElement)
|
||||
|
||||
elementIndex++
|
||||
}
|
||||
|
||||
// Comments after last element
|
||||
val commentsForElement = mutableListOf<Comment>()
|
||||
while (commentIndex < comments.size) {
|
||||
|
||||
commentsForElement.add(comments[commentIndex])
|
||||
commentIndex++
|
||||
}
|
||||
|
||||
bindCommentsToElements(commentsForElement, elementStack, null)
|
||||
}
|
||||
|
||||
/**
|
||||
* Bind selected comments to elements. Elements are selected from the element stack or from the next element.
|
||||
*/
|
||||
private fun bindCommentsToElements(
|
||||
commentsForElement: Collection<Comment>,
|
||||
elementStack: ElementStack,
|
||||
nextElement: IrElement?
|
||||
) {
|
||||
if (commentsForElement.any()) {
|
||||
for (comment in commentsForElement) {
|
||||
println("Comment: $comment")
|
||||
val parent = elementStack.findParent(comment.getLocation())
|
||||
println("parent: ${parent?.dump()}")
|
||||
val before = elementStack.findBefore(comment.getLocation())
|
||||
println("before: ${before?.dump()}")
|
||||
val after = elementStack.findAfter(comment.getLocation(), nextElement)
|
||||
println("after: ${after?.dump()}")
|
||||
// todo: best match
|
||||
}
|
||||
}
|
||||
|
||||
// todo write matches to DB: tw.writeHasJavadoc()
|
||||
}
|
||||
|
||||
fun extract() {
|
||||
ktFile?.accept(
|
||||
object : KtVisitor<Unit, Unit>() {
|
||||
@@ -138,10 +37,6 @@ class CommentExtractor(private val logger: FileLogger, private val tw: TrapWrite
|
||||
}
|
||||
|
||||
private fun visitCommentElement(comment: PsiComment) {
|
||||
// val loc = tw.getLocation(comment.startOffset, comment.endOffset)
|
||||
// val id: Label<DbJavadoc> = tw.getLabelFor(";comment")
|
||||
// tw.writeJavadoc(id)
|
||||
|
||||
val type: CommentType = when (comment.tokenType) {
|
||||
KtTokens.EOL_COMMENT -> {
|
||||
CommentType.SingleLine
|
||||
@@ -158,19 +53,67 @@ class CommentExtractor(private val logger: FileLogger, private val tw: TrapWrite
|
||||
}
|
||||
}
|
||||
|
||||
if (comment.tokenType == KtTokens.DOC_COMMENT)
|
||||
{
|
||||
val kdoc = comment.cast<KDoc>()
|
||||
for (sec in kdoc.getAllSections())
|
||||
println("section content: ${sec.getContent()}")
|
||||
val commentLabel = tw.getFreshIdLabel<Label<*>>()
|
||||
tw.writeTrap("// kt_comment($commentLabel,${type.value},${escapeTrapString(comment.text)})\n")
|
||||
val locId = tw.getLocation(comment.startOffset, comment.endOffset)
|
||||
tw.writeHasLocation(commentLabel as Label<out DbLocatable>, locId)
|
||||
|
||||
if (comment.tokenType == KtTokens.DOC_COMMENT) {
    // Doc comments are additionally broken into sections, each tied to the comment's label.
    val kdoc = comment.cast<KDoc>()
    for (sec in kdoc.getAllSections()) {
        val commentSectionLabel = tw.getFreshIdLabel<Label<*>>()
        tw.writeTrap("// kt_comment_section($commentSectionLabel,$commentLabel,${escapeTrapString(sec.getContent())})\n")
        // Name and subject name are optional: each is read once and a line is written only
        // when it is present. The emitted text no longer carries a stray closing brace.
        sec.name?.let { sectionName ->
            tw.writeTrap("// kt_comment_section_name($commentSectionLabel,$sectionName)\n")
        }
        sec.getSubjectName()?.let { subjectName ->
            tw.writeTrap("// kt_comment_section_subject_name($commentSectionLabel,$subjectName)\n")
        }
    }
}
|
||||
|
||||
comments.add(Comment(comment.text, comment.startOffset, comment.endOffset, type))
|
||||
// todo:
|
||||
// - store each comment in the DB
|
||||
// - do further processing on Doc comments (extract @tag text, @tag name text, @tag[name] text)
|
||||
val owner = getCommentOwner(comment)
|
||||
val elements = mutableListOf<IrElement>()
|
||||
file.accept(IrVisitorLookup(owner, file), elements)
|
||||
|
||||
for (owner in elements) {
|
||||
val label = fileExtractor.getLabel(owner)
|
||||
if (label == null) {
|
||||
logger.warn(Severity.Warn, "Couldn't get label for element: $owner")
|
||||
continue
|
||||
}
|
||||
if (label == "*") {
|
||||
logger.info("Skipping fresh entity label for element: $owner")
|
||||
continue
|
||||
}
|
||||
val existingLabel = tw.getExistingLabelFor<Label<*>>(label)
|
||||
if (existingLabel == null) {
|
||||
logger.warn(Severity.Warn, "Couldn't get existing label for $label")
|
||||
continue
|
||||
}
|
||||
|
||||
tw.writeTrap("// kt_comment_owner($commentLabel,$existingLabel)\n")
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Determines the PSI element a comment belongs to.
 *
 * Non-doc comments belong to their PSI parent. A doc comment belongs to the owner reported by
 * its [KDoc] node; when that owner is missing, or the comment carries the doc-comment token
 * type without being a [KDoc], a warning is logged and the PSI parent is used instead.
 */
private fun getCommentOwner(comment: PsiComment): PsiElement {
    if (comment.tokenType != KtTokens.DOC_COMMENT) {
        return comment.parent
    }

    if (comment !is KDoc) {
        logger.warn(Severity.Warn, "Unexpected comment type with DocComment token type")
        return comment.parent
    }

    // Read the owner once so the null check and the returned value cannot disagree,
    // which also removes the need for a `!!` assertion.
    val owner = comment.owner
    if (owner == null) {
        logger.warn(Severity.Warn, "Couldn't get owner of KDoc, using parent instead")
        return comment.parent
    }
    return owner
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
package com.github.codeql.comments
|
||||
|
||||
enum class CommentType {
|
||||
SingleLine, Block, Doc
|
||||
enum class CommentType(val value: Int) {
|
||||
SingleLine(1), Block(2), Doc(3)
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
package com.github.codeql.comments
|
||||
|
||||
import com.github.codeql.Location
|
||||
import com.github.codeql.getLocation
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import java.util.ArrayDeque
|
||||
|
||||
/**
|
||||
* Stack of elements, where each element in the stack fully contains the elements above it.
|
||||
*/
|
||||
class ElementStack {
|
||||
private val stack = ArrayDeque<IrElement>()
|
||||
|
||||
/**
|
||||
* Pops all elements from the stack that don't fully contain the new element. And then pushes the element onto the
|
||||
* stack.
|
||||
*/
|
||||
fun push(element: IrElement) {
|
||||
while (!stack.isEmpty() && !stack.peek().getLocation().contains(element.getLocation())) {
|
||||
stack.pop();
|
||||
}
|
||||
|
||||
stack.push(element);
|
||||
}
|
||||
|
||||
fun findBefore(location: Location) : IrElement? {
|
||||
return stack.lastOrNull { it.getLocation().endOffset < location.startOffset }
|
||||
}
|
||||
|
||||
fun findAfter(location: Location, next: IrElement?) : IrElement? {
|
||||
if (next == null) {
|
||||
return null
|
||||
}
|
||||
|
||||
val parent = findParent(location) ?: return next;
|
||||
|
||||
if (parent.getLocation().contains(next.getLocation())) {
|
||||
return next
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
fun findParent(location: Location) : IrElement? {
|
||||
return stack.firstOrNull { it.getLocation().contains(location) }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
package com.github.codeql.utils
|
||||
|
||||
import com.intellij.psi.PsiElement
|
||||
import org.jetbrains.kotlin.backend.common.psi.PsiSourceManager
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import org.jetbrains.kotlin.ir.declarations.IrFile
|
||||
import org.jetbrains.kotlin.ir.visitors.IrElementVisitor
|
||||
import utils.getLocation
|
||||
|
||||
/**
 * IR visitor that collects every IR element whose PSI element, as resolved by
 * [PsiSourceManager.findPsiElement] for [file], is [psi].
 *
 * Matches are appended to the collection passed as visitor data. Subtrees whose source range
 * does not intersect the range of [psi] are not descended into.
 */
class IrVisitorLookup(private val psi: PsiElement, private val file: IrFile) :
    IrElementVisitor<Unit, MutableCollection<IrElement>> {
    private val location = psi.getLocation()

    override fun visitElement(element: IrElement, data: MutableCollection<IrElement>) {
        val span = element.getLocation()

        // Only elements overlapping the PSI range are inspected or descended into.
        if (location.intersects(span)) {
            // The PSI lookup is attempted only for elements lying fully inside the PSI range.
            val matchesPsi = location.contains(span) &&
                PsiSourceManager.findPsiElement(element, file) == psi
            if (matchesPsi) {
                // There can be multiple IrElements that match the same PSI element.
                data.add(element)
            }
            element.acceptChildren(this, data)
        }
    }
}
|
||||
24
java/kotlin-extractor/src/main/kotlin/utils/Location.kt
Normal file
24
java/kotlin-extractor/src/main/kotlin/utils/Location.kt
Normal file
@@ -0,0 +1,24 @@
|
||||
package utils
|
||||
|
||||
import com.intellij.psi.PsiElement
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import org.jetbrains.kotlin.psi.psiUtil.endOffset
|
||||
import org.jetbrains.kotlin.psi.psiUtil.startOffset
|
||||
|
||||
/**
 * A source range described by a start and an end offset.
 *
 * Both comparisons below treat the offsets as inclusive bounds.
 */
data class Location(val startOffset: Int, val endOffset: Int) {
    /** True when [location] lies entirely within this range; a range contains itself. */
    fun contains(location: Location): Boolean =
        location.startOffset >= startOffset && location.endOffset <= endOffset

    /** True when this range and [location] overlap; ranges that merely touch at an offset count. */
    fun intersects(location: Location): Boolean =
        location.startOffset <= endOffset && location.endOffset >= startOffset
}
|
||||
|
||||
/** Wraps this IR element's start and end offsets in a [Location]. */
fun IrElement.getLocation(): Location = Location(startOffset, endOffset)
|
||||
|
||||
/** Wraps this PSI element's start and end offsets in a [Location]. */
fun PsiElement.getLocation(): Location = Location(startOffset, endOffset)
|
||||
Reference in New Issue
Block a user