Kotlin: Extract LighterAST comments as well as PSI comments

This commit is contained in:
Ian Lynagh
2023-09-14 11:54:11 +01:00
parent df9c601571
commit 52d924924b
5 changed files with 302 additions and 164 deletions

View File

@@ -1,6 +1,7 @@
package com.github.codeql
import com.github.codeql.comments.CommentExtractor
import com.github.codeql.comments.CommentExtractorPSI
import com.github.codeql.comments.CommentExtractorLighterAST
import com.github.codeql.utils.*
import com.github.codeql.utils.versions.*
import com.semmle.extractor.java.OdasaOutput
@@ -127,7 +128,15 @@ open class KotlinFileExtractor(
}
}
extractStaticInitializer(file, { extractFileClass(file) })
CommentExtractor(this, file, tw.fileId).extract()
val psiCommentsExtracted = CommentExtractorPSI(this, file, tw.fileId).extract()
val lighterAstCommentsExtracted = CommentExtractorLighterAST(this, file, tw.fileId).extract()
if (psiCommentsExtracted == lighterAstCommentsExtracted) {
if (psiCommentsExtracted) {
logger.warnElement("Found both PSI and LightAST comments in ${file.path}.", file)
} else {
logger.warnElement("Comments could not be processed in ${file.path}.", file)
}
}
if (!declarationStack.isEmpty()) {
logger.errorElement("Declaration stack is not empty after processing the file", file)

View File

@@ -1,187 +1,82 @@
package com.github.codeql.comments
import com.github.codeql.*
import com.github.codeql.utils.IrVisitorLookup
import com.github.codeql.utils.isLocalFunction
import com.github.codeql.utils.Psi2IrFacade
import com.github.codeql.utils.versions.getPsi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.expressions.IrBody
import org.jetbrains.kotlin.ir.expressions.IrExpression
import org.jetbrains.kotlin.ir.util.isAnonymousObject
import org.jetbrains.kotlin.ir.util.parentClassOrNull
import org.jetbrains.kotlin.kdoc.psi.api.KDoc
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.KtVisitor
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
class CommentExtractor(private val fileExtractor: KotlinFileExtractor, private val file: IrFile, private val fileLabel: Label<out DbFile>) {
private val tw = fileExtractor.tw
private val logger = fileExtractor.logger
open class CommentExtractor(protected val fileExtractor: KotlinFileExtractor, protected val file: IrFile, protected val fileLabel: Label<out DbFile>) {
protected val tw = fileExtractor.tw
protected val logger = fileExtractor.logger
fun extract() {
val psi2Ir = getPsi2Ir()
if (psi2Ir == null) {
logger.warn("Comments will not be extracted as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
return
protected fun getLabel(element: IrElement): Label<out DbTop>? {
if (element == file)
return fileLabel
if (element is IrValueParameter && element.index == -1) {
// Don't attribute comments to the implicit `this` parameter of a function.
return null
}
val ktFile = psi2Ir.getKtFile(file)
if (ktFile == null) {
logger.warn("Comments are not being processed in ${file.path}.")
return
val label: String
val existingLabel = if (element is IrVariable) {
// local variables are not named globally, so we need to get them from the variable label cache
label = "variable ${element.name.asString()}"
tw.getExistingVariableLabelFor(element)
} else if (element is IrFunction && element.isLocalFunction()) {
// local functions are not named globally, so we need to get them from the local function label cache
label = "local function ${element.name.asString()}"
fileExtractor.getExistingLocallyVisibleFunctionLabel(element)
} else {
label = getLabelForNamedElement(element) ?: return null
tw.getExistingLabelFor<DbTop>(label)
}
val commentVisitor = mkCommentVisitor(psi2Ir)
ktFile.accept(commentVisitor)
if (existingLabel == null) {
logger.warn("Couldn't get existing label for $label")
return null
}
return existingLabel
}
private fun mkCommentVisitor(psi2Ir: Psi2IrFacade): KtVisitor<Unit, Unit> =
object : KtVisitor<Unit, Unit>() {
override fun visitElement(element: PsiElement) {
element.acceptChildren(this)
// Slightly hacky, but `visitComment` doesn't seem to visit comments with `tokenType` `KtTokens.DOC_COMMENT`
if (element is PsiComment){
visitCommentElement(element)
}
}
private fun visitCommentElement(comment: PsiComment) {
val type: CommentType = when (comment.tokenType) {
KtTokens.EOL_COMMENT -> {
CommentType.SingleLine
}
KtTokens.BLOCK_COMMENT -> {
CommentType.Block
}
KtTokens.DOC_COMMENT -> {
CommentType.Doc
}
else -> {
logger.warn("Unhandled comment token type: ${comment.tokenType}")
return
}
}
val commentLabel = tw.getFreshIdLabel<DbKtcomment>()
tw.writeKtComments(commentLabel, type.value, comment.text)
val locId = tw.getLocation(comment.startOffset, comment.endOffset)
tw.writeHasLocation(commentLabel, locId)
if (comment.tokenType != KtTokens.DOC_COMMENT) {
return
}
if (comment !is KDoc) {
logger.warn("Unexpected comment type with DocComment token type.")
return
}
for (sec in comment.getAllSections()) {
val commentSectionLabel = tw.getFreshIdLabel<DbKtcommentsection>()
tw.writeKtCommentSections(commentSectionLabel, commentLabel, sec.getContent())
val name = sec.name
if (name != null) {
tw.writeKtCommentSectionNames(commentSectionLabel, name)
}
val subjectName = sec.getSubjectName()
if (subjectName != null) {
tw.writeKtCommentSectionSubjectNames(commentSectionLabel, subjectName)
}
}
// Only storing the owner of doc comments:
val ownerPsi = getKDocOwner(comment) ?: return
val owners = mutableListOf<IrElement>()
file.accept(IrVisitorLookup(psi2Ir, ownerPsi, file), owners)
for (ownerIr in owners) {
val ownerLabel = getLabel(ownerIr)
if (ownerLabel != null) {
tw.writeKtCommentOwners(commentLabel, ownerLabel)
}
}
}
private fun getKDocOwner(comment: KDoc) : PsiElement? {
val owner = comment.owner
if (owner == null) {
logger.warn("Couldn't get owner of KDoc. The comment is extracted without an owner.")
}
return owner
}
private fun getLabel(element: IrElement): Label<out DbTop>? {
if (element == file)
return fileLabel
if (element is IrValueParameter && element.index == -1) {
// Don't attribute comments to the implicit `this` parameter of a function.
return null
}
val label: String
val existingLabel = if (element is IrVariable) {
// local variables are not named globally, so we need to get them from the variable label cache
label = "variable ${element.name.asString()}"
tw.getExistingVariableLabelFor(element)
} else if (element is IrFunction && element.isLocalFunction()) {
// local functions are not named globally, so we need to get them from the local function label cache
label = "local function ${element.name.asString()}"
fileExtractor.getExistingLocallyVisibleFunctionLabel(element)
private fun getLabelForNamedElement(element: IrElement) : String? {
when (element) {
is IrClass -> return fileExtractor.getClassLabel(element, listOf()).classLabel
is IrTypeParameter -> return fileExtractor.getTypeParameterLabel(element)
is IrFunction -> {
return if (element.isLocalFunction()) {
null
} else {
label = getLabelForNamedElement(element) ?: return null
tw.getExistingLabelFor<DbTop>(label)
fileExtractor.getFunctionLabel(element, null)
}
if (existingLabel == null) {
logger.warn("Couldn't get existing label for $label")
}
is IrValueParameter -> return fileExtractor.getValueParameterLabel(element, null)
is IrProperty -> return fileExtractor.getPropertyLabel(element)
is IrField -> return fileExtractor.getFieldLabel(element)
is IrEnumEntry -> return fileExtractor.getEnumEntryLabel(element)
is IrTypeAlias -> return fileExtractor.getTypeAliasLabel(element)
is IrAnonymousInitializer -> {
val parentClass = element.parentClassOrNull
if (parentClass == null) {
logger.warnElement("Parent of anonymous initializer is not a class", element)
return null
}
return existingLabel
// Assign the comment to the class. The content of the `init` blocks might be extracted in multiple constructors.
return getLabelForNamedElement(parentClass)
}
private fun getLabelForNamedElement(element: IrElement) : String? {
when (element) {
is IrClass -> return fileExtractor.getClassLabel(element, listOf()).classLabel
is IrTypeParameter -> return fileExtractor.getTypeParameterLabel(element)
is IrFunction -> {
return if (element.isLocalFunction()) {
null
} else {
fileExtractor.getFunctionLabel(element, null)
}
}
is IrValueParameter -> return fileExtractor.getValueParameterLabel(element, null)
is IrProperty -> return fileExtractor.getPropertyLabel(element)
is IrField -> return fileExtractor.getFieldLabel(element)
is IrEnumEntry -> return fileExtractor.getEnumEntryLabel(element)
is IrTypeAlias -> return fileExtractor.getTypeAliasLabel(element)
// Fresh entities, not named elements:
is IrBody -> return null
is IrExpression -> return null
is IrAnonymousInitializer -> {
val parentClass = element.parentClassOrNull
if (parentClass == null) {
logger.warnElement("Parent of anonymous initializer is not a class", element)
return null
}
// Assign the comment to the class. The content of the `init` blocks might be extracted in multiple constructors.
return getLabelForNamedElement(parentClass)
}
// Fresh entities, not named elements:
is IrBody -> return null
is IrExpression -> return null
// todo add others:
else -> {
logger.warnElement("Unhandled element type found during comment extraction: ${element::class}", element)
return null
}
}
// todo add others:
else -> {
logger.warnElement("Unhandled element type found during comment extraction: ${element::class}", element)
return null
}
}
}
}

View File

@@ -0,0 +1,112 @@
package com.github.codeql.comments
import com.github.codeql.*
import com.github.codeql.utils.IrVisitorLookup
import com.github.codeql.utils.Psi2IrFacade
import com.github.codeql.utils.versions.getPsi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.kdoc.psi.api.KDoc
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.KtVisitor
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
/**
 * Comment extractor that works on the PSI tree of the file.
 *
 * The PSI file is looked up through the [Psi2IrFacade] returned by `getPsi2Ir()`;
 * every [PsiComment] found while visiting it is written out, and doc comments
 * additionally get their sections and owners recorded.
 */
class CommentExtractorPSI(
    fileExtractor: KotlinFileExtractor,
    file: IrFile,
    fileLabel: Label<out DbFile>
) : CommentExtractor(fileExtractor, file, fileLabel) {

    /**
     * Visits the PSI file belonging to `file` and extracts its comments.
     *
     * @return `true` if the PSI file was available and has been visited;
     *         `false` if no [Psi2IrFacade] or no PSI file could be obtained.
     */
    fun extract(): Boolean {
        val facade = getPsi2Ir()
        if (facade == null) {
            logger.warn("Comments will not be extracted as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
            return false
        }

        // No PSI for this file: report "nothing extracted" without logging here.
        val psiFile = facade.getKtFile(file) ?: return false

        val visitor = mkCommentVisitor(facade)
        psiFile.accept(visitor)
        return true
    }

    /**
     * Builds the PSI visitor that performs the actual extraction.
     *
     * @param psi2Ir facade used to map the PSI owner of a doc comment back to IR elements.
     */
    private fun mkCommentVisitor(psi2Ir: Psi2IrFacade): KtVisitor<Unit, Unit> =
        object : KtVisitor<Unit, Unit>() {
            override fun visitElement(element: PsiElement) {
                // Children first, then the element itself.
                element.acceptChildren(this)
                // Slightly hacky, but `visitComment` doesn't seem to visit comments with `tokenType` `KtTokens.DOC_COMMENT`
                if (element is PsiComment) {
                    visitCommentElement(element)
                }
            }

            /**
             * Writes one comment (text and location). For doc comments it also
             * writes the sections and the owning elements.
             */
            private fun visitCommentElement(comment: PsiComment) {
                val type: CommentType = when (comment.tokenType) {
                    KtTokens.EOL_COMMENT -> CommentType.SingleLine
                    KtTokens.BLOCK_COMMENT -> CommentType.Block
                    KtTokens.DOC_COMMENT -> CommentType.Doc
                    else -> {
                        logger.warn("Unhandled comment token type: ${comment.tokenType}")
                        return
                    }
                }

                val commentLabel = tw.getFreshIdLabel<DbKtcomment>()
                tw.writeKtComments(commentLabel, type.value, comment.text)
                tw.writeHasLocation(commentLabel, tw.getLocation(comment.startOffset, comment.endOffset))

                // Everything below only applies to doc comments.
                if (comment.tokenType != KtTokens.DOC_COMMENT) {
                    return
                }
                if (comment !is KDoc) {
                    logger.warn("Unexpected comment type with DocComment token type.")
                    return
                }

                comment.getAllSections().forEach { section ->
                    val sectionLabel = tw.getFreshIdLabel<DbKtcommentsection>()
                    tw.writeKtCommentSections(sectionLabel, commentLabel, section.getContent())
                    // Name and subject name are optional and only written when present.
                    section.name?.let { tw.writeKtCommentSectionNames(sectionLabel, it) }
                    section.getSubjectName()?.let { tw.writeKtCommentSectionSubjectNames(sectionLabel, it) }
                }

                // Only storing the owner of doc comments:
                val ownerPsi = getKDocOwner(comment) ?: return
                val irOwners = mutableListOf<IrElement>()
                file.accept(IrVisitorLookup(psi2Ir, ownerPsi, file), irOwners)
                irOwners.forEach { irOwner ->
                    getLabel(irOwner)?.let { tw.writeKtCommentOwners(commentLabel, it) }
                }
            }

            /**
             * Returns the PSI owner of [comment], logging a warning when it has none.
             */
            private fun getKDocOwner(comment: KDoc): PsiElement? =
                comment.owner.also { owner ->
                    if (owner == null) {
                        logger.warn("Couldn't get owner of KDoc. The comment is extracted without an owner.")
                    }
                }
        }
}

View File

@@ -0,0 +1,11 @@
package com.github.codeql.comments
import com.github.codeql.*
import org.jetbrains.kotlin.ir.declarations.*
/**
 * Stand-in LighterAST comment extractor used where LighterAST is not supported.
 */
class CommentExtractorLighterAST(
    fileExtractor: KotlinFileExtractor,
    file: IrFile,
    fileLabel: Label<out DbFile>
) : CommentExtractor(fileExtractor, file, fileLabel) {
    // We don't support LighterAST with old Kotlin versions
    /** Extracts nothing and always returns `false`. */
    fun extract(): Boolean = false
}

View File

@@ -0,0 +1,111 @@
package com.github.codeql.comments
import com.github.codeql.*
import com.intellij.lang.LighterASTNode
import com.intellij.util.diff.FlyweightCapableTreeStructure
import org.jetbrains.kotlin.fir.backend.FirMetadataSource
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.UNDEFINED_OFFSET
import org.jetbrains.kotlin.ir.util.SYNTHETIC_OFFSET
import org.jetbrains.kotlin.ir.visitors.acceptChildrenVoid
import org.jetbrains.kotlin.ir.visitors.acceptVoid
import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid
import org.jetbrains.kotlin.kdoc.lexer.KDocTokens
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
import org.jetbrains.kotlin.util.getChildren
/**
 * Comment extractor that works on the LighterAST attached to the file's FIR metadata.
 *
 * Every comment token in the tree is written out with its location. Doc comments
 * are additionally linked to the IR elements whose source node has that doc
 * comment as a direct child.
 */
class CommentExtractorLighterAST(
    fileExtractor: KotlinFileExtractor,
    file: IrFile,
    fileLabel: Label<out DbFile>
) : CommentExtractor(fileExtractor, file, fileLabel) {

    /**
     * Extracts the comments of `file` from its LighterAST.
     *
     * @return `true` if a LighterAST was found and walked; `false` if the file has
     *         no FIR file metadata, no FIR file, or no tree structure.
     */
    fun extract(): Boolean {
        // `firstOrNull()` rather than `get(0)`: an empty file list must lead to
        // the `false` result below, not to an IndexOutOfBoundsException.
        val sourceElement = (file.metadata as? FirMetadataSource.File)?.files?.firstOrNull()?.source
        val treeStructure = sourceElement?.treeStructure ?: return false

        val owners = findKDocOwners(file)
        extractComments(treeStructure.root, treeStructure, owners)
        return true
    }

    /**
     * Visits all IR elements below [file] and records, for each KDoc node that is
     * a direct child of an element's source node, which IR elements own it.
     *
     * @return map from the start offset of a KDoc node to the IR elements owning it.
     */
    private fun findKDocOwners(file: IrFile): Map<Int, List<IrElement>> {
        fun LighterASTNode.isKDocComment() = this.tokenType == KDocTokens.KDOC

        val kDocOwners = mutableMapOf<Int, MutableList<IrElement>>()
        val visitor = object : IrElementVisitorVoid {
            override fun visitElement(element: IrElement) {
                val metadata = (element as? IrMetadataSourceOwner)?.metadata
                val sourceElement = (metadata as? FirMetadataSource)?.fir?.source
                val treeStructure = sourceElement?.treeStructure
                if (treeStructure != null) {
                    sourceElement.lighterASTNode.getChildren(treeStructure).firstOrNull { it.isKDocComment() }
                        ?.let { kDoc ->
                            // LighterASTNodes are not stable, so we can't
                            // use the node itself as the key. But the
                            // startOffset should uniquely identify them
                            // anyway.
                            val startOffset = kDoc.startOffset
                            if (startOffset != UNDEFINED_OFFSET && startOffset != SYNTHETIC_OFFSET) {
                                kDocOwners.getOrPut(startOffset) { mutableListOf() }.add(element)
                            }
                        }
                }
                // Always descend, whether or not this element owned a KDoc.
                element.acceptChildrenVoid(this)
            }
        }
        file.acceptVoid(visitor)
        return kDocOwners
    }

    /**
     * Recursively walks the children of [node]: comment tokens are extracted,
     * every other child is descended into.
     */
    private fun extractComments(
        node: LighterASTNode,
        treeStructure: FlyweightCapableTreeStructure<LighterASTNode>,
        owners: Map<Int, List<IrElement>>
    ) {
        node.getChildren(treeStructure).forEach { child ->
            if (KtTokens.COMMENTS.contains(child.tokenType)) {
                extractComment(child, owners)
            } else {
                extractComments(child, treeStructure, owners)
            }
        }
    }

    /**
     * Writes a single comment with its location and, for doc comments, its owners.
     *
     * @param comment the comment node to extract.
     * @param owners  map from KDoc start offset to owning IR elements, as built by [findKDocOwners].
     */
    private fun extractComment(comment: LighterASTNode, owners: Map<Int, List<IrElement>>) {
        val type: CommentType = when (comment.tokenType) {
            KtTokens.EOL_COMMENT -> CommentType.SingleLine
            KtTokens.BLOCK_COMMENT -> CommentType.Block
            KtTokens.DOC_COMMENT -> CommentType.Doc
            else -> {
                logger.warn("Unhandled comment token type: ${comment.tokenType}")
                return
            }
        }

        val commentLabel = tw.getFreshIdLabel<DbKtcomment>()
        // NOTE(review): this relies on the node's toString() yielding the comment text - confirm.
        tw.writeKtComments(commentLabel, type.value, comment.toString())
        val locId = tw.getLocation(comment.startOffset, comment.endOffset)
        tw.writeHasLocation(commentLabel, locId)

        // Owners are only recorded for doc comments.
        if (comment.tokenType != KtTokens.DOC_COMMENT) {
            return
        }

        // TODO: The PSI comment extractor extracts comment.getAllSections()
        // here, so we should too

        for (owner in owners[comment.startOffset].orEmpty()) {
            val ownerLabel = getLabel(owner)
            if (ownerLabel != null) {
                tw.writeKtCommentOwners(commentLabel, ownerLabel)
            }
        }
    }
}