Merge pull request #14887 from igfoo/igfoo/kloc

Kotlin: Add LighterAST support to numlines extraction
This commit is contained in:
Ian Lynagh
2023-11-24 13:43:34 +00:00
committed by GitHub
7 changed files with 320 additions and 123 deletions

View File

@@ -133,7 +133,7 @@ open class KotlinFileExtractor(
val lighterAstCommentsExtracted = CommentExtractorLighterAST(this, file, tw.fileId).extract()
if (psiCommentsExtracted == lighterAstCommentsExtracted) {
if (psiCommentsExtracted) {
logger.warnElement("Found both PSI and LightAST comments in ${file.path}.", file)
logger.warnElement("Found both PSI and LighterAST comments in ${file.path}.", file)
} else {
logger.warnElement("Comments could not be processed in ${file.path}.", file)
}

View File

@@ -1,138 +1,28 @@
package com.github.codeql
import com.github.codeql.utils.versions.getPsi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiWhiteSpace
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.kdoc.psi.api.KDocElement
import org.jetbrains.kotlin.psi.KtCodeFragment
import org.jetbrains.kotlin.psi.KtVisitor
class LinesOfCode(
val logger: FileLogger,
val tw: FileTrapWriter,
val file: IrFile
) {
val psi2Ir = getPsi2Ir().also {
if (it == null) {
logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
}
}
val linesOfCodePSI = LinesOfCodePSI(logger, tw, file)
val linesOfCodeLighterAST = LinesOfCodeLighterAST(logger, tw, file)
fun linesOfCodeInFile(id: Label<DbFile>) {
if (psi2Ir == null) {
return
val psiExtracted = linesOfCodePSI.linesOfCodeInFile(id)
val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInFile(id)
if (psiExtracted && lighterASTExtracted) {
logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for ${file.path}.", file)
}
val ktFile = psi2Ir.getKtFile(file)
if (ktFile == null) {
return
}
linesOfCodeInPsi(id, ktFile, file)
}
fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>) {
if (psi2Ir == null) {
return
val psiExtracted = linesOfCodePSI.linesOfCodeInDeclaration(d, id)
val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInDeclaration(d, id)
if (psiExtracted && lighterASTExtracted) {
logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for declaration.", d)
}
val p = psi2Ir.findPsiElement(d, file)
if (p == null) {
return
}
linesOfCodeInPsi(id, p, d)
}
private fun linesOfCodeInPsi(id: Label<out DbSourceline>, root: PsiElement, e: IrElement) {
val document = root.getContainingFile().getViewProvider().getDocument()
if (document == null) {
logger.errorElement("Cannot find document for PSI", e)
tw.writeNumlines(id, 0, 0, 0)
return
}
val rootRange = root.getTextRange()
val rootFirstLine = document.getLineNumber(rootRange.getStartOffset())
val rootLastLine = document.getLineNumber(rootRange.getEndOffset())
if (rootLastLine < rootFirstLine) {
logger.errorElement("PSI ends before it starts", e)
tw.writeNumlines(id, 0, 0, 0)
return
}
val numLines = 1 + rootLastLine - rootFirstLine
val lineContents = Array(numLines) { LineContent() }
val visitor =
object : KtVisitor<Unit, Unit>() {
override fun visitElement(element: PsiElement) {
val isComment = element is PsiComment
// Comments may include nodes that aren't PsiComments,
// so we don't want to visit them or we'll think they
// are code.
if (!isComment) {
element.acceptChildren(this)
}
if (element is PsiWhiteSpace) {
return
}
// Leaf nodes are assumed to be tokens, and
// therefore we count any lines that they are on.
// For comments, we actually need to look at the
// outermost node, as the leaves of KDocs don't
// necessarily cover all lines.
if (isComment || element.getChildren().size == 0) {
val range = element.getTextRange()
val startOffset = range.getStartOffset()
val endOffset = range.getEndOffset()
// The PSI doesn't seem to have anything like
// the IR's UNDEFINED_OFFSET and SYNTHETIC_OFFSET,
// but < 0 still seem to represent bad/unknown
// locations.
if (startOffset < 0 || endOffset < 0) {
logger.errorElement("PSI has negative offset", e)
return
}
if (startOffset > endOffset) {
return
}
// We might get e.g. an import list for a file
// with no imports, which claims to have start
// and end offsets of 0. Anything of 0 width
// we therefore just skip.
if (startOffset == endOffset) {
return
}
val firstLine = document.getLineNumber(startOffset)
val lastLine = document.getLineNumber(endOffset)
if (firstLine < rootFirstLine) {
logger.errorElement("PSI element starts before root", e)
return
} else if (lastLine > rootLastLine) {
logger.errorElement("PSI element ends after root", e)
return
}
for (line in firstLine..lastLine) {
val lineContent = lineContents[line - rootFirstLine]
if (isComment) {
lineContent.containsComment = true
} else {
lineContent.containsCode = true
}
}
}
}
}
root.accept(visitor)
val total = lineContents.size
val code = lineContents.count { it.containsCode }
val comment = lineContents.count { it.containsComment }
tw.writeNumlines(id, total, code, comment)
}
private class LineContent {
var containsComment = false
var containsCode = false
}
}

View File

@@ -0,0 +1,153 @@
package com.github.codeql
import com.github.codeql.utils.versions.getPsi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiWhiteSpace
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.kdoc.psi.api.KDocElement
import org.jetbrains.kotlin.psi.KtCodeFragment
import org.jetbrains.kotlin.psi.KtVisitor
/**
 * Derives line counts from the PSI tree and records them via
 * `tw.writeNumlines(id, total, code, comment)`.
 *
 * Both public entry points return `true` when PSI information was found
 * for the requested element (whether or not useful counts could then be
 * computed), and `false` when no PSI information is available.
 */
class LinesOfCodePSI(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    // A null facade means PSI lookups are unavailable; warn once at
    // construction time and let every query return false afterwards.
    val psi2Ir = getPsi2Ir().also { facade ->
        if (facade == null) {
            logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
        }
    }

    /**
     * Writes the line counts of the whole file against [id].
     *
     * @return `false` if there is no PSI facade or no `KtFile` for [file],
     * `true` otherwise.
     */
    fun linesOfCodeInFile(id: Label<DbFile>): Boolean {
        val facade = psi2Ir ?: return false
        val ktFile = facade.getKtFile(file) ?: return false
        // Reaching this call is what counts as "PSI info exists for the
        // file", regardless of whether any counts can be computed.
        linesOfCodeInPsi(id, ktFile, file)
        return true
    }

    /**
     * Writes the line counts of declaration [d] against [id].
     *
     * @return `false` if there is no PSI facade or no PSI element for [d],
     * `true` otherwise.
     */
    fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>): Boolean {
        val facade = psi2Ir ?: return false
        val psiElement = facade.findPsiElement(d, file) ?: return false
        // Reaching this call is what counts as "PSI info exists for the
        // declaration", regardless of whether any counts can be computed.
        linesOfCodeInPsi(id, psiElement, d)
        return true
    }

    /**
     * Walks the PSI subtree under [root], classifies each line it spans as
     * containing code and/or a comment, and writes the totals for [id].
     * [e] is only used as the element attached to error diagnostics.
     * Whenever the root's extent cannot be determined, zeros are written.
     */
    private fun linesOfCodeInPsi(id: Label<out DbSourceline>, root: PsiElement, e: IrElement) {
        val document = root.containingFile.viewProvider.document ?: run {
            logger.errorElement("Cannot find document for PSI", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val rootRange = root.textRange
        val rootStartOffset = rootRange.startOffset
        val rootEndOffset = rootRange.endOffset
        if (rootStartOffset < 0 || rootEndOffset < 0) {
            // Synthetic element, or one with an invalid location
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val rootFirstLine = document.getLineNumber(rootStartOffset)
        val rootLastLine = document.getLineNumber(rootEndOffset)
        if (rootLastLine < rootFirstLine) {
            logger.errorElement("PSI ends before it starts", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        // One slot per line covered by the root, indexed relative to
        // the root's first line.
        val numLines = 1 + rootLastLine - rootFirstLine
        val lineContents = Array(numLines) { LineContent() }

        val visitor =
            object : KtVisitor<Unit, Unit>() {
                override fun visitElement(element: PsiElement) {
                    val isComment = element is PsiComment
                    // A comment can contain nodes that are not themselves
                    // PsiComments; descending into it would make those
                    // nodes look like code, so only non-comments recurse.
                    if (!isComment) {
                        element.acceptChildren(this)
                    }
                    if (element is PsiWhiteSpace) {
                        return
                    }
                    // Only leaves (assumed to be tokens) and whole comments
                    // contribute lines. Comments are taken at their
                    // outermost node because the leaves of a KDoc do not
                    // necessarily cover every line of it.
                    if (!isComment && element.children.isNotEmpty()) {
                        return
                    }

                    val range = element.textRange
                    val startOffset = range.startOffset
                    val endOffset = range.endOffset
                    // PSI has no counterpart of the IR's UNDEFINED_OFFSET
                    // and SYNTHETIC_OFFSET, but negative values still seem
                    // to denote bad/unknown locations.
                    if (startOffset < 0 || endOffset < 0) {
                        logger.errorElement("PSI element has negative offset", e)
                        return
                    }
                    if (startOffset > endOffset) {
                        logger.errorElement("PSI element has negative size", e)
                        return
                    }
                    // Zero-width elements are skipped: e.g. the import list
                    // of a file without imports claims offsets 0..0.
                    if (startOffset == endOffset) {
                        return
                    }

                    val firstLine = document.getLineNumber(startOffset)
                    val lastLine = document.getLineNumber(endOffset)
                    if (firstLine < rootFirstLine) {
                        logger.errorElement("PSI element starts before root", e)
                        return
                    }
                    if (lastLine > rootLastLine) {
                        logger.errorElement("PSI element ends after root", e)
                        return
                    }

                    for (index in (firstLine - rootFirstLine)..(lastLine - rootFirstLine)) {
                        val content = lineContents[index]
                        if (isComment) {
                            content.containsComment = true
                        } else {
                            content.containsCode = true
                        }
                    }
                }
            }
        root.accept(visitor)

        val codeLines = lineContents.count { it.containsCode }
        val commentLines = lineContents.count { it.containsComment }
        tw.writeNumlines(id, numLines, codeLines, commentLines)
    }

    /** What has been seen on a single line; a line may hold both kinds. */
    private class LineContent {
        var containsComment = false
        var containsCode = false
    }
}

View File

@@ -0,0 +1,19 @@
package com.github.codeql
import org.jetbrains.kotlin.ir.declarations.*
/**
 * Placeholder used with old Kotlin versions, for which LighterAST is not
 * supported: both queries report that nothing was extracted.
 */
class LinesOfCodeLighterAST(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    /** Always `false`: LighterAST is not supported with old Kotlin versions. */
    @Suppress("UNUSED_PARAMETER")
    fun linesOfCodeInFile(id: Label<DbFile>): Boolean = false

    /** Always `false`: LighterAST is not supported with old Kotlin versions. */
    @Suppress("UNUSED_PARAMETER")
    fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>): Boolean = false
}

View File

@@ -13,8 +13,6 @@ import org.jetbrains.kotlin.ir.visitors.acceptVoid
import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid
import org.jetbrains.kotlin.kdoc.lexer.KDocTokens
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
import org.jetbrains.kotlin.util.getChildren
class CommentExtractorLighterAST(fileExtractor: KotlinFileExtractor, file: IrFile, fileLabel: Label<out DbFile>): CommentExtractor(fileExtractor, file, fileLabel) {

View File

@@ -0,0 +1,133 @@
package com.github.codeql
import com.intellij.lang.LighterASTNode
import com.intellij.util.diff.FlyweightCapableTreeStructure
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.fir.backend.FirMetadataSource
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.KtSourceElement
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.util.getChildren
/**
 * Derives line counts from the LighterAST reachable through FIR metadata
 * and records them via `tw.writeNumlines(id, total, code, comment)`.
 *
 * Both public entry points return `true` when a source element was found
 * for the requested element (whether or not useful counts could then be
 * computed), and `false` when none is available.
 */
class LinesOfCodeLighterAST(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    // Used to translate character offsets into line numbers.
    val fileEntry = file.fileEntry

    /**
     * Writes the line counts of the whole file against [id].
     *
     * @return `false` if the file's metadata yields no source element,
     * `true` otherwise.
     */
    fun linesOfCodeInFile(id: Label<DbFile>): Boolean {
        val fileSource =
            (file.metadata as? FirMetadataSource.File)?.files?.elementAtOrNull(0)?.source
                ?: return false
        // Reaching this call is what counts as "LighterAST info exists
        // for the file", regardless of whether counts can be computed.
        linesOfCodeInLighterAST(id, file, fileSource)
        return true
    }

    /**
     * Writes the line counts of declaration [d] against [id].
     *
     * @return `false` if [d] carries no FIR metadata with a source
     * element, `true` otherwise.
     */
    fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>): Boolean {
        val declarationSource =
            ((d as? IrMetadataSourceOwner)?.metadata as? FirMetadataSource)?.fir?.source
                ?: return false
        // Reaching this call is what counts as "LighterAST info exists
        // for the declaration", regardless of whether counts can be
        // computed.
        linesOfCodeInLighterAST(id, d, declarationSource)
        return true
    }

    /**
     * Walks the LighterAST subtree of [s], classifies each line it spans
     * as containing code and/or a comment, and writes the totals for [id].
     * [e] is only used as the element attached to error diagnostics.
     * Whenever the root's extent cannot be determined, zeros are written.
     */
    private fun linesOfCodeInLighterAST(id: Label<out DbSourceline>, e: IrElement, s: KtSourceElement) {
        val rootStartOffset = s.startOffset
        val rootEndOffset = s.endOffset
        if (rootStartOffset < 0 || rootEndOffset < 0) {
            // Synthetic element, or one with an invalid location
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val rootFirstLine = fileEntry.getLineNumber(rootStartOffset)
        val rootLastLine = fileEntry.getLineNumber(rootEndOffset)
        if (rootLastLine < rootFirstLine) {
            logger.errorElement("Source element ends before it starts", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        // One slot per line covered by the root, indexed relative to
        // the root's first line.
        val numLines = 1 + rootLastLine - rootFirstLine
        val lineContents = Array(numLines) { LineContent() }
        val treeStructure = s.treeStructure

        // Depth-first, left-to-right walk over the subtree; marks the
        // lines touched by each leaf or comment node.
        fun visit(node: LighterASTNode) {
            val tokenType = node.tokenType
            if (KtTokens.WHITESPACES.contains(tokenType)) {
                return
            }
            val isComment = KtTokens.COMMENTS.contains(tokenType)
            val children = node.getChildren(treeStructure)

            // Interior non-comment nodes contribute nothing themselves;
            // only their descendants do.
            if (!isComment && !children.isEmpty()) {
                for (child in children) {
                    visit(child)
                }
                return
            }

            // From here on the node is either a leaf (assumed to be a
            // token) or a comment. Comments are taken at their outermost
            // node because the leaves of a KDoc do not necessarily cover
            // every line of it.
            val startOffset = node.startOffset
            val endOffset = node.endOffset
            if (startOffset < 0 || endOffset < 0) {
                logger.errorElement("LighterAST node has negative offset", e)
                return
            }
            if (startOffset > endOffset) {
                logger.errorElement("LighterAST node has negative size", e)
                return
            }
            // Zero-width nodes are skipped. This may not be possible
            // with LighterAST, but e.g. an import list for a file without
            // imports might claim offsets 0..0.
            if (startOffset == endOffset) {
                return
            }

            val firstLine = fileEntry.getLineNumber(startOffset)
            val lastLine = fileEntry.getLineNumber(endOffset)
            if (firstLine < rootFirstLine) {
                logger.errorElement("LighterAST element starts before root", e)
                return
            }
            if (lastLine > rootLastLine) {
                logger.errorElement("LighterAST element ends after root", e)
                return
            }

            for (index in (firstLine - rootFirstLine)..(lastLine - rootFirstLine)) {
                val content = lineContents[index]
                if (isComment) {
                    content.containsComment = true
                } else {
                    content.containsCode = true
                }
            }
        }

        visit(s.lighterASTNode)

        val codeLines = lineContents.count { it.containsCode }
        val commentLines = lineContents.count { it.containsComment }
        tw.writeNumlines(id, numLines, codeLines, commentLines)
    }

    /** What has been seen on a single line; a line may hold both kinds. */
    private class LineContent {
        var containsComment = false
        var containsCode = false
    }
}

View File

@@ -1,3 +1,7 @@
| Number of files with extension jar | 1 |
| Number of files with extension kt | 1 |
| Number of lines of code | 2 |
| Number of lines of code with extension kt | 2 |
| Total number of lines | 3 |
| Total number of lines with extension kt | 3 |
| Uses Kotlin 2: true | 1 |