mirror of
https://github.com/github/codeql.git
synced 2026-01-22 19:02:59 +01:00
Merge pull request #14887 from igfoo/igfoo/kloc
Kotlin: Add LighterAST support to numlines extraction
This commit is contained in:
@@ -133,7 +133,7 @@ open class KotlinFileExtractor(
|
||||
val lighterAstCommentsExtracted = CommentExtractorLighterAST(this, file, tw.fileId).extract()
|
||||
if (psiCommentsExtracted == lighterAstCommentsExtracted) {
|
||||
if (psiCommentsExtracted) {
|
||||
logger.warnElement("Found both PSI and LightAST comments in ${file.path}.", file)
|
||||
logger.warnElement("Found both PSI and LighterAST comments in ${file.path}.", file)
|
||||
} else {
|
||||
logger.warnElement("Comments could not be processed in ${file.path}.", file)
|
||||
}
|
||||
|
||||
@@ -1,138 +1,28 @@
|
||||
package com.github.codeql
|
||||
|
||||
import com.github.codeql.utils.versions.getPsi2Ir
|
||||
import com.intellij.psi.PsiComment
|
||||
import com.intellij.psi.PsiElement
|
||||
import com.intellij.psi.PsiWhiteSpace
|
||||
import org.jetbrains.kotlin.config.KotlinCompilerVersion
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import org.jetbrains.kotlin.ir.declarations.*
|
||||
import org.jetbrains.kotlin.kdoc.psi.api.KDocElement
|
||||
import org.jetbrains.kotlin.psi.KtCodeFragment
|
||||
import org.jetbrains.kotlin.psi.KtVisitor
|
||||
|
||||
class LinesOfCode(
|
||||
val logger: FileLogger,
|
||||
val tw: FileTrapWriter,
|
||||
val file: IrFile
|
||||
) {
|
||||
val psi2Ir = getPsi2Ir().also {
|
||||
if (it == null) {
|
||||
logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
|
||||
}
|
||||
}
|
||||
val linesOfCodePSI = LinesOfCodePSI(logger, tw, file)
|
||||
val linesOfCodeLighterAST = LinesOfCodeLighterAST(logger, tw, file)
|
||||
|
||||
fun linesOfCodeInFile(id: Label<DbFile>) {
|
||||
if (psi2Ir == null) {
|
||||
return
|
||||
val psiExtracted = linesOfCodePSI.linesOfCodeInFile(id)
|
||||
val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInFile(id)
|
||||
if (psiExtracted && lighterASTExtracted) {
|
||||
logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for ${file.path}.", file)
|
||||
}
|
||||
val ktFile = psi2Ir.getKtFile(file)
|
||||
if (ktFile == null) {
|
||||
return
|
||||
}
|
||||
linesOfCodeInPsi(id, ktFile, file)
|
||||
}
|
||||
|
||||
fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>) {
|
||||
if (psi2Ir == null) {
|
||||
return
|
||||
val psiExtracted = linesOfCodePSI.linesOfCodeInDeclaration(d, id)
|
||||
val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInDeclaration(d, id)
|
||||
if (psiExtracted && lighterASTExtracted) {
|
||||
logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for declaration.", d)
|
||||
}
|
||||
val p = psi2Ir.findPsiElement(d, file)
|
||||
if (p == null) {
|
||||
return
|
||||
}
|
||||
linesOfCodeInPsi(id, p, d)
|
||||
}
|
||||
|
||||
private fun linesOfCodeInPsi(id: Label<out DbSourceline>, root: PsiElement, e: IrElement) {
|
||||
val document = root.getContainingFile().getViewProvider().getDocument()
|
||||
if (document == null) {
|
||||
logger.errorElement("Cannot find document for PSI", e)
|
||||
tw.writeNumlines(id, 0, 0, 0)
|
||||
return
|
||||
}
|
||||
|
||||
val rootRange = root.getTextRange()
|
||||
val rootFirstLine = document.getLineNumber(rootRange.getStartOffset())
|
||||
val rootLastLine = document.getLineNumber(rootRange.getEndOffset())
|
||||
if (rootLastLine < rootFirstLine) {
|
||||
logger.errorElement("PSI ends before it starts", e)
|
||||
tw.writeNumlines(id, 0, 0, 0)
|
||||
return
|
||||
}
|
||||
val numLines = 1 + rootLastLine - rootFirstLine
|
||||
val lineContents = Array(numLines) { LineContent() }
|
||||
|
||||
val visitor =
|
||||
object : KtVisitor<Unit, Unit>() {
|
||||
override fun visitElement(element: PsiElement) {
|
||||
val isComment = element is PsiComment
|
||||
// Comments may include nodes that aren't PsiComments,
|
||||
// so we don't want to visit them or we'll think they
|
||||
// are code.
|
||||
if (!isComment) {
|
||||
element.acceptChildren(this)
|
||||
}
|
||||
|
||||
if (element is PsiWhiteSpace) {
|
||||
return
|
||||
}
|
||||
// Leaf nodes are assumed to be tokens, and
|
||||
// therefore we count any lines that they are on.
|
||||
// For comments, we actually need to look at the
|
||||
// outermost node, as the leaves of KDocs don't
|
||||
// necessarily cover all lines.
|
||||
if (isComment || element.getChildren().size == 0) {
|
||||
val range = element.getTextRange()
|
||||
val startOffset = range.getStartOffset()
|
||||
val endOffset = range.getEndOffset()
|
||||
// The PSI doesn't seem to have anything like
|
||||
// the IR's UNDEFINED_OFFSET and SYNTHETIC_OFFSET,
|
||||
// but < 0 still seem to represent bad/unknown
|
||||
// locations.
|
||||
if (startOffset < 0 || endOffset < 0) {
|
||||
logger.errorElement("PSI has negative offset", e)
|
||||
return
|
||||
}
|
||||
if (startOffset > endOffset) {
|
||||
return
|
||||
}
|
||||
// We might get e.g. an import list for a file
|
||||
// with no imports, which claims to have start
|
||||
// and end offsets of 0. Anything of 0 width
|
||||
// we therefore just skip.
|
||||
if (startOffset == endOffset) {
|
||||
return
|
||||
}
|
||||
val firstLine = document.getLineNumber(startOffset)
|
||||
val lastLine = document.getLineNumber(endOffset)
|
||||
if (firstLine < rootFirstLine) {
|
||||
logger.errorElement("PSI element starts before root", e)
|
||||
return
|
||||
} else if (lastLine > rootLastLine) {
|
||||
logger.errorElement("PSI element ends after root", e)
|
||||
return
|
||||
}
|
||||
for (line in firstLine..lastLine) {
|
||||
val lineContent = lineContents[line - rootFirstLine]
|
||||
if (isComment) {
|
||||
lineContent.containsComment = true
|
||||
} else {
|
||||
lineContent.containsCode = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
root.accept(visitor)
|
||||
val total = lineContents.size
|
||||
val code = lineContents.count { it.containsCode }
|
||||
val comment = lineContents.count { it.containsComment }
|
||||
tw.writeNumlines(id, total, code, comment)
|
||||
}
|
||||
|
||||
private class LineContent {
|
||||
var containsComment = false
|
||||
var containsCode = false
|
||||
}
|
||||
}
|
||||
|
||||
153
java/kotlin-extractor/src/main/kotlin/LinesOfCodePSI.kt
Normal file
153
java/kotlin-extractor/src/main/kotlin/LinesOfCodePSI.kt
Normal file
@@ -0,0 +1,153 @@
|
||||
package com.github.codeql
|
||||
|
||||
import com.github.codeql.utils.versions.getPsi2Ir
|
||||
import com.intellij.psi.PsiComment
|
||||
import com.intellij.psi.PsiElement
|
||||
import com.intellij.psi.PsiWhiteSpace
|
||||
import org.jetbrains.kotlin.config.KotlinCompilerVersion
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import org.jetbrains.kotlin.ir.declarations.*
|
||||
import org.jetbrains.kotlin.kdoc.psi.api.KDocElement
|
||||
import org.jetbrains.kotlin.psi.KtCodeFragment
|
||||
import org.jetbrains.kotlin.psi.KtVisitor
|
||||
|
||||
/**
 * Writes line-count information (total lines, lines containing code,
 * lines containing comments) for a file or a declaration, using the
 * PSI tree that corresponds to the IR being extracted.
 *
 * Both public entry points return `true` when a PSI element was found
 * and handed to the counting routine, and `false` when no PSI
 * information is available.
 */
class LinesOfCodePSI(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    // Null when getPsi2Ir() has nothing to offer; a warning is logged
    // at construction time in that case.
    val psi2Ir = getPsi2Ir().also { converter ->
        if (converter == null) {
            logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
        }
    }

    /**
     * Counts lines for the whole of [file] and writes them against [id].
     *
     * @return `false` if there is no PSI converter or no KtFile for the
     *   IR file; `true` otherwise, even if the count itself came out
     *   as zeros, because reaching the counting step means PSI info
     *   exists for the file.
     */
    fun linesOfCodeInFile(id: Label<DbFile>): Boolean {
        val converter = psi2Ir ?: return false
        val ktFile = converter.getKtFile(file) ?: return false
        linesOfCodeInPsi(id, ktFile, file)
        return true
    }

    /**
     * Counts lines for the declaration [d] and writes them against [id].
     *
     * @return `false` if there is no PSI converter or no PSI element
     *   for the declaration; `true` otherwise, even if the count
     *   itself came out as zeros.
     */
    fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>): Boolean {
        val converter = psi2Ir ?: return false
        val psiElement = converter.findPsiElement(d, file) ?: return false
        linesOfCodeInPsi(id, psiElement, d)
        return true
    }

    /**
     * Walks the PSI subtree under [root], classifies every line it
     * spans as containing code and/or a comment, and writes one
     * numlines row for [id]. [e] is only used as the location for
     * error messages. Whenever the root cannot be measured, a row of
     * zeros is written instead.
     */
    private fun linesOfCodeInPsi(id: Label<out DbSourceline>, root: PsiElement, e: IrElement) {
        val doc = root.containingFile.viewProvider.document
        if (doc == null) {
            logger.errorElement("Cannot find document for PSI", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val rootSpan = root.textRange
        val rootStart = rootSpan.startOffset
        val rootEnd = rootSpan.endOffset
        if (rootStart < 0 || rootEnd < 0) {
            // Synthetic element, or one with an invalid location.
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val firstRootLine = doc.getLineNumber(rootStart)
        val lastRootLine = doc.getLineNumber(rootEnd)
        if (firstRootLine > lastRootLine) {
            logger.errorElement("PSI ends before it starts", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val numLines = lastRootLine - firstRootLine + 1
        // One slot per line of the root, indexed relative to its first line.
        val perLine = Array(numLines) { LineContent() }

        val walker = object : KtVisitor<Unit, Unit>() {
            override fun visitElement(element: PsiElement) {
                val comment = element is PsiComment

                // A comment can contain nodes that are not PsiComments
                // themselves; descending into it would make those look
                // like code, so comments are treated as opaque.
                if (!comment) {
                    element.acceptChildren(this)
                }

                if (element is PsiWhiteSpace) {
                    return
                }

                // Only tokens (elements without children) and whole
                // comments mark lines. Comments are taken at their
                // outermost node because the leaves of a KDoc do not
                // necessarily cover every line of it.
                if (!comment && element.children.isNotEmpty()) {
                    return
                }

                val span = element.textRange
                val from = span.startOffset
                val to = span.endOffset
                when {
                    // The PSI has no counterpart of the IR's
                    // UNDEFINED_OFFSET / SYNTHETIC_OFFSET, but negative
                    // values still seem to mean bad or unknown locations.
                    from < 0 || to < 0 -> {
                        logger.errorElement("PSI element has negative offset", e)
                        return
                    }
                    from > to -> {
                        logger.errorElement("PSI element has negative size", e)
                        return
                    }
                    // Zero-width elements (e.g. the import list of a
                    // file without imports, which claims offsets 0..0)
                    // are skipped.
                    from == to -> return
                }

                val firstLine = doc.getLineNumber(from)
                val lastLine = doc.getLineNumber(to)
                if (firstLine < firstRootLine) {
                    logger.errorElement("PSI element starts before root", e)
                    return
                }
                if (lastLine > lastRootLine) {
                    logger.errorElement("PSI element ends after root", e)
                    return
                }

                for (slot in (firstLine - firstRootLine)..(lastLine - firstRootLine)) {
                    val content = perLine[slot]
                    if (comment) {
                        content.containsComment = true
                    } else {
                        content.containsCode = true
                    }
                }
            }
        }

        root.accept(walker)

        val codeLines = perLine.count { it.containsCode }
        val commentLines = perLine.count { it.containsComment }
        tw.writeNumlines(id, numLines, codeLines, commentLines)
    }

    /** What has been seen on a single line so far. */
    private class LineContent {
        var containsCode = false
        var containsComment = false
    }
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.github.codeql
|
||||
|
||||
import org.jetbrains.kotlin.ir.declarations.*
|
||||
|
||||
/**
 * Variant of the LighterAST line counter for old Kotlin versions,
 * where LighterAST is not supported. Both entry points do nothing and
 * report that no information was extracted.
 */
class LinesOfCodeLighterAST(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    /** Always `false`: LighterAST is not supported with old Kotlin versions. */
    fun linesOfCodeInFile(@Suppress("UNUSED_PARAMETER") id: Label<DbFile>): Boolean = false

    /** Always `false`: LighterAST is not supported with old Kotlin versions. */
    fun linesOfCodeInDeclaration(
        @Suppress("UNUSED_PARAMETER") d: IrDeclaration,
        @Suppress("UNUSED_PARAMETER") id: Label<out DbSourceline>
    ): Boolean = false
}
|
||||
@@ -13,8 +13,6 @@ import org.jetbrains.kotlin.ir.visitors.acceptVoid
|
||||
import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid
|
||||
import org.jetbrains.kotlin.kdoc.lexer.KDocTokens
|
||||
import org.jetbrains.kotlin.lexer.KtTokens
|
||||
import org.jetbrains.kotlin.psi.psiUtil.endOffset
|
||||
import org.jetbrains.kotlin.psi.psiUtil.startOffset
|
||||
import org.jetbrains.kotlin.util.getChildren
|
||||
|
||||
class CommentExtractorLighterAST(fileExtractor: KotlinFileExtractor, file: IrFile, fileLabel: Label<out DbFile>): CommentExtractor(fileExtractor, file, fileLabel) {
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
package com.github.codeql
|
||||
|
||||
import com.intellij.lang.LighterASTNode
|
||||
import com.intellij.util.diff.FlyweightCapableTreeStructure
|
||||
import org.jetbrains.kotlin.config.KotlinCompilerVersion
|
||||
import org.jetbrains.kotlin.fir.backend.FirMetadataSource
|
||||
import org.jetbrains.kotlin.ir.declarations.*
|
||||
import org.jetbrains.kotlin.ir.IrElement
|
||||
import org.jetbrains.kotlin.KtSourceElement
|
||||
import org.jetbrains.kotlin.lexer.KtTokens
|
||||
import org.jetbrains.kotlin.util.getChildren
|
||||
|
||||
/**
 * Writes line-count information (total lines, lines containing code,
 * lines containing comments) for a file or a declaration, using the
 * LighterAST source element attached to the FIR metadata of the IR.
 *
 * Both public entry points return `true` when a source element was
 * found and handed to the counting routine, and `false` when no such
 * element is available.
 */
class LinesOfCodeLighterAST(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    // Used to translate source offsets into line numbers.
    val fileEntry = file.fileEntry

    /**
     * Counts lines for the whole of [file] and writes them against [id].
     *
     * @return `false` if the file's metadata is not a
     *   [FirMetadataSource.File] or its first FIR file has no source
     *   element; `true` otherwise, even if the count itself came out
     *   as zeros.
     */
    fun linesOfCodeInFile(id: Label<DbFile>): Boolean {
        val firFiles = (file.metadata as? FirMetadataSource.File)?.files
        val source = firFiles?.elementAtOrNull(0)?.source ?: return false
        linesOfCodeInLighterAST(id, file, source)
        return true
    }

    /**
     * Counts lines for the declaration [d] and writes them against [id].
     *
     * @return `false` if [d] carries no FIR metadata with a source
     *   element; `true` otherwise, even if the count itself came out
     *   as zeros.
     */
    fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>): Boolean {
        val firMetadata = (d as? IrMetadataSourceOwner)?.metadata as? FirMetadataSource
        val source = firMetadata?.fir?.source ?: return false
        linesOfCodeInLighterAST(id, d, source)
        return true
    }

    /**
     * Classifies every line spanned by the source element [s] as
     * containing code and/or a comment and writes one numlines row for
     * [id]. [e] is only used as the location for error messages.
     * Whenever the root cannot be measured, a row of zeros is written
     * instead.
     */
    private fun linesOfCodeInLighterAST(id: Label<out DbSourceline>, e: IrElement, s: KtSourceElement) {
        val rootStart = s.startOffset
        val rootEnd = s.endOffset
        if (rootStart < 0 || rootEnd < 0) {
            // Synthetic element, or one with an invalid location.
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val firstRootLine = fileEntry.getLineNumber(rootStart)
        val lastRootLine = fileEntry.getLineNumber(rootEnd)
        if (firstRootLine > lastRootLine) {
            logger.errorElement("Source element ends before it starts", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val numLines = lastRootLine - firstRootLine + 1
        // One slot per line of the root, indexed relative to its first line.
        val perLine = Array(numLines) { LineContent() }

        val tree = s.treeStructure
        processSubtree(e, tree, firstRootLine, lastRootLine, perLine, s.lighterASTNode)

        val codeLines = perLine.count { it.containsCode }
        val commentLines = perLine.count { it.containsComment }
        tw.writeNumlines(id, numLines, codeLines, commentLines)
    }

    /**
     * Visits [node] and, recursively, its descendants, marking in
     * [lineContents] which lines hold code and which hold comments.
     * Whitespace nodes are ignored. Indices into [lineContents] are
     * relative to [rootFirstLine].
     */
    private fun processSubtree(e: IrElement, treeStructure: FlyweightCapableTreeStructure<LighterASTNode>, rootFirstLine: Int, rootLastLine: Int, lineContents: Array<LineContent>, node: LighterASTNode) {
        val type = node.tokenType
        if (type in KtTokens.WHITESPACES) {
            return
        }

        val comment = type in KtTokens.COMMENTS
        val kids = node.getChildren(treeStructure)

        // Only tokens (nodes without children) and whole comments mark
        // lines; everything else just delegates to its children.
        // Comments are taken at their outermost node because the
        // leaves of a KDoc do not necessarily cover every line of it.
        if (!comment && kids.isNotEmpty()) {
            kids.forEach { kid ->
                processSubtree(e, treeStructure, rootFirstLine, rootLastLine, lineContents, kid)
            }
            return
        }

        val from = node.startOffset
        val to = node.endOffset
        when {
            from < 0 || to < 0 -> {
                logger.errorElement("LighterAST node has negative offset", e)
                return
            }
            from > to -> {
                logger.errorElement("LighterAST node has negative size", e)
                return
            }
            // Zero-width nodes are skipped. This may not be possible
            // with LighterAST, but the PSI can produce e.g. an import
            // list for a file with no imports that claims offsets 0..0.
            from == to -> return
        }

        val firstLine = fileEntry.getLineNumber(from)
        val lastLine = fileEntry.getLineNumber(to)
        if (firstLine < rootFirstLine) {
            logger.errorElement("LighterAST element starts before root", e)
            return
        }
        if (lastLine > rootLastLine) {
            logger.errorElement("LighterAST element ends after root", e)
            return
        }

        for (slot in (firstLine - rootFirstLine)..(lastLine - rootFirstLine)) {
            val content = lineContents[slot]
            if (comment) {
                content.containsComment = true
            } else {
                content.containsCode = true
            }
        }
    }

    /** What has been seen on a single line so far. */
    private class LineContent {
        var containsCode = false
        var containsComment = false
    }
}
|
||||
@@ -1,3 +1,7 @@
|
||||
| Number of files with extension jar | 1 |
|
||||
| Number of files with extension kt | 1 |
|
||||
| Number of lines of code | 2 |
|
||||
| Number of lines of code with extension kt | 2 |
|
||||
| Total number of lines | 3 |
|
||||
| Total number of lines with extension kt | 3 |
|
||||
| Uses Kotlin 2: true | 1 |
|
||||
|
||||
Reference in New Issue
Block a user