diff --git a/java/kotlin-extractor/src/main/kotlin/KotlinFileExtractor.kt b/java/kotlin-extractor/src/main/kotlin/KotlinFileExtractor.kt index 3458ec20244..264f3f5dbbb 100644 --- a/java/kotlin-extractor/src/main/kotlin/KotlinFileExtractor.kt +++ b/java/kotlin-extractor/src/main/kotlin/KotlinFileExtractor.kt @@ -133,7 +133,7 @@ open class KotlinFileExtractor( val lighterAstCommentsExtracted = CommentExtractorLighterAST(this, file, tw.fileId).extract() if (psiCommentsExtracted == lighterAstCommentsExtracted) { if (psiCommentsExtracted) { - logger.warnElement("Found both PSI and LightAST comments in ${file.path}.", file) + logger.warnElement("Found both PSI and LighterAST comments in ${file.path}.", file) } else { logger.warnElement("Comments could not be processed in ${file.path}.", file) } diff --git a/java/kotlin-extractor/src/main/kotlin/LinesOfCode.kt b/java/kotlin-extractor/src/main/kotlin/LinesOfCode.kt index 27b62c86109..41b83654d7b 100644 --- a/java/kotlin-extractor/src/main/kotlin/LinesOfCode.kt +++ b/java/kotlin-extractor/src/main/kotlin/LinesOfCode.kt @@ -1,138 +1,28 @@ package com.github.codeql -import com.github.codeql.utils.versions.getPsi2Ir -import com.intellij.psi.PsiComment -import com.intellij.psi.PsiElement -import com.intellij.psi.PsiWhiteSpace -import org.jetbrains.kotlin.config.KotlinCompilerVersion -import org.jetbrains.kotlin.ir.IrElement import org.jetbrains.kotlin.ir.declarations.* -import org.jetbrains.kotlin.kdoc.psi.api.KDocElement -import org.jetbrains.kotlin.psi.KtCodeFragment -import org.jetbrains.kotlin.psi.KtVisitor class LinesOfCode( val logger: FileLogger, val tw: FileTrapWriter, val file: IrFile ) { - val psi2Ir = getPsi2Ir().also { - if (it == null) { - logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})") - } - } + val linesOfCodePSI = LinesOfCodePSI(logger, tw, file) + val linesOfCodeLighterAST = LinesOfCodeLighterAST(logger, tw, file) fun linesOfCodeInFile(id: 
Label) { - if (psi2Ir == null) { - return + val psiExtracted = linesOfCodePSI.linesOfCodeInFile(id) + val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInFile(id) + if (psiExtracted && lighterASTExtracted) { + logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for ${file.path}.", file) } - val ktFile = psi2Ir.getKtFile(file) - if (ktFile == null) { - return - } - linesOfCodeInPsi(id, ktFile, file) } fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label) { - if (psi2Ir == null) { - return + val psiExtracted = linesOfCodePSI.linesOfCodeInDeclaration(d, id) + val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInDeclaration(d, id) + if (psiExtracted && lighterASTExtracted) { + logger.warnElement("Both PSI and LighterAST number-of-lines information for declaration.", d) } - val p = psi2Ir.findPsiElement(d, file) - if (p == null) { - return - } - linesOfCodeInPsi(id, p, d) - } - - private fun linesOfCodeInPsi(id: Label, root: PsiElement, e: IrElement) { - val document = root.getContainingFile().getViewProvider().getDocument() - if (document == null) { - logger.errorElement("Cannot find document for PSI", e) - tw.writeNumlines(id, 0, 0, 0) - return - } - - val rootRange = root.getTextRange() - val rootFirstLine = document.getLineNumber(rootRange.getStartOffset()) - val rootLastLine = document.getLineNumber(rootRange.getEndOffset()) - if (rootLastLine < rootFirstLine) { - logger.errorElement("PSI ends before it starts", e) - tw.writeNumlines(id, 0, 0, 0) - return - } - val numLines = 1 + rootLastLine - rootFirstLine - val lineContents = Array(numLines) { LineContent() } - - val visitor = - object : KtVisitor() { - override fun visitElement(element: PsiElement) { - val isComment = element is PsiComment - // Comments may include nodes that aren't PsiComments, - // so we don't want to visit them or we'll think they - // are code. 
- if (!isComment) { - element.acceptChildren(this) - } - - if (element is PsiWhiteSpace) { - return - } - // Leaf nodes are assumed to be tokens, and - // therefore we count any lines that they are on. - // For comments, we actually need to look at the - // outermost node, as the leaves of KDocs don't - // necessarily cover all lines. - if (isComment || element.getChildren().size == 0) { - val range = element.getTextRange() - val startOffset = range.getStartOffset() - val endOffset = range.getEndOffset() - // The PSI doesn't seem to have anything like - // the IR's UNDEFINED_OFFSET and SYNTHETIC_OFFSET, - // but < 0 still seem to represent bad/unknown - // locations. - if (startOffset < 0 || endOffset < 0) { - logger.errorElement("PSI has negative offset", e) - return - } - if (startOffset > endOffset) { - return - } - // We might get e.g. an import list for a file - // with no imports, which claims to have start - // and end offsets of 0. Anything of 0 width - // we therefore just skip. 
- if (startOffset == endOffset) { - return - } - val firstLine = document.getLineNumber(startOffset) - val lastLine = document.getLineNumber(endOffset) - if (firstLine < rootFirstLine) { - logger.errorElement("PSI element starts before root", e) - return - } else if (lastLine > rootLastLine) { - logger.errorElement("PSI element ends after root", e) - return - } - for (line in firstLine..lastLine) { - val lineContent = lineContents[line - rootFirstLine] - if (isComment) { - lineContent.containsComment = true - } else { - lineContent.containsCode = true - } - } - } - } - } - root.accept(visitor) - val total = lineContents.size - val code = lineContents.count { it.containsCode } - val comment = lineContents.count { it.containsComment } - tw.writeNumlines(id, total, code, comment) - } - - private class LineContent { - var containsComment = false - var containsCode = false } } diff --git a/java/kotlin-extractor/src/main/kotlin/LinesOfCodePSI.kt b/java/kotlin-extractor/src/main/kotlin/LinesOfCodePSI.kt new file mode 100644 index 00000000000..858241f549e --- /dev/null +++ b/java/kotlin-extractor/src/main/kotlin/LinesOfCodePSI.kt @@ -0,0 +1,153 @@ +package com.github.codeql + +import com.github.codeql.utils.versions.getPsi2Ir +import com.intellij.psi.PsiComment +import com.intellij.psi.PsiElement +import com.intellij.psi.PsiWhiteSpace +import org.jetbrains.kotlin.config.KotlinCompilerVersion +import org.jetbrains.kotlin.ir.IrElement +import org.jetbrains.kotlin.ir.declarations.* +import org.jetbrains.kotlin.kdoc.psi.api.KDocElement +import org.jetbrains.kotlin.psi.KtCodeFragment +import org.jetbrains.kotlin.psi.KtVisitor + +class LinesOfCodePSI( + val logger: FileLogger, + val tw: FileTrapWriter, + val file: IrFile +) { + val psi2Ir = getPsi2Ir().also { + if (it == null) { + logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})") + } + } + + fun linesOfCodeInFile(id: Label): Boolean { + if (psi2Ir == null) { + 
return false + } + val ktFile = psi2Ir.getKtFile(file) + if (ktFile == null) { + return false + } + linesOfCodeInPsi(id, ktFile, file) + // Even if linesOfCodeInPsi didn't manage to extract any + // information, if we got as far as calling it then we have + // PSI info for the file + return true + } + + fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label): Boolean { + if (psi2Ir == null) { + return false + } + val p = psi2Ir.findPsiElement(d, file) + if (p == null) { + return false + } + linesOfCodeInPsi(id, p, d) + // Even if linesOfCodeInPsi didn't manage to extract any + // information, if we got as far as calling it then we have + // PSI info for the declaration + return true + } + + private fun linesOfCodeInPsi(id: Label, root: PsiElement, e: IrElement) { + val document = root.getContainingFile().getViewProvider().getDocument() + if (document == null) { + logger.errorElement("Cannot find document for PSI", e) + tw.writeNumlines(id, 0, 0, 0) + return + } + + val rootRange = root.getTextRange() + val rootStartOffset = rootRange.getStartOffset() + val rootEndOffset = rootRange.getEndOffset() + if (rootStartOffset < 0 || rootEndOffset < 0) { + // This is synthetic, or has an invalid location + tw.writeNumlines(id, 0, 0, 0) + return + } + val rootFirstLine = document.getLineNumber(rootStartOffset) + val rootLastLine = document.getLineNumber(rootEndOffset) + if (rootLastLine < rootFirstLine) { + logger.errorElement("PSI ends before it starts", e) + tw.writeNumlines(id, 0, 0, 0) + return + } + val numLines = 1 + rootLastLine - rootFirstLine + val lineContents = Array(numLines) { LineContent() } + + val visitor = + object : KtVisitor<Unit, Unit>() { + override fun visitElement(element: PsiElement) { + val isComment = element is PsiComment + // Comments may include nodes that aren't PsiComments, + // so we don't want to visit them or we'll think they + // are code. 
+ if (!isComment) { + element.acceptChildren(this) + } + + if (element is PsiWhiteSpace) { + return + } + // Leaf nodes are assumed to be tokens, and + // therefore we count any lines that they are on. + // For comments, we actually need to look at the + // outermost node, as the leaves of KDocs don't + // necessarily cover all lines. + if (isComment || element.getChildren().size == 0) { + val range = element.getTextRange() + val startOffset = range.getStartOffset() + val endOffset = range.getEndOffset() + // The PSI doesn't seem to have anything like + // the IR's UNDEFINED_OFFSET and SYNTHETIC_OFFSET, + // but < 0 still seem to represent bad/unknown + // locations. + if (startOffset < 0 || endOffset < 0) { + logger.errorElement("PSI element has negative offset", e) + return + } + if (startOffset > endOffset) { + logger.errorElement("PSI element has negative size", e) + return + } + // We might get e.g. an import list for a file + // with no imports, which claims to have start + // and end offsets of 0. Anything of 0 width + // we therefore just skip. 
+ if (startOffset == endOffset) { + return + } + val firstLine = document.getLineNumber(startOffset) + val lastLine = document.getLineNumber(endOffset) + if (firstLine < rootFirstLine) { + logger.errorElement("PSI element starts before root", e) + return + } else if (lastLine > rootLastLine) { + logger.errorElement("PSI element ends after root", e) + return + } + for (line in firstLine..lastLine) { + val lineContent = lineContents[line - rootFirstLine] + if (isComment) { + lineContent.containsComment = true + } else { + lineContent.containsCode = true + } + } + } + } + } + root.accept(visitor) + val code = lineContents.count { it.containsCode } + val comment = lineContents.count { it.containsComment } + tw.writeNumlines(id, numLines, code, comment) + } + + private class LineContent { + var containsComment = false + var containsCode = false + } +} diff --git a/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_5_0/LinesOfCodeLighterAST.kt b/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_5_0/LinesOfCodeLighterAST.kt new file mode 100644 index 00000000000..07b8b84c062 --- /dev/null +++ b/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_5_0/LinesOfCodeLighterAST.kt @@ -0,0 +1,19 @@ +package com.github.codeql + +import org.jetbrains.kotlin.ir.declarations.* + +class LinesOfCodeLighterAST( + val logger: FileLogger, + val tw: FileTrapWriter, + val file: IrFile +) { + // We don't support LighterAST with old Kotlin versions + fun linesOfCodeInFile(@Suppress("UNUSED_PARAMETER") id: Label): Boolean { + return false + } + + // We don't support LighterAST with old Kotlin versions + fun linesOfCodeInDeclaration(@Suppress("UNUSED_PARAMETER") d: IrDeclaration, @Suppress("UNUSED_PARAMETER") id: Label): Boolean { + return false + } +} diff --git a/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/CommentExtractorLighterAST.kt b/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/CommentExtractorLighterAST.kt index 
ec3b1602ed1..35610ee163d 100644 --- a/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/CommentExtractorLighterAST.kt +++ b/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/CommentExtractorLighterAST.kt @@ -13,8 +13,6 @@ import org.jetbrains.kotlin.ir.visitors.acceptVoid import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid import org.jetbrains.kotlin.kdoc.lexer.KDocTokens import org.jetbrains.kotlin.lexer.KtTokens -import org.jetbrains.kotlin.psi.psiUtil.endOffset -import org.jetbrains.kotlin.psi.psiUtil.startOffset import org.jetbrains.kotlin.util.getChildren class CommentExtractorLighterAST(fileExtractor: KotlinFileExtractor, file: IrFile, fileLabel: Label): CommentExtractor(fileExtractor, file, fileLabel) { diff --git a/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/LinesOfCodeLighterAST.kt b/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/LinesOfCodeLighterAST.kt new file mode 100644 index 00000000000..49482f2c2f1 --- /dev/null +++ b/java/kotlin-extractor/src/main/kotlin/utils/versions/v_1_9_0-Beta/LinesOfCodeLighterAST.kt @@ -0,0 +1,133 @@ +package com.github.codeql + +import com.intellij.lang.LighterASTNode +import com.intellij.util.diff.FlyweightCapableTreeStructure +import org.jetbrains.kotlin.config.KotlinCompilerVersion +import org.jetbrains.kotlin.fir.backend.FirMetadataSource +import org.jetbrains.kotlin.ir.declarations.* +import org.jetbrains.kotlin.ir.IrElement +import org.jetbrains.kotlin.KtSourceElement +import org.jetbrains.kotlin.lexer.KtTokens +import org.jetbrains.kotlin.util.getChildren + +class LinesOfCodeLighterAST( + val logger: FileLogger, + val tw: FileTrapWriter, + val file: IrFile +) { + val fileEntry = file.fileEntry + + fun linesOfCodeInFile(id: Label): Boolean { + val sourceElement = (file.metadata as? 
FirMetadataSource.File)?.files?.elementAtOrNull(0)?.source + if (sourceElement == null) { + return false + } + linesOfCodeInLighterAST(id, file, sourceElement) + // Even if linesOfCodeInLighterAST didn't manage to extract any + // information, if we got as far as calling it then we have + // LighterAST info for the file + return true + } + + fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label): Boolean { + val metadata = (d as? IrMetadataSourceOwner)?.metadata + val sourceElement = (metadata as? FirMetadataSource)?.fir?.source + if (sourceElement == null) { + return false + } + linesOfCodeInLighterAST(id, d, sourceElement) + // Even if linesOfCodeInLighterAST didn't manage to extract any + // information, if we got as far as calling it then we have + // LighterAST info for the declaration + return true + } + + private fun linesOfCodeInLighterAST(id: Label, e: IrElement, s: KtSourceElement) { + val rootStartOffset = s.startOffset + val rootEndOffset = s.endOffset + if (rootStartOffset < 0 || rootEndOffset < 0) { + // This is synthetic, or has an invalid location + tw.writeNumlines(id, 0, 0, 0) + return + } + val rootFirstLine = fileEntry.getLineNumber(rootStartOffset) + val rootLastLine = fileEntry.getLineNumber(rootEndOffset) + if (rootLastLine < rootFirstLine) { + logger.errorElement("Source element ends before it starts", e) + tw.writeNumlines(id, 0, 0, 0) + return + } + + val numLines = 1 + rootLastLine - rootFirstLine + val lineContents = Array(numLines) { LineContent() } + + val treeStructure = s.treeStructure + + processSubtree(e, treeStructure, rootFirstLine, rootLastLine, lineContents, s.lighterASTNode) + + val code = lineContents.count { it.containsCode } + val comment = lineContents.count { it.containsComment } + tw.writeNumlines(id, numLines, code, comment) + } + + private fun processSubtree(e: IrElement, treeStructure: FlyweightCapableTreeStructure<LighterASTNode>, rootFirstLine: Int, rootLastLine: Int, lineContents: Array<LineContent>, node: LighterASTNode) { + if 
(KtTokens.WHITESPACES.contains(node.tokenType)) { + return + } + + val isComment = KtTokens.COMMENTS.contains(node.tokenType) + val children = node.getChildren(treeStructure) + + // Leaf nodes are assumed to be tokens, and + // therefore we count any lines that they are on. + // For comments, we actually need to look at the + // outermost node, as the leaves of KDocs don't + // necessarily cover all lines. + if (isComment || children.isEmpty()) { + val startOffset = node.getStartOffset() + val endOffset = node.getEndOffset() + if (startOffset < 0 || endOffset < 0) { + logger.errorElement("LighterAST node has negative offset", e) + return + } + if (startOffset > endOffset) { + logger.errorElement("LighterAST node has negative size", e) + return + } + // This may not be possible with LighterAST, but: + // We might get e.g. an import list for a file + // with no imports, which claims to have start + // and end offsets of 0. Anything of 0 width + // we therefore just skip. + if (startOffset == endOffset) { + return + } + val firstLine = fileEntry.getLineNumber(startOffset) + val lastLine = fileEntry.getLineNumber(endOffset) + if (firstLine < rootFirstLine) { + logger.errorElement("LighterAST element starts before root", e) + return + } else if (lastLine > rootLastLine) { + logger.errorElement("LighterAST element ends after root", e) + return + } + for (line in firstLine..lastLine) { + val lineContent = lineContents[line - rootFirstLine] + if (isComment) { + lineContent.containsComment = true + } else { + lineContent.containsCode = true + } + } + } else { + for(child in children) { + processSubtree(e, treeStructure, rootFirstLine, rootLastLine, lineContents, child) + } + } + } + + private class LineContent { + var containsComment = false + var containsCode = false + } +} diff --git a/java/ql/integration-tests/all-platforms/kotlin/extractor_information_kotlin2/ExtractorInformation.expected 
b/java/ql/integration-tests/all-platforms/kotlin/extractor_information_kotlin2/ExtractorInformation.expected index e1fe4c6ee39..b743aae2faf 100644 --- a/java/ql/integration-tests/all-platforms/kotlin/extractor_information_kotlin2/ExtractorInformation.expected +++ b/java/ql/integration-tests/all-platforms/kotlin/extractor_information_kotlin2/ExtractorInformation.expected @@ -1,3 +1,7 @@ | Number of files with extension jar | 1 | | Number of files with extension kt | 1 | +| Number of lines of code | 2 | +| Number of lines of code with extension kt | 2 | +| Total number of lines | 3 | +| Total number of lines with extension kt | 3 | | Uses Kotlin 2: true | 1 |