Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing function signature of parse to accept the file content instead of a file #1706

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ package de.fraunhofer.aisec.cpg

import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.frontends.SupportsNewParse
import de.fraunhofer.aisec.cpg.frontends.SupportsParallelParsing
import de.fraunhofer.aisec.cpg.frontends.TranslationException
import de.fraunhofer.aisec.cpg.graph.Component
Expand All @@ -43,7 +44,10 @@ import java.util.concurrent.CompletableFuture
import java.util.concurrent.CompletionException
import java.util.concurrent.ExecutionException
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.io.path.absolute
import kotlin.io.path.readText
import kotlin.reflect.full.findAnnotation
import kotlin.time.DurationUnit
import org.slf4j.LoggerFactory

/** Main entry point for all source code translation for all language front-ends. */
Expand Down Expand Up @@ -116,6 +120,15 @@ private constructor(
}
}

log.info(
"Translated {} LoC in total ({} / LoC)",
result.stats.totalLinesOfCode,
(outerBench.duration / result.stats.totalLinesOfCode).toString(
DurationUnit.MILLISECONDS,
decimals = 3
)
)

return result
}

Expand Down Expand Up @@ -276,7 +289,7 @@ private constructor(
val future =
CompletableFuture.supplyAsync {
try {
return@supplyAsync parse(component, ctx, sourceLocation)
return@supplyAsync parse(component, result, ctx, sourceLocation)
} catch (e: TranslationException) {
throw RuntimeException("Error parsing $sourceLocation", e)
}
Expand Down Expand Up @@ -337,7 +350,7 @@ private constructor(

for (sourceLocation in sourceLocations) {
ctx.currentComponent = component
val f = parse(component, ctx, sourceLocation)
val f = parse(component, result, ctx, sourceLocation)
if (f != null) {
handleCompletion(result, usedFrontends, sourceLocation, f)
}
Expand Down Expand Up @@ -365,6 +378,7 @@ private constructor(
@Throws(TranslationException::class)
private fun parse(
component: Component,
result: TranslationResult,
ctx: TranslationContext,
sourceLocation: File,
): LanguageFrontend<*, *>? {
Expand All @@ -384,7 +398,30 @@ private constructor(
}
return null
}
component.addTranslationUnit(frontend.parse(sourceLocation))

// Check, if the frontend supports the new API
var tu =
if (frontend is SupportsNewParse) {
// Read the file contents and supply it to the frontend. This gives us a chance
// to do some statistics here, for example on the lines of code. For now, we
// just print it, in a future PR we will gather this information and consolidate
// it.
var path = sourceLocation.toPath().absolute()
var content = path.readText()
var linesOfCode = content.linesOfCode

log.info("{} has {} LoC", path, linesOfCode)

var tu = frontend.parse(content, path)

// Add the LoC. This needs to be synchronized on the stats object, because of
// parallel parsing
synchronized(result.stats) { result.stats.totalLinesOfCode += linesOfCode }
tu
} else {
frontend.parse(sourceLocation)
}
component.addTranslationUnit(tu)
} catch (ex: TranslationException) {
log.error("An error occurred during parsing of ${sourceLocation.name}: ${ex.message}")
if (config.failOnError) {
Expand Down Expand Up @@ -462,3 +499,12 @@ private constructor(
}
}
}

/**
 * This returns a VERY trivial count of the lines of code (mainly just the line count). This can be
 * extended to a real LoC algorithm at some point.
 *
 * A line is either terminated by a `'\n'` or is the non-empty remainder after the last `'\n'`, so
 * a file whose last line has no trailing line break is still counted completely. Blank lines and
 * comment lines are counted like any other line. The empty string has zero lines.
 *
 * @return the number of lines in this string, `0` if the string is empty
 */
val String.linesOfCode: Int
    get() {
        if (isEmpty()) {
            return 0
        }

        val lineBreaks = count { it == '\n' }

        // A final line that is not terminated by a line break is still a line; only counting the
        // line breaks would report 0 for "a" and 1 for "a\nb".
        return if (endsWith('\n')) lineBreaks else lineBreaks + 1
    }
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class TranslationResult(
return finalCtx
}

var stats = TranslationStats()

/**
* Checks if only a single software component has been analyzed and returns its translation
* units. For multiple software components, it aggregates the results.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg

import de.fraunhofer.aisec.cpg.helpers.MeasurementHolder
import de.fraunhofer.aisec.cpg.helpers.StatisticsHolder

/**
* This class provides some statistics about our translation process. At some point this will fully
* replace [StatisticsHolder] and [MeasurementHolder]
*
* The class itself performs no locking. Code that updates it from several threads (for example
* during parallel parsing) is expected to synchronize on the stats instance while doing so.
*/
class TranslationStats {

/**
* The total lines of code that were translated into the CPG. Starts at `0` and is increased by
* the line count of each parsed file; currently this is a plain line count rather than a real
* LoC metric.
*/
var totalLinesOfCode: Int = 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.frontends

import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
import java.nio.file.Path

/**
* Implemented by language frontends that can parse source code handed to them as a string instead
* of reading a file themselves. If a frontend implements this interface, the translation manager
* reads the file content itself and calls [parse] with that content and the file's absolute path;
* frontends that do not implement it are still called with the file.
*/
interface SupportsNewParse {
/**
* Parses the given [content] with the language frontend into a [TranslationUnitDeclaration]. If
* known, a [path] should be specified, so that the language frontend can potentially use more
* advanced features like module resolution.
*
* @param content the complete source code to parse
* @param path the location the [content] was read from, or `null` (the default) if there is no
*   backing file
* @return the translation unit created from [content]
*/
fun parse(content: String, path: Path? = null): TranslationUnitDeclaration
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ import java.nio.file.Path
import java.time.Duration
import java.time.Instant
import java.util.*
import kotlin.time.DurationUnit
import kotlin.time.toDuration
import org.slf4j.Logger
import org.slf4j.LoggerFactory

Expand Down Expand Up @@ -146,6 +148,7 @@ constructor(
) : MeasurementHolder(c, message, debug, holder) {

private val start: Instant
var duration: kotlin.time.Duration = kotlin.time.Duration.ZERO

/** Stops this benchmark and adds its measurement to its [StatisticsHolder]. */
fun stop() {
Expand All @@ -154,14 +157,17 @@ constructor(

/** Stops the time and computes the difference between the start and the current time in milliseconds. */
override fun addMeasurement(measurementKey: String?, measurementValue: String?): Any? {
val duration = Duration.between(start, Instant.now()).toMillis()
var duration = Duration.between(start, Instant.now()).toMillis()
measurements["${caller}: $message"] = "$duration ms"

logDebugMsg("$caller: $message done in $duration ms")

// update our holder, if we have any
holder?.addBenchmark(this)

// update our internal duration so that others can access it
this.duration = duration.toDuration(DurationUnit.MILLISECONDS)

return duration
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ import java.net.URI
import java.util.*

/** A SARIF compatible location referring to a location, i.e. file and region within the file. */
class PhysicalLocation(uri: URI, region: Region) {
class ArtifactLocation(val uri: URI) {
class PhysicalLocation(uri: URI?, region: Region) {
class ArtifactLocation(val uri: URI?) {

override fun toString(): String {
return uri.path.substring(uri.path.lastIndexOf('/') + 1)
return if (uri != null) {
uri.path
} else {
"unknown"
}
}

override fun equals(other: Any?): Boolean {
Expand All @@ -45,7 +49,7 @@ class PhysicalLocation(uri: URI, region: Region) {
override fun hashCode() = Objects.hashCode(uri)
}

val artifactLocation: ArtifactLocation
var artifactLocation: ArtifactLocation
var region: Region

init {
Expand All @@ -68,11 +72,7 @@ class PhysicalLocation(uri: URI, region: Region) {
companion object {
fun locationLink(location: PhysicalLocation?): String {
return if (location != null) {
(location.artifactLocation.uri.path +
":" +
location.region.startLine +
":" +
location.region.startColumn)
"${location.artifactLocation}:${location.region.startLine}:${location.region.startColumn}"
} else "unknown"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ package de.fraunhofer.aisec.cpg.frontends.python
import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.frontends.SupportsNewParse
import de.fraunhofer.aisec.cpg.frontends.TranslationException
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
Expand All @@ -39,15 +40,16 @@ import de.fraunhofer.aisec.cpg.passes.configuration.RegisterExtraPass
import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation
import de.fraunhofer.aisec.cpg.sarif.Region
import java.io.File
import java.net.URI
import java.nio.file.Path
import jep.python.PyObject
import kotlin.io.path.Path
import kotlin.io.path.absolute
import kotlin.io.path.name
import kotlin.io.path.nameWithoutExtension
import kotlin.math.min

@RegisterExtraPass(PythonAddDeclarationsPass::class)
class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: TranslationContext) :
LanguageFrontend<Python.AST.AST, Python.AST.AST?>(language, ctx) {
LanguageFrontend<Python.AST.AST, Python.AST.AST?>(language, ctx), SupportsNewParse {
private val lineSeparator = '\n' // TODO
private val tokenTypeIndex = 0
private val jep = JepSingleton // configure Jep
Expand All @@ -62,21 +64,32 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
* new [PythonLanguageFrontend] instance per file.
*/
private lateinit var fileContent: String
private lateinit var uri: URI
private var filePath: Path? = null

@Throws(TranslationException::class)
override fun parse(file: File): TranslationUnitDeclaration {
fileContent = file.readText(Charsets.UTF_8)
uri = file.toURI()
return parse(file.readText(Charsets.UTF_8), file.toPath())
maximiliankaul marked this conversation as resolved.
Show resolved Hide resolved
}

override fun parse(content: String, path: Path?): TranslationUnitDeclaration {
this.fileContent = content
this.filePath = path

jep.getInterp().use {
it.set("content", fileContent)
it.set("filename", file.absolutePath)
it.set("content", content)
it.set(
"filename",
if (path != null) {
path.absolute().toString()
} else {
"<unknown>"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want this constant for CPG consistency? Python recommends <string>; see https://docs.python.org/3/library/functions.html#compile

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are using ast.parse, and the default value of this parameter there is <unknown>. See https://docs.python.org/3/library/ast.html#ast.parse

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Funny, that the ast.parse doc says:

Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).

But I'm ok with the "unknown". This should probably be a sane constant for all frontends if there is no file available?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Funny, that the ast.parse doc says:

Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).

But I'm ok with the "unknown". This should probably be a sane constant for all frontends if there is no file available?

I was intentionally using the python specific value here, but I am fine with either way. Not sure if the SARIF standard defines something in this case, since we are using the location information from SARIF.

}
)
it.exec("import ast")
it.exec("parsed = ast.parse(content, filename=filename, type_comments=True)")

val pyAST = it.getValue("parsed") as PyObject
val tud = pythonASTtoCPG(pyAST, file.name)
val tud = pythonASTtoCPG(pyAST, path)

if (config.matchCommentsToNodes) {
it.exec("import tokenize")
Expand Down Expand Up @@ -236,7 +249,7 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
override fun locationOf(astNode: Python.AST.AST): PhysicalLocation? {
return if (astNode is Python.AST.WithLocation) {
PhysicalLocation(
uri,
filePath?.toUri(),
Region(
startLine = astNode.lineno,
endLine = astNode.end_lineno,
Expand All @@ -253,17 +266,22 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
// will be invoked by native function
}

private fun pythonASTtoCPG(pyAST: PyObject, path: String): TranslationUnitDeclaration {
private fun pythonASTtoCPG(pyAST: PyObject, path: Path?): TranslationUnitDeclaration {
val pythonASTModule =
fromPython(pyAST) as? Python.AST.Module
?: TODO(
"Python ast of type ${fromPython(pyAST).javaClass} is not supported yet"
) // could be one of "ast.{Module,Interactive,Expression,FunctionType}

val tud = newTranslationUnitDeclaration(path, rawNode = pythonASTModule)
val tud = newTranslationUnitDeclaration(path?.name, rawNode = pythonASTModule)
scopeManager.resetToGlobal(tud)

val nsdName = Path(path).nameWithoutExtension
val nsdName =
if (path != null) {
path.nameWithoutExtension
} else {
"unknown"
}
val nsd = newNamespaceDeclaration(nsdName, rawNode = pythonASTModule)
tud.addDeclaration(nsd)

Expand Down
Loading
Loading