Skip to content

Commit

Permalink
Add file content for php2cpg with broken unicode (joernio#4428)
Browse files Browse the repository at this point in the history
  • Loading branch information
johannescoetzee authored Apr 5, 2024
1 parent aa68968 commit 35cd824
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import org.slf4j.LoggerFactory
import overflowdb.BatchedUpdate

class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidation: ValidationMode)
extends AstCreatorBase(filename)
class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String], disableFileContent: Boolean)(implicit
withSchemaValidation: ValidationMode
) extends AstCreatorBase(filename)
with AstNodeBuilder[PhpNode, AstCreator] {

private val logger = LoggerFactory.getLogger(AstCreator.getClass)
Expand Down Expand Up @@ -71,6 +72,9 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio
}

private def astForPhpFile(file: PhpFile): Ast = {
val fileNode = NewFile().name(filename)
fileContent.foreach(fileNode.content(_))

scope.pushNewScope(globalNamespace)

val (globalDeclStmts, globalMethodStmts) =
Expand All @@ -94,7 +98,7 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio

scope.popScope() // globalNamespace

Ast(globalNamespace).withChild(globalTypeDeclAst)
Ast(fileNode).withChild(Ast(globalNamespace).withChild(globalTypeDeclAst))
}

private def astsForStmt(stmt: PhpStmt): List[Ast] = {
Expand Down Expand Up @@ -1701,6 +1705,12 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio
protected def lineEnd(phpNode: PhpNode): Option[Integer] = None
protected def columnEnd(phpNode: PhpNode): Option[Integer] = None
protected def code(phpNode: PhpNode): String = "" // Sadly, the Php AST does not carry any code fields

override protected def offset(phpNode: PhpNode): Option[(Int, Int)] = {
Option.when(!disableFileContent) {
(phpNode.attributes.startFilePos, phpNode.attributes.endFilePos)
}
}
}

object AstCreator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,18 @@ object Domain {
// Used for creating the default constructor.
val ConstructorMethodName = "__construct"

final case class PhpAttributes(lineNumber: Option[Integer], kind: Option[Int])
final case class PhpAttributes(lineNumber: Option[Integer], kind: Option[Int], startFilePos: Int, endFilePos: Int)
object PhpAttributes {
val Empty: PhpAttributes = PhpAttributes(None, None)
val Empty: PhpAttributes = PhpAttributes(None, None, -1, -1)

def apply(json: Value): PhpAttributes = {
Try(json("attributes")) match {
case Success(Obj(attributes)) =>
val startLine = attributes.get("startLine").map(num => Integer.valueOf(num.num.toInt))
val kind = attributes.get("kind").map(_.num.toInt)
PhpAttributes(startLine, kind)
val startLine = attributes.get("startLine").map(num => Integer.valueOf(num.num.toInt))
val kind = attributes.get("kind").map(_.num.toInt)
val startFilePos = attributes.get("startFilePos").map(_.num.toInt).getOrElse(-1)
val endFilePos = attributes.get("endFilePos").map(_.num.toInt + 1).getOrElse(-1)
PhpAttributes(startLine, kind, startFilePos, endFilePos)

case Success(Arr(_)) =>
logger.debug(s"Found array attributes in $json")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import java.nio.file.Paths
import scala.io.Source
import scala.util.{Failure, Success, Try}

class PhpParser private (phpParserPath: String, phpIniPath: String) {
class PhpParser private (phpParserPath: String, phpIniPath: String, disableFileContent: Boolean) {

private val logger = LoggerFactory.getLogger(this.getClass)

Expand All @@ -19,14 +19,15 @@ class PhpParser private (phpParserPath: String, phpIniPath: String) {
s"php --php-ini $phpIniPath $phpParserPath $phpParserCommands $filename"
}

def parseFile(inputPath: String): Option[PhpFile] = {
def parseFile(inputPath: String): Option[(PhpFile, Option[String])] = {
val inputFile = File(inputPath)
val inputFilePath = inputFile.canonicalPath
val inputDirectory = inputFile.parent.canonicalPath
val command = phpParseCommand(inputFilePath)
ExternalCommand.run(command, inputDirectory) match {
case Success(output) =>
processParserOutput(output, inputFilePath)
val content = Option.unless(disableFileContent)(inputFile.contentAsString)
processParserOutput(output, inputFilePath).map((_, content))
case Failure(exception) =>
logger.error(s"Failure running php-parser with $command", exception.getMessage)
None
Expand Down Expand Up @@ -118,6 +119,6 @@ object PhpParser {
for (
phpParserPath <- maybePhpParserPath(config);
phpIniPath <- maybePhpIniPath(config)
) yield new PhpParser(phpParserPath, phpIniPath)
) yield new PhpParser(phpParserPath, phpIniPath, config.disableFileContent)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ class AstCreationPass(config: Config, cpg: Cpg, parser: PhpParser)(implicit with
File(config.inputPath).relativize(File(filename)).toString
}
parser.parseFile(filename) match {
case Some(parseResult) =>
diffGraph.absorb(new AstCreator(relativeFilename, parseResult)(config.schemaValidation).createAst())
case Some((parseResult, fileContent)) =>
diffGraph.absorb(
new AstCreator(relativeFilename, parseResult, fileContent, config.disableFileContent)(config.schemaValidation)
.createAst()
)

case None =>
logger.warn(s"Could not parse file $filename. Results will be missing!")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.php2cpg.querying

import io.joern.php2cpg.Config
import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture
import io.joern.x2cpg.Defines
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, Operators}
Expand Down Expand Up @@ -107,15 +108,47 @@ class MethodTests extends PhpCode2CpgFixture {
cpg.method.nameExact("<global>").fullName.l shouldBe List("test.php:<global>")
}

"methods with non-unicode-legal characters should be created with escaped char codes" in {
"methods with non-unicode-legal characters" should {
val cpg = code("""<?php
|function foo() {
| $x = "\xFF";
|}
|""".stripMargin)
|""".stripMargin).withConfig(Config().withDisableFileContent(false))

cpg.file.method.name.toSet shouldBe Set("<global>", "foo")
cpg.assignment.code.l shouldBe List("$x = \"\\\\xFF\"")
"be created with escaped char codes" in {
cpg.file.method.name.toSet shouldBe Set("<global>", "foo")
cpg.assignment.code.l shouldBe List("$x = \"\\\\xFF\"")
}

"set the file content correctly" in {
inside(cpg.method.nameExact("foo").l) { case List(fooMethod) =>
val offsetStart = fooMethod.offset.get
val offsetEnd = fooMethod.offsetEnd.get
fooMethod.file.head.content.substring(offsetStart, offsetEnd) shouldBe
"""function foo() {
| $x = "\xFF";
|}""".stripMargin
}
}
}

"methods with unicode characters in source" should {
val cpg = code("""<?php
|function foo() {
| $x = "🙂";
|}
|""".stripMargin).withConfig(Config().withDisableFileContent(false))

"set the content field correctly" ignore {
inside(cpg.method.nameExact("foo").l) { case List(fooMethod) =>
val offsetStart = fooMethod.offset.get
val offsetEnd = fooMethod.offsetEnd.get
fooMethod.file.head.content.substring(offsetStart, offsetEnd) shouldBe
"""function foo() {
| $x = "🙂";
|}""".stripMargin
}
}
}

"explicit constructors" should {
Expand All @@ -126,7 +159,7 @@ class MethodTests extends PhpCode2CpgFixture {
|}
|""".stripMargin,
fileName = "foo.php"
)
).withConfig(Config().withDisableFileContent(false))

"have the constructor modifier set" in {
inside(cpg.method.nameExact("__construct").l) { case List(constructor) =>
Expand All @@ -142,6 +175,14 @@ class MethodTests extends PhpCode2CpgFixture {
constructor.file.name.l shouldBe List("foo.php")
}
}

"have the content offsets set correctly" in {
inside(cpg.method.nameExact("__construct").l) { case List(constructor) =>
val offsetStart = constructor.offset.get
val offsetEnd = constructor.offsetEnd.get
constructor.file.head.content.substring(offsetStart, offsetEnd) shouldBe "function __construct() {}"
}
}
}

"default constructors" should {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,52 +1,78 @@
package io.joern.php2cpg.querying

import io.joern.php2cpg.Config
import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture
import io.joern.x2cpg.Defines
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, Operators}
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Literal, Local, Member, Method}
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.codepropertygraph.generated.nodes.Block
import io.shiftleft.codepropertygraph.generated.nodes.MethodRef
import io.shiftleft.codepropertygraph.generated.nodes.TypeRef

class TypeDeclTests extends PhpCode2CpgFixture {

"typedecl nodes for empty classes should have the correct basic properties set" in {
"typedecl nodes for empty classes" should {
val cpg = code("""<?php
|class A extends B implements C, D {}
|""".stripMargin)
|class A extends B implements C, D {}
|""".stripMargin).withConfig(Config().withDisableFileContent(false))

"have the correct basic properties set" in {
inside(cpg.typeDecl.nameExact("A").l) { case List(typeDecl) =>
typeDecl.fullName shouldBe "A"
typeDecl.lineNumber shouldBe Some(2)
typeDecl.code shouldBe "class A extends B implements C, D"
}
}

inside(cpg.typeDecl.nameExact("A").l) { case List(typeDecl) =>
typeDecl.fullName shouldBe "A"
typeDecl.lineNumber shouldBe Some(2)
typeDecl.code shouldBe "class A extends B implements C, D"
"have the content offsets set correctly" in {
inside(cpg.typeDecl.name("A").l) { case List(typeDecl) =>
val offsetStart = typeDecl.offset.get
val offsetEnd = typeDecl.offsetEnd.get
typeDecl.file.head.content.substring(offsetStart, offsetEnd) shouldBe "class A extends B implements C, D {}"
}
}
}

"class methods should be created correctly" in {
"class methods" should {
val cpg = code("""<?php
|class Foo {
| final public function foo(int $x): int {
| return 0;
| }
|}
|""".stripMargin)

inside(cpg.method.name("foo").l) { case List(fooMethod) =>
fooMethod.fullName shouldBe s"Foo->foo"
fooMethod.signature shouldBe s"${Defines.UnresolvedSignature}(1)"
fooMethod.modifier.map(_.modifierType).toSet shouldBe Set(ModifierTypes.FINAL, ModifierTypes.PUBLIC)
fooMethod.methodReturn.typeFullName shouldBe "int"
inside(fooMethod.parameter.l) { case List(thisParam, xParam) =>
thisParam.name shouldBe "this"
thisParam.code shouldBe "this"
thisParam.dynamicTypeHintFullName should contain("Foo")
thisParam.typeFullName shouldBe "Foo"
thisParam.index shouldBe 0

xParam.code shouldBe "$x"
xParam.typeFullName shouldBe "int"
xParam.index shouldBe 1
|class Foo {
| final public function foo(int $x): int {
| return 0;
| }
|}
|""".stripMargin).withConfig(Config().withDisableFileContent(false))

"be created correctly" in {
inside(cpg.method.name("foo").l) { case List(fooMethod) =>
fooMethod.fullName shouldBe s"Foo->foo"
fooMethod.signature shouldBe s"${Defines.UnresolvedSignature}(1)"
fooMethod.modifier.map(_.modifierType).toSet shouldBe Set(ModifierTypes.FINAL, ModifierTypes.PUBLIC)
fooMethod.methodReturn.typeFullName shouldBe "int"
inside(fooMethod.parameter.l) { case List(thisParam, xParam) =>
thisParam.name shouldBe "this"
thisParam.code shouldBe "this"
thisParam.dynamicTypeHintFullName should contain("Foo")
thisParam.typeFullName shouldBe "Foo"
thisParam.index shouldBe 0

xParam.code shouldBe "$x"
xParam.typeFullName shouldBe "int"
xParam.index shouldBe 1
}
}
}

"have the content offsets set correctly" in {
inside(cpg.typeDecl.name("Foo").l) { case List(typeDecl) =>
val offsetStart = typeDecl.offset.get
val offsetEnd = typeDecl.offsetEnd.get
typeDecl.file.head.content.substring(offsetStart, offsetEnd) shouldBe
"""class Foo {
| final public function foo(int $x): int {
| return 0;
| }
|}""".stripMargin
}
}
}
Expand Down

0 comments on commit 35cd824

Please sign in to comment.