From add96fe7d3401f2e8075d018ffc1ed283d510dff Mon Sep 17 00:00:00 2001 From: Johannes Coetzee Date: Wed, 7 Feb 2024 17:47:25 +0100 Subject: [PATCH] [php2cpg] Fix orphan identifier issue (#4133) * Fix orphan identifiers in php2cpg --- .../php2cpg/astcreation/AstCreator.scala | 77 +++++++++---------- .../joern/php2cpg/querying/ClosureTests.scala | 40 +++++++++- .../querying/ControlStructureTests.scala | 30 ++++++++ .../joern/php2cpg/querying/MethodTests.scala | 27 ++++--- 4 files changed, 121 insertions(+), 53 deletions(-) diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala index f37568edd855..c507701dc5f0 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala @@ -542,7 +542,6 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio } private def astForForeachStmt(stmt: PhpForeachStmt): Ast = { - val iteratorAst = astForExpr(stmt.iterExpr) val iterIdentifier = getTmpIdentifier(stmt, maybeTypeFullName = None, prefix = "iter_") val assignItemTargetAst = stmt.keyVar match { @@ -592,7 +591,7 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio val bodyAst = stmtBodyBlockAst(stmt) val ampPrefix = if (stmt.assignByRef) "&" else "" - val foreachCode = s"foreach (${iteratorAst.rootCodeOrEmpty} as $ampPrefix${assignItemTargetAst.rootCodeOrEmpty})" + val foreachCode = s"foreach (${iterValue.rootCodeOrEmpty} as $ampPrefix${assignItemTargetAst.rootCodeOrEmpty})" val foreachNode = controlStructureNode(stmt, ControlStructureTypes.FOR, foreachCode) Ast(foreachNode) .withChild(wrapMultipleInBlock(iteratorAssignAst :: itemInitAst :: Nil, line(stmt))) @@ -664,29 +663,32 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio private def astsForStaticStmt(stmt: PhpStaticStmt): List[Ast] = { stmt.vars.flatMap { staticVarDecl => - val variableAst = astForVariableExpr(staticVarDecl.variable) - val maybeValueAst = staticVarDecl.defaultValue.map(astForExpr) + staticVarDecl.variable match { + case PhpVariable(PhpNameExpr(name, _), _) => + val maybeDefaultValueAst = staticVarDecl.defaultValue.map(astForExpr) - val code = variableAst.rootCode.getOrElse(NameConstants.Unknown) - val name = variableAst.root match { - case Some(identifier: NewIdentifier) => identifier.name - case _ => code - } + val code = s"static $$$name" + val typeFullName = maybeDefaultValueAst.flatMap(_.rootType).getOrElse(TypeConstants.Any) - val local = localNode(stmt, name, s"static $code", variableAst.rootType.getOrElse(TypeConstants.Any)) - scope.addToScope(local.name, local) + val local = localNode(stmt, name, code, maybeDefaultValueAst.flatMap(_.rootType).getOrElse(TypeConstants.Any)) + scope.addToScope(local.name, local) - variableAst.root.collect { case identifier: NewIdentifier => - diffGraph.addEdge(identifier, local, EdgeTypes.REF) - } + val assignmentAst = maybeDefaultValueAst.map { defaultValue => + val variableNode = identifierNode(stmt, name, s"$$$name", typeFullName) + val variableAst = Ast(variableNode).withRefEdge(variableNode, local) - val defaultAssignAst = maybeValueAst.map { valueAst => - val valueCode = s"static $code = ${valueAst.rootCodeOrEmpty}" - val assignNode = newOperatorCallNode(Operators.assignment, valueCode, line = line(stmt)) - callAst(assignNode, variableAst :: valueAst :: Nil) - } + val assignCode = s"$code = ${defaultValue.rootCodeOrEmpty}" + val assignNode = newOperatorCallNode(Operators.assignment, assignCode, line = line(stmt)) + + callAst(assignNode, variableAst :: defaultValue :: Nil) + } - Ast(local) :: defaultAssignAst.toList + Ast(local) :: assignmentAst.toList + + case other => + logger.warn(s"Unexpected static variable type ${other} in $filename") + Nil + } } } @@ -1027,11 +1029,9 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio private def astForNameExpr(expr: PhpNameExpr): Ast = { val identifier = identifierNode(expr, expr.name, expr.name, TypeConstants.Any) - scope.lookupVariable(identifier.name).foreach { declaringNode => - diffGraph.addEdge(identifier, declaringNode, EdgeTypes.REF) - } + val declaringNode = scope.lookupVariable(identifier.name) - Ast(identifier) + Ast(identifier).withRefEdges(identifier, declaringNode.toList) } /** This is used to rewrite the short form $xs[] = as array_push($xs, ) to avoid having to @@ -1383,23 +1383,18 @@ class AstCreator(filename: String, phpAst: PhpFile)(implicit withSchemaValidatio val methodRef = methodRefNode(closureExpr, methodName, methodName, TypeConstants.Any) val localsForUses = closureExpr.uses.flatMap { closureUse => - val variableAst = astForExpr(closureUse.variable) - val codePref = if (closureUse.byRef) "&" else "" - - variableAst.root match { - case Some(identifier: NewIdentifier) => - // This is the expected case and is handled well - Some(localNode(closureExpr, identifier.name, codePref ++ identifier.code, TypeConstants.Any)) - case Some(expr: ExpressionNew) => - // Results here may be bad, but its' the best we're likely to do - Some(localNode(closureExpr, expr.code, codePref ++ expr.code, TypeConstants.Any)) - case Some(other) => - // This should never happen - logger.warn(s"Found ast '$other' for closure use in $filename") - None - case None => - // This should never happen - logger.warn(s"Found empty ast for closure use in $filename") + closureUse.variable match { + case PhpVariable(PhpNameExpr(name, _), _) => + val typeFullName = scope + .lookupVariable(name) + .flatMap(_.properties.get(PropertyNames.TYPE_FULL_NAME).map(_.toString)) + .getOrElse(TypeConstants.Any) + val byRefPrefix = if (closureUse.byRef) "&" else "" + + Some(localNode(closureExpr, name, s"$byRefPrefix$$$name", typeFullName)) + + case other => + logger.warn(s"Found incorrect closure use variable '$other' in $filename") None } } diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/ClosureTests.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/ClosureTests.scala index e962843bb059..c6e5d7726767 100644 --- a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/ClosureTests.scala +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/querying/ClosureTests.scala @@ -3,7 +3,9 @@ package io.joern.php2cpg.querying import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture import io.joern.x2cpg.Defines import io.shiftleft.codepropertygraph.generated.nodes.{Call, ClosureBinding, Identifier, Local, MethodRef, Return} -import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.* + +import scala.util.Try class ClosureTests extends PhpCode2CpgFixture { @@ -42,17 +44,35 @@ class ClosureTests extends PhpCode2CpgFixture { } } + "long-form closures using captured parameters" should { + val cpg = code(""" Try(node.astParent.toList).isFailure).toSet shouldBe Set.empty + } + } + "long-form closures with uses " should { val cpg = code( """ Try(node.astParent.toList).isFailure).toSet shouldBe Set.empty + } + "have the correct method AST" in { val closureMethod = inside(cpg.method.name(".*.*").l) { case List(closureMethod) => closureMethod @@ -82,7 +102,7 @@ class ClosureTests extends PhpCode2CpgFixture { use2.code shouldBe "&$use2" use2.closureBindingId shouldBe Some(s"foo.php:$expectedName:use2") - echoCall.code shouldBe "echo $value" + echoCall.code shouldBe "sink($value,$use1)" } } @@ -104,6 +124,20 @@ class ClosureTests extends PhpCode2CpgFixture { } } + "arrow functions with captures" should { + val cpg = code( + """ $value + $captured; + |""".stripMargin, + fileName = "foo.php" + ) + + "not leave orphan identifiers" in { + cpg.identifier.filter(node => Try(node.astParent.toList).isFailure).toList shouldBe Nil + } + } + "arrow functions should be represented as closures with return statements" should { val cpg = code( """ Try(node.astParent).isFailure).toList shouldBe Nil } + "foreach statements referencing parameters in methods should not create parentless identifiers" in { + val cpg = code(""" Try(node.astParent).isFailure).toList shouldBe Nil + } + + "foreach statements referencing regular parameters should not create parentless identifiers" in { + val cpg = code(""" Try(node.astParent).isFailure).toList shouldBe Nil + } + + "foreach statements referencing locals should not create parentless identifiers" in { + val cpg = code(""" Try(node.astParent).isFailure).toList shouldBe Nil + } + "foreach statements with only simple values should be represented as a for" in { val cpg = code(""" - xLocal.name shouldBe "x" - xLocal.code shouldBe "static $x" - xLocal.lineNumber shouldBe Some(3) + "not leave orphan identifiers" in { + cpg.identifier.filter(identifier => Try(identifier.astParent.isEmpty).getOrElse(true)).toList shouldBe Nil + } - yLocal.name shouldBe "y" - yLocal.code shouldBe "static $y" - yLocal.lineNumber shouldBe Some(3) + "be represented as the correct local nodes" in { + inside(cpg.method.name("foo").body.astChildren.l) { case List(xLocal: Local, yLocal: Local) => + xLocal.name shouldBe "x" + xLocal.code shouldBe "static $x" + xLocal.lineNumber shouldBe Some(3) + + yLocal.name shouldBe "y" + yLocal.code shouldBe "static $y" + yLocal.lineNumber shouldBe Some(3) + } } + } "static variables with default values should have the correct initialisers" in {