From 078d1c56168166a7682b1514962f20d99aba85d0 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Wed, 7 Feb 2024 12:53:19 +0200 Subject: [PATCH] [x2cpg] Program Summary & Typed Scope (#4131) A few frontends each face the problems of: * What types are available at this point in the program? How do I resolve a type alias to its full name? * How do I deal with tracking possible method calls? * How do I deal with conflicts if more than one type/method share the same name? * Do I need to download my dependencies to provide the type meta-data? This PR standardises the various structures found across frontends that try to deal with this by having a program summary and modified scope object. The `ProgramSummary` gives enough bare-bones structure to describe the meta-data of high level interprocedurally scoped objects. The idea is that this is populated via a pre-parse before the `AstCreator` runs, and can be populated with pre-computed type stubs. The `TypedScope` trait allows one to keep track of which types are currently available at the current point of the program to help manage type references and aliases. See the test for an illustration of this scoped type look-up behaviour. The `TypeMap` and `CSharpScope` structures were pilot studies into this strategy and have proven effective. This structure tries to be fairly broad and flexible, and will be backported into CSharpSrc and newly implemented in the new Ruby frontend (useful for parenthesisless calls of methods imported into the scope).
--- .../x2cpg/datastructures/ProgramSummary.scala | 318 ++++++++++++++++++ .../datastructures/ProgramSummaryTests.scala | 118 +++++++ 2 files changed, 436 insertions(+) create mode 100644 joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala create mode 100644 joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala new file mode 100644 index 000000000000..7f8fe5ba1ee5 --- /dev/null +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala @@ -0,0 +1,318 @@ +package io.joern.x2cpg.datastructures + +import scala.collection.mutable +import io.shiftleft.codepropertygraph.generated.nodes.DeclarationNew + +/** A hierarchical data-structure that stores the result of types and their respective members. These types can be + * sourced from pre-parsing the application, or pre-computed stubs of common libraries. + * + * The utility of this object is in assisting resolving shorthand types during AST creation. + * + * Types are grouped by namespace (see `namespaceToType`). Only the type meta data class is a type parameter of + * this trait. + * + * @tparam T + * the type/class meta data class. Its method and field meta data classes are left unconstrained here, i.e. + * `TypeLike[_, _]`. + */ +trait ProgramSummary[T <: TypeLike[_, _]] { + + /** A mapping from a namespace/directory to the types it contains. + */ + protected val namespaceToType: Map[String, Set[T]] + + /** For the given namespace, returns the declared types, or an empty set if the namespace is unknown. + */ + def typesUnderNamespace(namespace: String): Set[T] = namespaceToType.getOrElse(namespace, Set.empty) + + /** For a type, searches for a namespace whose set of types contains it. + */ + def namespaceFor(clazz: T): Option[String] = namespaceToType.find { case (_, v) => v.contains(clazz) }.map(_._1) + + /** @param typeName + * the type name or full name. 
Can be partially qualified, since types are matched by name suffix. + * @return + * the list of meta data for every type whose name ends with the given type name. + */ + def matchingTypes(typeName: String): List[T] = { + namespaceToType.values.flatten.filter(_.name.endsWith(typeName)).toList + } + +} + +/** Extends the capability of the scope object to track types in scope and provide type resolution. + * + * @tparam M + * the method/function meta data class. + * @tparam F + * the field/object property meta data class. + * @tparam T + * the type/class meta data class. + * @tparam S + * the scope element type. + */ +trait TypedScope[M <: MethodLike, F <: FieldLike, T <: TypeLike[M, F], S <: TypedScopeElement]( + summary: ProgramSummary[T] +) { this: Scope[_, _, S] => + + /** Tracks the types that are visible to this scope. + */ + protected val typesInScope = mutable.Set.empty[T] + + /** Tracks the members visible to this scope. In languages like JavaScript or Python, where members can be directly + * imported and accessed without an explicit base, they are kept here. + */ + protected val membersInScope = mutable.Set.empty[MemberLike] + + /** Tracks any types or modules imported under alternative names to their type full names. + */ + protected val aliasedTypes = mutable.HashMap.empty[String, String] + + /** Given a type name or alias, attempts to resolve its full name using the types currently in scope. + * + * @param typeName + * the shorthand name. + * @return + * the type meta-data, if a matching type is in scope and the summary knows a namespace for it. + */ + def tryResolveTypeReference(typeName: String): Option[T] = { + typesInScope + .collectFirst { + case typ if typ.name.endsWith(typeName) => typ + case typ if aliasedTypes.contains(typeName) && typ.name == aliasedTypes(typeName) => typ + } + .flatMap(typ => summary.namespaceFor(typ).map(namespace => typ)) + } + + /** Given the type full name and call name, will attempt to find the matching entry. + * + * @param typeFullName + * the base type full name. If none, will refer to loosely imported members or functions. + * @param callName + * the call name. 
+ * @param argTypes + * the observed argument types, only consulted when `typeFullName` is given (relevant for overloading). + * @return + * the method meta data if found. + */ + def tryResolveMethodInvocation( + callName: String, + argTypes: List[String], + typeFullName: Option[String] = None + ): Option[M] = typeFullName match { + case None => + membersInScope.collectFirst { case m: MethodLike if m.name == callName => m.asInstanceOf[M] } + case Some(tfn) => + tryResolveTypeReference(tfn).flatMap { t => + t.methods.find { m => m.name == callName && isOverloadedBy(m, argTypes) } + } + } + + /** Determines whether, given the observed argument types, the method's signature is a plausible match to the + * observed arguments. + * + * The default implementation only checks that the number of observed arguments equals the number of parameters; it + * does not account for variadic arguments nor polymorphism. + * + * @param method + * the method meta data. + * @param argTypes + * the observed arguments from the call-site. + * @return + * true if the method could be the target of a call with these argument types. + */ + protected def isOverloadedBy(method: M, argTypes: List[String]): Boolean = { + method.parameterTypes.size == argTypes.size + } + + /** Given the type full name and field name, will attempt to find the matching entry. + * @param typeFullName + * the base type full name. If none, will refer to loosely imported members or functions. + * @param fieldName + * the field/object property/module variable name. + * @return + * the field/object property/module variable's meta data, if found. + */ + def tryResolveFieldAccess(fieldName: String, typeFullName: Option[String] = None): Option[F] = typeFullName match { + case None => membersInScope.collectFirst { case f: FieldLike if f.name == fieldName => f.asInstanceOf[F] } + case Some(tfn) => + tryResolveTypeReference(tfn).flatMap { t => + t.fields.find { f => f.name == fieldName } + } + } + + /** Appends all types known under the imported namespace to the scope. 
+ * @param namespace + * the fully qualified imported namespace. + */ + def addImportedNamespace(namespace: String): Unit = { + val knownTypesFromNamespace = summary.typesUnderNamespace(namespace) + typesInScope.addAll(knownTypesFromNamespace) + } + + /** Appends the known types matching the imported type or module name to the scope. + * @param typeOrModule + * the type name or full name. + */ + def addImportedTypeOrModule(typeOrModule: String): Unit = { + val matchingTypes = summary.matchingTypes(typeOrModule) + typesInScope.addAll(matchingTypes) + + } + + /** Appends the fields and methods of the matching known types to the scope. + * @param typeOrModule + * the type name or full name. + * @param memberNames + * the names of the members, or, if empty, imports all members from the type. + */ + def addImportedMember(typeOrModule: String, memberNames: String*): Unit = { + val matchingTypes = summary.matchingTypes(typeOrModule) + val matchingMembers = matchingTypes.flatMap(t => t.fields ++ t.methods) + memberNames match { + case Nil => membersInScope.addAll(matchingMembers) + case names => + val nameSet = names.toSet // Cast to set for O(1) membership query + val filteredMembers = matchingMembers.filter(member => nameSet.contains(member.name)) + membersInScope.addAll(filteredMembers) + } + } + +} + +/** An implementation that combines the typed scoping structures to manage the available type information at namespace + * levels. + * + * @tparam M + * the method/function meta data class. + * @tparam F + * the field/object property meta data class. + * @tparam T + * the type/class meta data class. + * @param summary + * the program summary. + */ +class DefaultTypedScope[M <: MethodLike, F <: FieldLike, T <: TypeLike[M, F]](summary: ProgramSummary[T]) + extends Scope[String, DeclarationNew, TypedScopeElement] + with TypedScope[M, F, T, TypedScopeElement](summary) { + + /** Pushes a new scope, adding the types under its namespace if the scope node is namespace-like. 
+ */ + override def pushNewScope(scopeNode: TypedScopeElement): Unit = { + scopeNode match { + case n: NamespaceLikeScope => typesInScope.addAll(summary.typesUnderNamespace(n.fullName)) + case _ => + } + super.pushNewScope(scopeNode) + } + + /** Pops the scope, removing the types under its namespace if the popped element is namespace-like. + */ + override def popScope(): Option[TypedScopeElement] = { + super.popScope().map { + case n: NamespaceLikeScope => + summary.typesUnderNamespace(n.fullName).foreach(typesInScope.remove) + n + case x => x + } + } + +} + +/* + Traits related to scoping classes + */ + +/** A scope element designed for the TypedScope. + */ +trait TypedScopeElement + +/** A namespace scope to synchronise types entering and exiting scopes. + */ +trait NamespaceLikeScope extends TypedScopeElement { + + /** @return + * the namespace full name. + */ + def fullName: String +} + +/* + Traits related to meta-data classes + */ + +/** A type declaration or module. Holds methods and field entities. + * + * @tparam M + * the method/function meta data class. + * @tparam F + * the field/object property meta data class. + */ +trait TypeLike[M <: MethodLike, F <: FieldLike] { + + /** @return + * the type full name. + */ + def name: String + + /** @return + * the methods declared directly under the type declaration. + */ + def methods: List[M] + + /** @return + * the fields/properties declared directly under the type declaration. + */ + def fields: List[F] + +} + +/** An entity that is a member of some type or module. + */ +trait MemberLike { + + /** @return + * the name of the member. + */ + def name: String +} + +/** A member that behaves like a field/property/module variable. + */ +trait FieldLike extends MemberLike { + + /** @return + * the name of the field. + */ + def name: String + + /** @return + * the declared type name (not necessarily resolved). + */ + def typeName: String +} + +/** A function or procedure. + */ +trait MethodLike extends MemberLike { + + /** @return + * the name of the method. 
+ */ + def name: String + + /** Stores, per parameter, a tuple of the parameter name and its type name. + * + * @return + * the names and type names of the parameters. + */ + def parameterTypes: List[(String, String)] + + /** Stores the return type name. + * + * @return + * the return type name. + */ + def returnType: String + +} diff --git a/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala new file mode 100644 index 000000000000..9f6e2b088e43 --- /dev/null +++ b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala @@ -0,0 +1,118 @@ +package io.joern.x2cpg.datastructures + +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import io.shiftleft.codepropertygraph.generated.nodes.DeclarationNew +import scala.collection.mutable +import org.scalatest.Inside + +class ProgramSummaryTests extends AnyWordSpec with Matchers with Inside { + + "a typed program summary based off of Java" should { + + /* // Reference for the summary + + package io.joern; + + public class Foo { + int bar(int x, int y) ... + + int bar(int x) ... 
+ } + + */ + + val mockTyp = Typ( + "io.joern.Foo", + List(Method("bar", List(("x", "int"), ("y", "int")), "int"), Method("bar", List(("x", "int")), "int")), + List.empty + ) + + val mockSummary = SummaryImpl(Map("io.joern" -> Set(mockTyp))) + + "provide the types within a given namespace" in { + inside(mockSummary.typesUnderNamespace("io.joern").toList) { + case typ :: Nil => + typ.name shouldBe "io.joern.Foo" + case Nil => + fail("Unable to resolve the types for the given namespace!") + case _ => fail("Unexpected number of types for the given namespace") + } + } + + "return the associated namespace given a type" in { + mockSummary.namespaceFor(mockTyp) match + case None => fail("Unable to resolve namespace!") + case Some(namespace) => namespace shouldBe "io.joern" + } + + "not be able to resolve any types with no entries" in { + val mockScope = DefaultTypedScope(summary = mockSummary) + mockScope.size shouldBe 0 + mockScope.tryResolveTypeReference("Foo").isDefined shouldBe false + } + + "successfully resolve a type once the namespace scope is pushed" in { + val mockScope = DefaultTypedScope(summary = mockSummary) + mockScope.pushNewScope(NamespaceScope("io.joern")) + mockScope.tryResolveTypeReference("Foo") match { + case None => fail("Unable to resolve type!") + case Some(typ) => typ.name shouldBe "io.joern.Foo" + } + } + + "unable to resolve a type once the namespace scope is pushed and popped off again" in { + val mockScope = DefaultTypedScope(summary = mockSummary) + mockScope.pushNewScope(NamespaceScope("io.joern")) + mockScope.popScope() + mockScope.tryResolveTypeReference("Foo") match { + case None => // correct behaviour + case Some(typ) => fail("Type should no longer be on the stack!") + } + } + + } + + "a typed program summary based off of Python" should { + + /* # Reference for the summary + + # foo.py: + + a = 2 + + */ + + val mockTyp = Typ("foo.py:", List.empty, List(Field("a", "__builtin.int"))) + + val mockSummary = SummaryImpl(Map("foo.py" -> 
Set(mockTyp))) + + "successfully resolve a module variable if its imported by a known module" in { + /* + from foo import * + + println(a) # We are looking for where `a` is coming from + */ + val mockScope = DefaultTypedScope(summary = mockSummary) + mockScope.addImportedMember("foo.py:") + mockScope.tryResolveFieldAccess("a") match { + case None => fail("Unable to resolve type!") + case Some(f) => f.name shouldBe "a" + } + } + + } + + class SummaryImpl(initMap: Map[String, Set[Typ]]) extends ProgramSummary[Typ] { + override protected val namespaceToType = Map.from(initMap) + } + + case class NamespaceScope(fullName: String) extends NamespaceLikeScope + + case class Method(name: String, parameterTypes: List[(String, String)], returnType: String) extends MethodLike + + case class Field(name: String, typeName: String) extends FieldLike + + case class Typ(name: String, methods: List[Method], fields: List[Field]) extends TypeLike[Method, Field] + +}