diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala
new file mode 100644
index 000000000000..7f8fe5ba1ee5
--- /dev/null
+++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala
@@ -0,0 +1,316 @@
+package io.joern.x2cpg.datastructures
+
+import scala.collection.mutable
+import io.shiftleft.codepropertygraph.generated.nodes.DeclarationNew
+
+/** A hierarchical data-structure that stores the result of types and their respective members. These types can be
+  * sourced from pre-parsing the application, or pre-computed stubs of common libraries.
+  *
+  * The utility of this object is in assisting resolving shorthand types during AST creation.
+  *
+  * @tparam T
+  *   the type/class meta data class.
+  */
+trait ProgramSummary[T <: TypeLike[_, _]] {
+
+  /** A mapping between a namespace/directory and the containing types.
+    */
+  protected val namespaceToType: Map[String, Set[T]]
+
+  /** For the given namespace, returns the declared types.
+    */
+  def typesUnderNamespace(namespace: String): Set[T] = namespaceToType.getOrElse(namespace, Set.empty)
+
+  /** For a type, will search for the associated namespace.
+    */
+  def namespaceFor(clazz: T): Option[String] = namespaceToType.find { case (_, v) => v.contains(clazz) }.map(_._1)
+
+  /** @param typeName
+    *   the type name or full name. Can be partially qualified.
+    * @return
+    *   the list of matching types' meta data, i.e. every type whose name ends with `typeName`.
+    */
+  def matchingTypes(typeName: String): List[T] = {
+    namespaceToType.values.flatten.filter(_.name.endsWith(typeName)).toList
+  }
+
+}
+
+/** Extends the capability of the scope object to track types in scope and provide type resolution.
+  *
+  * @tparam M
+  *   the method/function meta data class.
+  * @tparam F
+  *   the field/object property meta data class.
+  * @tparam T
+  *   the type/class meta data class.
+  * @tparam S
+  *   the scope type.
+  * @param summary
+  *   the program summary that type and member look-ups are resolved against.
+  */
+trait TypedScope[M <: MethodLike, F <: FieldLike, T <: TypeLike[M, F], S <: TypedScopeElement](
+  summary: ProgramSummary[T]
+) { this: Scope[_, _, S] =>
+
+  /** Tracks the types that are visible to this scope.
+    */
+  protected val typesInScope = mutable.Set.empty[T]
+
+  /** Tracks the members visible to this scope. In languages like JavaScript or Python, where members can be directly
+    * imported and accessed without an explicit base, they are kept here.
+    */
+  protected val membersInScope = mutable.Set.empty[MemberLike]
+
+  /** Tracks any types or modules imported under alternative names to their type full names.
+    */
+  protected val aliasedTypes = mutable.HashMap.empty[String, String]
+
+  /** Given a type name or alias, attempts to resolve its full name using the types currently in scope.
+    *
+    * @param typeName
+    *   the shorthand name.
+    * @return
+    *   the type meta-data if found.
+    */
+  def tryResolveTypeReference(typeName: String): Option[T] = {
+    // Look the alias up once, rather than querying the alias map twice for every type in scope
+    val aliasedFullName = aliasedTypes.get(typeName)
+    typesInScope
+      .find(typ => typ.name.endsWith(typeName) || aliasedFullName.contains(typ.name))
+      // The first match is only returned if the summary knows a namespace for it
+      .filter(typ => summary.namespaceFor(typ).isDefined)
+  }
+
+  /** Given the type full name and call name, will attempt to find the matching entry.
+    *
+    * @param callName
+    *   the call name.
+    * @param argTypes
+    *   the observed argument types. Only relevant for languages that implement overloading.
+    * @param typeFullName
+    *   the base type full name. If none, will refer to loosely imported member or functions.
+    * @return
+    *   the method meta data if found.
+    */
+  def tryResolveMethodInvocation(
+    callName: String,
+    argTypes: List[String],
+    typeFullName: Option[String] = None
+  ): Option[M] = typeFullName match {
+    case None =>
+      membersInScope.collectFirst { case m: MethodLike if m.name == callName => m.asInstanceOf[M] }
+    case Some(tfn) =>
+      tryResolveTypeReference(tfn).flatMap { t =>
+        t.methods.find { m => m.name == callName && isOverloadedBy(m, argTypes) }
+      }
+  }
+
+  /** Determines if, by observing the given argument types, that the method's signature is a plausible match to the
+    * observed arguments.
+    *
+    * The default implementation only considers that the same number of arguments are added and does not account for
+    * variadic arguments nor polymorphism.
+    *
+    * @param method
+    *   the method meta data.
+    * @param argTypes
+    *   the observed arguments from the call-site.
+    * @return
+    *   true if the method could be overloaded by a call with these argument types.
+    */
+  protected def isOverloadedBy(method: M, argTypes: List[String]): Boolean = {
+    method.parameterTypes.size == argTypes.size
+  }
+
+  /** Given the type full name and field name, will attempt to find the matching entry.
+    * @param fieldName
+    *   the field/object property/module variable name.
+    * @param typeFullName
+    *   the base type full name. If none, will refer to loosely imported member or functions.
+    * @return
+    *   the field/object property/module variable's meta data.
+    */
+  def tryResolveFieldAccess(fieldName: String, typeFullName: Option[String] = None): Option[F] = typeFullName match {
+    case None => membersInScope.collectFirst { case f: FieldLike if f.name == fieldName => f.asInstanceOf[F] }
+    case Some(tfn) =>
+      tryResolveTypeReference(tfn).flatMap { t =>
+        t.fields.find { f => f.name == fieldName }
+      }
+  }
+
+  /** Appends known types imported into the scope.
+    * @param namespace
+    *   the fully qualified imported namespace.
+    */
+  def addImportedNamespace(namespace: String): Unit = {
+    val knownTypesFromNamespace = summary.typesUnderNamespace(namespace)
+    typesInScope.addAll(knownTypesFromNamespace)
+  }
+
+  /** Appends known types imported into the scope.
+    * @param typeOrModule
+    *   the type name or full name.
+    */
+  def addImportedTypeOrModule(typeOrModule: String): Unit = {
+    val matchingTypes = summary.matchingTypes(typeOrModule)
+    typesInScope.addAll(matchingTypes)
+
+  }
+
+  /** Appends known members to the scope.
+    * @param typeOrModule
+    *   the type name or full name.
+    * @param memberNames
+    *   the names of the members, or, if empty, imports all members from the type.
+    */
+  def addImportedMember(typeOrModule: String, memberNames: String*): Unit = {
+    val matchingTypes   = summary.matchingTypes(typeOrModule)
+    val matchingMembers = matchingTypes.flatMap(t => t.fields ++ t.methods)
+    if (memberNames.isEmpty) {
+      membersInScope.addAll(matchingMembers)
+    } else {
+      val nameSet         = memberNames.toSet // Cast to set for O(1) membership query
+      val filteredMembers = matchingMembers.filter(member => nameSet.contains(member.name))
+      membersInScope.addAll(filteredMembers)
+    }
+  }
+
+}
+
+/** An implementation of combining the typed scoping structures to manage the available type information at namespace
+  * levels.
+  *
+  * @tparam M
+  *   the method/function meta data class.
+  * @tparam F
+  *   the field/object property meta data class.
+  * @tparam T
+  *   the type/class meta data class.
+  * @param summary
+  *   the program summary.
+  */
+class DefaultTypedScope[M <: MethodLike, F <: FieldLike, T <: TypeLike[M, F]](summary: ProgramSummary[T])
+    extends Scope[String, DeclarationNew, TypedScopeElement]
+    with TypedScope[M, F, T, TypedScopeElement](summary) {
+
+  /** Pushes the scope, adding the types declared under the namespace if the scope node is a namespace.
+    */
+  override def pushNewScope(scopeNode: TypedScopeElement): Unit = {
+    scopeNode match {
+      case n: NamespaceLikeScope => typesInScope.addAll(summary.typesUnderNamespace(n.fullName))
+      case _                     =>
+    }
+    super.pushNewScope(scopeNode)
+  }
+
+  /** Pops the scope, removing types from the scope if necessary.
+    */
+  override def popScope(): Option[TypedScopeElement] = {
+    super.popScope().map {
+      case n: NamespaceLikeScope =>
+        summary.typesUnderNamespace(n.fullName).foreach(typesInScope.remove)
+        n
+      case x => x
+    }
+  }
+
+}
+
+/*
+  Traits related to scoping classes
+ */
+
+/** A scope element designed for the TypedScope.
+  */
+trait TypedScopeElement
+
+/** A namespace scope to synchronise types entering and exiting scopes.
+  */
+trait NamespaceLikeScope extends TypedScopeElement {
+
+  /** @return
+    *   the namespace full name.
+    */
+  def fullName: String
+}
+
+/*
+  Traits related to meta-data classes
+ */
+
+/** A type declaration or module. Holds methods and field entities.
+  *
+  * @tparam M
+  *   the method/function meta data class.
+  * @tparam F
+  *   the field/object property meta data class.
+  */
+trait TypeLike[M <: MethodLike, F <: FieldLike] {
+
+  /** @return
+    *   the type full name.
+    */
+  def name: String
+
+  /** @return
+    *   the methods declared directly under the type declaration.
+    */
+  def methods: List[M]
+
+  /** @return
+    *   the fields/properties declared directly under the type declaration.
+    */
+  def fields: List[F]
+
+}
+
+/** An entity that is a member to some type or module.
+  */
+trait MemberLike {
+
+  /** @return
+    *   the name of the member.
+    */
+  def name: String
+}
+
+/** A member that behaves like a field/property/module variable.
+  */
+trait FieldLike extends MemberLike {
+
+  /** @return
+    *   the name of the field.
+    */
+  def name: String
+
+  /** @return
+    *   the type declared (not necessarily resolved)
+    */
+  def typeName: String
+}
+
+/** A function or procedure.
+  */
+trait MethodLike extends MemberLike {
+
+  /** @return
+    *   the name of the method.
+    */
+  def name: String
+
+  /** Stores a tuple of the parameter name and type name.
+    *
+    * @return
+    *   the names and type names of the parameters.
+    */
+  def parameterTypes: List[(String, String)]
+
+  /** Stores the return type name.
+    *
+    * @return
+    *   the return type name.
+    */
+  def returnType: String
+
+}
diff --git a/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala
new file mode 100644
index 000000000000..9f6e2b088e43
--- /dev/null
+++ b/joern-cli/frontends/x2cpg/src/test/scala/io/joern/x2cpg/datastructures/ProgramSummaryTests.scala
@@ -0,0 +1,119 @@
+package io.joern.x2cpg.datastructures
+
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import io.shiftleft.codepropertygraph.generated.nodes.DeclarationNew
+import scala.collection.mutable
+import org.scalatest.Inside
+
+class ProgramSummaryTests extends AnyWordSpec with Matchers with Inside {
+
+  "a typed program summary based off of Java" should {
+
+    /* // Reference for the summary
+
+        package io.joern;
+
+        public class Foo {
+          int bar(int x, int y) ...
+
+          int bar(int x) ...
+        }
+
+     */
+
+    val mockTyp = Typ(
+      "io.joern.Foo",
+      List(Method("bar", List(("x", "int"), ("y", "int")), "int"), Method("bar", List(("x", "int")), "int")),
+      List.empty
+    )
+
+    val mockSummary = SummaryImpl(Map("io.joern" -> Set(mockTyp)))
+
+    "provide the types within a given namespace" in {
+      inside(mockSummary.typesUnderNamespace("io.joern").toList) {
+        case typ :: Nil =>
+          typ.name shouldBe "io.joern.Foo"
+        case Nil =>
+          fail("Unable to resolve the types for the given namespace!")
+        case _ => fail("Unexpected number of types for the given namespace")
+      }
+    }
+
+    "return the associated namespace given a type" in {
+      mockSummary.namespaceFor(mockTyp) match {
+        case None            => fail("Unable to resolve namespace!")
+        case Some(namespace) => namespace shouldBe "io.joern"
+      }
+    }
+
+    "not be able to resolve any types with no entries" in {
+      val mockScope = DefaultTypedScope(summary = mockSummary)
+      mockScope.size shouldBe 0
+      mockScope.tryResolveTypeReference("Foo").isDefined shouldBe false
+    }
+
+    "successfully resolve a type once the namespace scope is pushed" in {
+      val mockScope = DefaultTypedScope(summary = mockSummary)
+      mockScope.pushNewScope(NamespaceScope("io.joern"))
+      mockScope.tryResolveTypeReference("Foo") match {
+        case None      => fail("Unable to resolve type!")
+        case Some(typ) => typ.name shouldBe "io.joern.Foo"
+      }
+    }
+
+    "unable to resolve a type once the namespace scope is pushed and popped off again" in {
+      val mockScope = DefaultTypedScope(summary = mockSummary)
+      mockScope.pushNewScope(NamespaceScope("io.joern"))
+      mockScope.popScope()
+      mockScope.tryResolveTypeReference("Foo") match {
+        case None    => // correct behaviour
+        case Some(_) => fail("Type should no longer be on the stack!")
+      }
+    }
+
+  }
+
+  "a typed program summary based off of Python" should {
+
+    /* # Reference for the summary
+
+        # foo.py:
+
+        a = 2
+
+     */
+
+    val mockTyp = Typ("foo.py:", List.empty, List(Field("a", "__builtin.int")))
+
+    val mockSummary = SummaryImpl(Map("foo.py" -> Set(mockTyp)))
+
+    "successfully resolve a module variable if its imported by a known module" in {
+      /*
+        from foo import *
+
+        println(a) # We are looking for where `a` is coming from
+       */
+      val mockScope = DefaultTypedScope(summary = mockSummary)
+      mockScope.addImportedMember("foo.py:")
+      mockScope.tryResolveFieldAccess("a") match {
+        case None    => fail("Unable to resolve type!")
+        case Some(f) => f.name shouldBe "a"
+      }
+    }
+
+  }
+
+  class SummaryImpl(initMap: Map[String, Set[Typ]]) extends ProgramSummary[Typ] {
+    override protected val namespaceToType = Map.from(initMap)
+  }
+
+  case class NamespaceScope(fullName: String) extends NamespaceLikeScope
+
+  case class Method(name: String, parameterTypes: List[(String, String)], returnType: String) extends MethodLike
+
+  case class Field(name: String, typeName: String) extends FieldLike
+
+  case class Typ(name: String, methods: List[Method], fields: List[Field]) extends TypeLike[Method, Field]
+
+}