Skip to content

Commit

Permalink
Jdk frontend (#1042)
Browse files Browse the repository at this point in the history
* trial

* added a check

* Update README.md

* Update README.md

* Update README.md

* add - handle more cases

* skip already tagged api sinks

* add - api by inference rule

* fix - missing flow

* treat Kotlin and Java as one language when filtering flows

* create new rule per ruleId only once

* minor refactor

* fix failing build

* fix

* added test case for retrofit

* add test cases for inference api
  • Loading branch information
khemrajrathore authored Apr 19, 2024
1 parent ff4fe73 commit bf592ab
Show file tree
Hide file tree
Showing 28 changed files with 939 additions and 140 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Privado Core
=============================================

Branch structure
Branch structure

main - This branch will contain the released version of the code.

Expand Down
104 changes: 104 additions & 0 deletions src/main/resources/ai/privado/rulevalidator/schema/inferences.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"definitions": {},
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://github.com/Privado-Inc/privado-core/tree/main/src/main/resources/ai/privado/rulevalidator/schema/inferences.json",
"title": "Root",
"type": "object",
"required": [
"inferences"
],
"additionalProperties": false,
"properties": {
"inferences": {
"$id": "#root/inferences",
"title": "Inferences",
"type": "array",
"default": [],
"items":{
"$id": "#root/inferences/items",
"title": "Items",
"type": "object",
"required": [
"id",
"name",
"domains",
"patterns"
],
"additionalProperties": false,
"properties": {
"id": {
"$id": "#root/inferences/items/id",
"title": "Id",
"type": "string",
"default": "",
"examples": [
"Storages.AmazonS3.Read"
],
"pattern": "^.*$"
},
"name": {
"$id": "#root/inferences/items/name",
"title": "Name",
"type": "string",
"default": "",
"examples": [
"Amazon S3(Read)"
],
"pattern": "^.*$"
},
"filterProperty": {
"$id": "#root/inferences/items/filterProperty",
"title": "FilterProperty",
"type": "string",
"default": "method_full_name",
"examples": [
"code",
"method_full_name"
],
"pattern": "^(code|method_full_name|method_full_name_with_literal|method_full_name_with_property_name)$"
},
"domains": {
"$id": "#root/inferences/items/domains",
"title": "Domains",
"type": "array",
"default": [],
"items":{
"$id": "#root/inferences/items/domains/items",
"title": "Items",
"type": "string",
"default": "",
"examples": [
"aws.amazon.com"
],
"pattern": "^.*$"
}
},
"patterns": {
"$id": "#root/inferences/items/patterns",
"title": "Patterns",
"type": "array",
"default": [],
"items":{
"$id": "#root/inferences/items/patterns/items",
"title": "Items",
"type": "string",
"format": "regex",
"default": "",
"examples": [
".*(AmazonS3).*"
],
"pattern": "^.*$"
}
},
"tags": {
"$id": "#root/inferences/items/tags",
"title": "Tags",
"type": ["object", "null"],
"default": null
}
}
}

}
}
}
21 changes: 13 additions & 8 deletions src/main/scala/ai/privado/cache/RuleCache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,30 @@ class RuleCache {
val internalPolicies = mutable.Set[String]()
private val storageRuleInfo = mutable.ListBuffer[RuleInfo]()

def setRule(rule: ConfigAndRules): Unit = {
// TODO: rename setRule to withRule, since it returns the RuleCache object whereas setters are Unit functions
def setRule(rule: ConfigAndRules): RuleCache = {
this.rule = rule
rule.sources.foreach(r => ruleInfoMap.addOne(r.id -> r))
rule.sinks.foreach(r => ruleInfoMap.addOne(r.id -> r))
rule.collections.foreach(r => ruleInfoMap.addOne(r.id -> r))
rule.policies.foreach(r => policyOrThreatMap.addOne(r.id -> r))
rule.threats.foreach(r => policyOrThreatMap.addOne(r.id -> r))
this
}

def getRule: ConfigAndRules = rule

def setRuleInfo(ruleInfo: RuleInfo): Unit = {
ruleInfoMap.addOne(ruleInfo.id -> ruleInfo)
rule = ruleInfo.catLevelOne match {
case ai.privado.model.CatLevelOne.SOURCES => rule.copy(sources = rule.sources.appended(ruleInfo))
case ai.privado.model.CatLevelOne.SINKS => rule.copy(sinks = rule.sinks.appended(ruleInfo))
case ai.privado.model.CatLevelOne.COLLECTIONS => rule.copy(collections = rule.collections.appended(ruleInfo))
case _ => rule
}
ruleInfoMap.get(ruleInfo.id) match
case Some(_) => // Rule already exists, skip adding again
case None =>
ruleInfoMap.addOne(ruleInfo.id -> ruleInfo)
rule = ruleInfo.catLevelOne match {
case ai.privado.model.CatLevelOne.SOURCES => rule.copy(sources = rule.sources.appended(ruleInfo))
case ai.privado.model.CatLevelOne.SINKS => rule.copy(sinks = rule.sinks.appended(ruleInfo))
case ai.privado.model.CatLevelOne.COLLECTIONS => rule.copy(collections = rule.collections.appended(ruleInfo))
case _ => rule
}
}

def addStorageRuleInfo(ruleInfo: RuleInfo): Unit = storageRuleInfo.addOne(ruleInfo)
Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/ai/privado/dataflow/Dataflow.scala
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,11 @@ class Dataflow(cpg: Cpg) {
println(s"${Calendar.getInstance().getTime} - --Filtering flows 1 invoked...")
appCache.totalFlowFromReachableBy = dataflowPathsUnfiltered.size

// Apply `this` filtering for JS & JAVA also
// Apply `this` filtering for JS, JAVA
val dataflowPaths = {
if (
privadoScanConfig.disableThisFiltering || (appCache.repoLanguage != Language.JAVA && appCache.repoLanguage != Language.JAVASCRIPT)
privadoScanConfig.disableThisFiltering || (!List(Language.JAVA, Language.JAVASCRIPT)
.contains(appCache.repoLanguage))
)
dataflowPathsUnfiltered
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ object DuplicateFlowProcessor {
auditCache.addIntoBeforeSecondFiltering(SourcePathInfo(flow.pathSourceId, flow.sinkId, flow.sinkPathId))
}
if (
privadoScanConfig.disableFlowSeparationByDataElement || (appCache.repoLanguage != Language.JAVA && appCache.repoLanguage != Language.JAVASCRIPT)
privadoScanConfig.disableFlowSeparationByDataElement || (!List(Language.JAVA, Language.JAVASCRIPT)
.contains(appCache.repoLanguage))
) {
// Filter out flows where source is cookie and sink is cookie read
if (
Expand Down
60 changes: 48 additions & 12 deletions src/main/scala/ai/privado/entrypoint/ScanProcessor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import io.joern.console.cpgcreation.guessLanguage
import io.shiftleft.codepropertygraph.generated.Languages
import org.slf4j.LoggerFactory
import ai.privado.languageEngine.csharp.processor.CSharpProcessor
import io.joern.x2cpg.SourceFiles

import java.util.Calendar
import scala.collection.parallel.CollectionConverters.ImmutableIterableIsParallelizable
Expand Down Expand Up @@ -199,6 +200,19 @@ object ScanProcessor extends CommandProcessor {
language = Language.withNameWithDefault(pathTree.last)
)
)
.filter(filterByLang),
inferences = configAndRules.inferences
.filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath))
.map(x =>
x.copy(
file = fullPath,
catLevelOne = CatLevelOne.INFERENCES,
catLevelTwo = pathTree.apply(2),
categoryTree = pathTree,
language = Language.withNameWithDefault(pathTree.last),
nodeType = NodeType.withNameWithDefault(pathTree.apply(3))
)
)
.filter(filterByLang)
)
case Left(error) =>
Expand All @@ -223,7 +237,8 @@ object ScanProcessor extends CommandProcessor {
semantics = a.semantics ++ b.semantics,
sinkSkipList = a.sinkSkipList ++ b.sinkSkipList,
systemConfig = a.systemConfig ++ b.systemConfig,
auditConfig = a.auditConfig ++ b.auditConfig
auditConfig = a.auditConfig ++ b.auditConfig,
inferences = a.inferences ++ b.inferences
)
)
catch {
Expand Down Expand Up @@ -281,6 +296,7 @@ object ScanProcessor extends CommandProcessor {
val sinkSkipList = externalConfigAndRules.sinkSkipList ++ internalConfigAndRules.sinkSkipList
val systemConfig = externalConfigAndRules.systemConfig ++ internalConfigAndRules.systemConfig
val auditConfig = externalConfigAndRules.auditConfig ++ internalConfigAndRules.auditConfig
val inferences = externalConfigAndRules.inferences ++ internalConfigAndRules.inferences
val mergedRules =
ConfigAndRules(
sources = mergePatterns(sources),
Expand All @@ -292,7 +308,8 @@ object ScanProcessor extends CommandProcessor {
semantics = semantics.distinctBy(_.signature),
sinkSkipList = sinkSkipList.distinctBy(_.id),
systemConfig = systemConfig,
auditConfig = auditConfig.distinctBy(_.id)
auditConfig = auditConfig.distinctBy(_.id),
inferences = mergePatterns(inferences)
)
logger.trace(mergedRules.toString)
println(s"${Calendar.getInstance().getTime} - Configuration parsed...")
Expand All @@ -306,7 +323,8 @@ object ScanProcessor extends CommandProcessor {
mergedRules.collections.size +
mergedRules.policies.size +
mergedRules.exclusions.size +
mergedRules.auditConfig.size
mergedRules.auditConfig.size +
mergedRules.inferences.size
)
}

Expand Down Expand Up @@ -366,16 +384,34 @@ object ScanProcessor extends CommandProcessor {
lang match {
case language if language == Languages.JAVASRC || language == Languages.JAVA =>
println(s"${Calendar.getInstance().getTime} - Detected language 'Java'")
new JavaProcessor(
getProcessedRule(Set(Language.JAVA), appCache),
this.config,
val kotlinPlusJavaRules = getProcessedRule(Set(Language.KOTLIN, Language.JAVA), appCache)
val filesWithKtExtension = SourceFiles.determine(
sourceRepoLocation,
dataFlowCache = getDataflowCache,
auditCache,
s3DatabaseDetailsCache,
appCache,
propertyFilterCache = propertyFilterCache
).processCpg()
Set(".kt"),
ignoredFilesRegex = Option(kotlinPlusJavaRules.getExclusionRegex.r)
)
if (filesWithKtExtension.isEmpty)
new JavaProcessor(
getProcessedRule(Set(Language.JAVA), appCache),
this.config,
sourceRepoLocation,
dataFlowCache = getDataflowCache,
auditCache,
s3DatabaseDetailsCache,
appCache,
propertyFilterCache = propertyFilterCache
).processCpg()
else
new KotlinProcessor(
kotlinPlusJavaRules,
this.config,
sourceRepoLocation,
dataFlowCache = getDataflowCache,
auditCache,
s3DatabaseDetailsCache,
appCache,
propertyFilterCache = propertyFilterCache
).processCpg()
case language if language == Languages.JSSRC =>
println(s"${Calendar.getInstance().getTime} - Detected language 'JavaScript'")
JavascriptProcessor.createJavaScriptCpg(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import overflowdb.traversal.Traversal
import io.shiftleft.semanticcpg.language.*
import ai.privado.languageEngine.go.tagger.source.IdentifierTagger
import ai.privado.languageEngine.go.tagger.config.GoDBConfigTagger
import ai.privado.languageEngine.go.tagger.sink.GoAPITagger
import ai.privado.languageEngine.go.tagger.sink.{GoAPISinkTagger, GoAPITagger}
import ai.privado.tagger.sink.RegularSinkTagger
import ai.privado.utility.Utilities.ingressUrls

Expand All @@ -37,7 +37,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {

new GoDBConfigTagger(cpg).createAndApply()

new GoAPITagger(cpg, ruleCache, privadoInput = privadoInputConfig, appCache = appCache).createAndApply()
GoAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache)

new RegularSinkTagger(cpg, ruleCache).createAndApply()

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package ai.privado.languageEngine.go.tagger.sink

import ai.privado.cache.{AppCache, RuleCache}
import ai.privado.entrypoint.PrivadoInput
import ai.privado.tagger.sink.api.APISinkTagger
import io.shiftleft.codepropertygraph.generated.Cpg

/** API sink tagging entry point used by the Go language engine.
  *
  * Extends [[APISinkTagger]] and, on top of whatever the parent's `applyTagger` does, runs [[GoAPITagger]] over the
  * same code property graph.
  */
object GoAPISinkTagger extends APISinkTagger {

  /** Applies the parent tagger and then the Go API tagger, in that order.
    *
    * @param cpg
    *   code property graph to tag
    * @param ruleCache
    *   rule cache handed to both the parent tagger and [[GoAPITagger]]
    * @param privadoInput
    *   scan input configuration handed to both taggers
    * @param appCache
    *   application cache handed to both taggers
    */
  override def applyTagger(
    cpg: Cpg,
    ruleCache: RuleCache,
    privadoInput: PrivadoInput,
    appCache: AppCache
  ): Unit = {
    // The parent's pass must run before the Go-specific pass; keep this order.
    super.applyTagger(cpg, ruleCache, privadoInput, appCache)

    val goApiTagger = new GoAPITagger(cpg, ruleCache, privadoInput, appCache)
    goApiTagger.createAndApply()
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ import ai.privado.languageEngine.java.tagger.collection.{
SOAPCollectionTagger
}
import ai.privado.languageEngine.java.tagger.config.JavaDBConfigTagger
import ai.privado.languageEngine.java.tagger.sink.api.JavaAPISinkTagger
import ai.privado.languageEngine.java.tagger.sink.{InheritMethodTagger, JavaAPITagger, MessagingConsumerCustomTagger}
import ai.privado.languageEngine.java.tagger.sink.api.{JavaAPISinkTagger, JavaAPITagger}
import ai.privado.languageEngine.java.tagger.sink.{InheritMethodTagger, MessagingConsumerCustomTagger}
import ai.privado.languageEngine.java.tagger.source.*
import ai.privado.tagger.PrivadoBaseTagger
import ai.privado.tagger.collection.{AndroidCollectionTagger, WebFormsCollectionTagger}
Expand Down Expand Up @@ -83,9 +83,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {

new JavaS3Tagger(cpg, s3DatabaseDetailsCache).createAndApply()

JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig)
JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache)

new JavaAPITagger(cpg, ruleCache, privadoInputConfig, appCache).createAndApply()
// Custom Rule tagging
if (!privadoInputConfig.ignoreInternalRules) {
// Adding custom rule to cache
Expand Down
Loading

0 comments on commit bf592ab

Please sign in to comment.