diff --git a/analysis/annotations/annotations.go b/analysis/annotations/annotations.go index d3ad4eca..1ceb4ca7 100644 --- a/analysis/annotations/annotations.go +++ b/analysis/annotations/annotations.go @@ -17,7 +17,9 @@ package annotations import ( "fmt" "go/ast" + "go/token" "regexp" + "strconv" "strings" "github.com/awslabs/ar-go-tools/analysis/config" @@ -41,6 +43,8 @@ const ( Sanitizer // SetOptions is the kind of SetOptions(...) annotations SetOptions + // Ignore is an empty annotation for a line + Ignore ) // AnyTag is the special tag used to match any other tag @@ -98,6 +102,24 @@ func (a Annotation) IsMatchingAnnotation(kind AnnotationKind, tag string) bool { return a.Kind == kind && (tag == AnyTag || (len(a.Tags) > 0 && a.Tags[0] == AnyTag) || slices.Contains(a.Tags, tag)) } +// LinePos is a simple line-file position indicator. +type LinePos struct { + Line int + File string +} + +// NewLinePos returns a LinePos from a token position. The column and offset are abstracted away. +func NewLinePos(pos token.Position) LinePos { + return LinePos{ + Line: pos.Line, + File: pos.Filename, + } +} + +func (l LinePos) String() string { + return l.File + ":" + strconv.Itoa(l.Line) +} + // A FunctionAnnotation groups the annotations relative to a function into main annotations for the entire function // and parameter annotations for each parameter type FunctionAnnotation struct { @@ -152,6 +174,17 @@ type ProgramAnnotations struct { Consts map[*ssa.NamedConst][]Annotation // Globals is the map of global variable annotations (TODO: implementation) Globals map[*ssa.Global][]Annotation + // Positional is the map of line-file location to annotations that are not attached to a specific construct. + // There can be only one annotation per line. + Positional map[LinePos]Annotation +} + +// IsIgnoredPos returns true when the given position is on the same line as an //argot:ignore annotation. +func (pa ProgramAnnotations) IsIgnoredPos(pos token.Position, tag string) bool { + if posAnnot, hasPosAnnot := pa.Positional[NewLinePos(pos)]; hasPosAnnot { + return posAnnot.Kind == Ignore && (posAnnot.Tags[0] == tag || posAnnot.Tags[0] == AnyTag) + } + return false } // Count returns the total number of annotations in the program @@ -178,19 +211,20 @@ func (pa ProgramAnnotations) Iter(fx func(a Annotation)) { for _, cst := range pa.Globals { funcutil.Iter(cst, fx) } + for _, cst := range pa.Positional { + fx(cst) + } } // CompleteFromSyntax takes a set of program annotations and adds additional non-ssa linked annotations // to the annotations -func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []*packages.Package) { - for _, pkg := range pkgs { - for _, astFile := range pkg.Syntax { - pa.loadPackageDocAnnotations(astFile.Doc) - for _, comments := range astFile.Comments { - for _, comment := range comments.List { - if annotationContents := extractAnnotation(comment); annotationContents != nil { - pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()).String()) - } +func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkg *packages.Package) { + for _, astFile := range pkg.Syntax { + pa.loadPackageDocAnnotations(astFile.Doc) + for _, comments := range astFile.Comments { + for _, comment := range comments.List { + if annotationContents := extractAnnotation(comment); annotationContents != nil { + pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos())) } } } @@ -205,11 +239,12 @@ func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs [] // will also print warnings when some syntactic components of the comments look like they should be an annotation. func LoadAnnotations(logger *config.LogGroup, packages []*ssa.Package) (ProgramAnnotations, error) { annotations := ProgramAnnotations{ - Configs: map[string]map[string]string{}, - Funcs: map[*ssa.Function]FunctionAnnotation{}, - Types: map[*ssa.Type][]Annotation{}, - Consts: map[*ssa.NamedConst][]Annotation{}, - Globals: map[*ssa.Global][]Annotation{}, + Configs: map[string]map[string]string{}, + Funcs: map[*ssa.Function]FunctionAnnotation{}, + Types: map[*ssa.Type][]Annotation{}, + Consts: map[*ssa.NamedConst][]Annotation{}, + Globals: map[*ssa.Global][]Annotation{}, + Positional: map[LinePos]Annotation{}, } for _, pkg := range packages { for _, m := range pkg.Members { @@ -287,7 +322,7 @@ func (pa ProgramAnnotations) loadPackageDocAnnotations(doc *ast.CommentGroup) { // loadFileAnnotations loads the annotation that are not tied to a specific ssa node. This includes: // - config annotations // - positional annotations -func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position string) { +func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position token.Position) { if len(annotationContents) <= 1 { logger.Warnf("ignoring argot annotation with no arguments at %s", position) return @@ -295,13 +330,23 @@ func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annota switch annotationContents[0] { case ConfigTarget: pa.loadConfigTargetAnnotation(logger, annotationContents, position) + case IgnoreTarget: + pa.addIgnoreLineAnnotation(position, annotationContents) + } +} + +// addIgnoreLineAnnotation adds a Ignore annotation at the position's line +func (pa ProgramAnnotations) addIgnoreLineAnnotation(position token.Position, annotationContents []string) { + pa.Positional[NewLinePos(position)] = Annotation{ + Kind: Ignore, + Tags: []string{annotationContents[1]}, // only the tag is used } } // loadConfigTargetAnnotation loads a config annotation. Config annotations look like // "//argot:config tag SetOptions(option-name-1=value1,option-name-2=value2)" and are always linked to a specific problem // tag. -func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position string) { +func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position token.Position) { if len(annotationContents) <= 2 { logger.Warnf("argot:config expects a target tag and one or more SetOptions") logger.Warnf("a comment is likely missing something at %s", position) diff --git a/analysis/annotations/annotations_test.go b/analysis/annotations/annotations_test.go index b7173886..67651e0e 100644 --- a/analysis/annotations/annotations_test.go +++ b/analysis/annotations/annotations_test.go @@ -36,9 +36,35 @@ func TestLoadAnnotations(t *testing.T) { testProgram.Prog.Build() logger := config.NewLogGroup(testProgram.Config) a, err := annotations.LoadAnnotations(logger, testProgram.Prog.AllPackages()) + a.CompleteFromSyntax(logger, testProgram.Pkgs[0]) if err != nil { t.Errorf("error loading annotations: %s", err) } + + // Positional annotations like //argot:ignore + t.Logf("%d positional annotations", len(a.Positional)) + if !funcutil.ExistsInMap(a.Positional, func(k annotations.LinePos, _ annotations.Annotation) bool { + return k.Line == 43 + }) { + t.Errorf("Expected annotation at line 43.") + } + + for pos, annot := range a.Positional { + if pos.Line == 43 && (annot.Kind != annotations.Ignore || !funcutil.Contains(annot.Tags, "_")) { + t.Errorf("Expected annotation at line 43 to be //argot:ignore _ but its %v", annot) + } + } + + // Config annotations like //argot:config + t.Logf("%d config annotations", len(a.Configs)) + if _, hasOpt := a.Configs["_"]; !hasOpt { + t.Errorf("expected a config option for _") + } + if a.Configs["_"]["log-level"] != "3" { + t.Errorf("Expected config option log-level set to 3 by annotation.") + } + + // Function annotations like //argot:function and //argot:param t.Logf("%d functions scanned", len(a.Funcs)) for ssaFunc, functionAnnotation := range a.Funcs { switch ssaFunc.Name() { diff --git a/analysis/annotations/testdata/main.go b/analysis/annotations/testdata/main.go index 2fb65f47..c769c2eb 100644 --- a/analysis/annotations/testdata/main.go +++ b/analysis/annotations/testdata/main.go @@ -16,6 +16,8 @@ package main import "fmt" +//argot:config _ @SetOptions(log-level=3) configures log-level for all problems + // sink and source annotation are relative to data categories (e.g. in this example bar, io and html) // Sink(_) means it is always a sink @@ -38,7 +40,7 @@ func foo() string { // //argot:function Sink(_) func superSensitiveFunction(iWillPrintYouUnencrypted string) { - fmt.Println(iWillPrintYouUnencrypted) + fmt.Println(iWillPrintYouUnencrypted) //argot:ignore _ } // sanitizerOfIo diff --git a/analysis/dataflow/inter_procedural.go b/analysis/dataflow/inter_procedural.go index 49a4e480..5331bddb 100644 --- a/analysis/dataflow/inter_procedural.go +++ b/analysis/dataflow/inter_procedural.go @@ -312,12 +312,15 @@ func (g *InterProceduralFlowGraph) RunVisitorOnEntryPoints(visitor Visitor, // entrypoints at once, but this would require a finer context-tracking mechanism than what the NodeWithCallStack // implements. // If the maximum number of alarms has been reached, stop early. + i := 0 for _, entry := range entryPoints { if !g.AnalyzerState.TestAlarmCount() { - g.AnalyzerState.Logger.Warnf("Some entrypoints are skipped, max number of alarms reached.") + g.AnalyzerState.Logger.Warnf("%d entrypoints are skipped, max number of alarms reached.", + len(entryPoints)-i) return } visitor.Visit(g.AnalyzerState, entry) + i++ } } diff --git a/analysis/dataflow/state.go b/analysis/dataflow/state.go index 3ab8a295..3865c55a 100644 --- a/analysis/dataflow/state.go +++ b/analysis/dataflow/state.go @@ -25,6 +25,7 @@ import ( "github.com/awslabs/ar-go-tools/analysis/config" "github.com/awslabs/ar-go-tools/analysis/lang" "github.com/awslabs/ar-go-tools/analysis/summaries" + "github.com/awslabs/ar-go-tools/internal/analysisutil" "github.com/awslabs/ar-go-tools/internal/pointer" "golang.org/x/tools/go/callgraph" "golang.org/x/tools/go/callgraph/cha" @@ -38,7 +39,7 @@ type AnalyzerState struct { // Annotations contains all the annotations of the program Annotations annotations.ProgramAnnotations - // The logger used during the analysis (can be used to control output. + // The logger used during the analysis (can be used to control output). Logger *config.LogGroup // The configuration file for the analysis @@ -120,10 +121,23 @@ func NewAnalyzerState(p *ssa.Program, pkgs []*packages.Package, l *config.LogGro steps []func(*AnalyzerState)) (*AnalyzerState, error) { var allContracts []Contract - // Load annotations + // Load annotations by scanning all packages' syntax pa, err := annotations.LoadAnnotations(l, p.AllPackages()) + if pkgs != nil { - pa.CompleteFromSyntax(l, pkgs) + for _, pkg := range pkgs { + analysisutil.VisitPackages(pkg, func(p *packages.Package) bool { + // Don't scan stdlib for annotations! + if summaries.IsStdPackageName(p.Name) { + return false + } + // TODO: find a way to not scan dependencies if there is demand. Currently, it is unlikely that some + // dependencies will contain argot annotations. + l.Debugf("Scan %s for annotations.\n", p.PkgPath) + pa.CompleteFromSyntax(l, p) + return true + }) + } } if err != nil { @@ -217,6 +231,7 @@ func NewDefaultAnalyzer(p *ssa.Program, pkgs []*packages.Package) (*AnalyzerStat // CopyTo copies pointers in receiver into argument (shallow copy of everything except mutex). // Do not use two copies in separate routines. func (s *AnalyzerState) CopyTo(b *AnalyzerState) { + b.Annotations = s.Annotations b.BoundingInfo = s.BoundingInfo b.Config = s.Config b.EscapeAnalysisState = s.EscapeAnalysisState diff --git a/analysis/taint/dataflow_visitor.go b/analysis/taint/dataflow_visitor.go index 53afa142..2214cad7 100644 --- a/analysis/taint/dataflow_visitor.go +++ b/analysis/taint/dataflow_visitor.go @@ -140,21 +140,28 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) { // you are tracking flows to logging but don't care about integers and booleans for example). if isFiltered(s, v.taintSpec, cur.Node) { if _, isIf := cur.Node.(*df.IfNode); !isIf || v.taintSpec.FailOnImplicitFlow { - logger.Infof("Filtered value: %s\n", cur.Node.String()) - logger.Infof("At: %s\n", cur.Node.Position(s)) + // Filtered values logged at debug level -- there can be many of those. + logger.Debugf("Filtered value: %s\n", cur.Node.String()) + logger.Debugf("At: %s\n", cur.Node.Position(s)) } continue } // If node is sink, then we reached a sink from a source, and we must log the taint flow. if isSink(s, v.taintSpec, cur.Node) && cur.Status.Kind == df.DefaultTracing { - if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) { - reportTaintFlow(s, v.currentSource, cur) - } - // Stop if there is a limit on number of alarms, and it has been reached. - if !s.IncrementAndTestAlarms() { - logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms) - return + // Don't report taint flow if the sink location is annotated with //argot:ignore + if s.Annotations.IsIgnoredPos(cur.Node.Position(s), v.taintSpec.Tag) { + s.Logger.Infof("//argot:ignore taint flow to %s", + cur.Node.Position(s)) + } else { + if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) { + reportTaintFlow(s, v.currentSource, cur) + } + // Stop if there is a limit on number of alarms, and it has been reached. + if !s.IncrementAndTestAlarms() { + logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms) + return + } } // A sink does not have successors in the taint flow analysis (but other sinks can be reached // as there are still values flowing). diff --git a/analysis/taint/report.go b/analysis/taint/report.go index 0dc1bc35..133a0921 100644 --- a/analysis/taint/report.go +++ b/analysis/taint/report.go @@ -76,7 +76,8 @@ func reportCoverage(coverage map[string]bool, coverageWriter io.StringWriter) { } // reportTaintFlow reports a taint flow by writing to a file if the configuration has the ReportPaths flag set, -// and writing in the logger +// and writing in the logger. +// The function checks the annotations to suppress reports where there are //argot:ignore annotations. func reportTaintFlow(c *dataflow.AnalyzerState, source dataflow.NodeWithTrace, sink *dataflow.VisitorNode) { c.Logger.Infof(" !!!! TAINT FLOW !!!!") c.Logger.Infof(" 💀 Sink reached at %s\n", formatutil.Red(sink.Node.Position(c))) diff --git a/analysis/taint/taint.go b/analysis/taint/taint.go index 14ec16bb..27189a14 100644 --- a/analysis/taint/taint.go +++ b/analysis/taint/taint.go @@ -140,6 +140,7 @@ func Analyze(cfg *config.Config, prog *ssa.Program, pkgs []*packages.Package) (A } } visitor := NewVisitor(&taintSpec) + state.Logger.Infof("Analyzing taint-tracking problem %s", taintSpec.Tag) analysis.RunInterProcedural(state, visitor, analysis.InterProceduralParams{ // The entry points are specific to each taint tracking problem (unlike in the intra-procedural pass) IsEntrypoint: func(node ssa.Node) bool { return IsSourceNode(state, &taintSpec, node) }, diff --git a/analysis/taint/testdata/annotations/main.go b/analysis/taint/testdata/annotations/main.go index ccd37c8f..a0a85c3b 100644 --- a/analysis/taint/testdata/annotations/main.go +++ b/analysis/taint/testdata/annotations/main.go @@ -22,7 +22,7 @@ import ( ) // Specific problem-level config options can be set in annotations -//argot:config lim SetOptions(max-alarms=2,unsafe-max-depth=2) +//argot:config lim SetOptions(max-alarms=12,unsafe-max-depth=2) // bar // It is also a source for the problem hybridDef also defined in the config.json @@ -79,7 +79,7 @@ func main() { sink(s) // @Sink(ex1) sinkOnSecondArg(s, "ok") // only second argument of this is a sink sinkOnSecondArg("ok", s) // @Sink(ex2) - sink(sanitizeSecondArg(s, "ok")) // @Sink(ex1) + sink(sanitizeSecondArg(s, "ok")) //argot:ignore _ (but should be an ex1 sink) sink(sanitizeSecondArg("ok", s)) sink(sanitizer(s)) fmt.Println(s) // @Sink(hybridDef) diff --git a/doc/01_taint.md b/doc/01_taint.md index 8ef77dd6..f132ec17 100644 --- a/doc/01_taint.md +++ b/doc/01_taint.md @@ -202,6 +202,17 @@ options: max-alarms: 2 ``` +#### Finding Suppression + +You may encounter false positives in the taint analysis, some of which cannot be easily resolved by making the configuration more precise or by changing the code. +When you are confident the finding is a false positive, you can suppress the findings of the taint analysis on a specific line by using the `//argot:ignore problem-tag` annotation. +For example: +```go +... + callSink(notReaalyTaintedData) //argot:ignore _ +``` +Will suppress findings for all taint problems. Taint problems can be associated with a `tag: tagName` in the configuration, and you can suppress findings specifically for `tagName` by using `//argot:ignore tagName`. + #### Warning Suppression The use can set the setting `warn: false` to suppress warnings during the analysis. This means that if the analysis encounters program constructs that make it unsound, those will not be reported. This setting does not affect the soundness of the analysis, but it will cause the tool to not report when your program falls beyond the soundness guarantees. diff --git a/internal/analysisutil/iterators.go b/internal/analysisutil/iterators.go new file mode 100644 index 00000000..2fcddc16 --- /dev/null +++ b/internal/analysisutil/iterators.go @@ -0,0 +1,39 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package analysisutil + +import ( + "golang.org/x/tools/go/packages" +) + +// VisitPackages calls f recursively on the root package provided, and then all the imports provided f returns +// true for that package. If f returns false, the imports will not be visited. +func VisitPackages(root *packages.Package, f func(p *packages.Package) bool) { + seen := map[*packages.Package]bool{} + queue := []*packages.Package{root} + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + if seen[cur] { + continue + } + if f(cur) { + for _, importedPkg := range cur.Imports { + queue = append(queue, importedPkg) + } + } + seen[cur] = true + } +} diff --git a/internal/funcutil/collections.go b/internal/funcutil/collections.go index aa7bcec3..fa49aa71 100644 --- a/internal/funcutil/collections.go +++ b/internal/funcutil/collections.go @@ -134,6 +134,16 @@ func Exists[T any](a []T, f func(T) bool) bool { return false } +// ExistsInMap returns true when there exists some k,x in map a such that f(k,x), otherwise false. +func ExistsInMap[K comparable, T any](a map[K]T, f func(K, T) bool) bool { + for k, x := range a { + if f(k, x) { + return true + } + } + return false +} + // FindMap returns Some(f(x)) when there exists some x in slice a such that p(f(x)), otherwise None. func FindMap[T any, R any](a []T, f func(T) R, p func(R) bool) Optional[R] { for _, x := range a {