Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using argot:ignore annotations in the taint analysis. #101

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 61 additions & 16 deletions analysis/annotations/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ package annotations
import (
"fmt"
"go/ast"
"go/token"
"regexp"
"strconv"
"strings"

"github.com/awslabs/ar-go-tools/analysis/config"
Expand All @@ -41,6 +43,8 @@ const (
Sanitizer
// SetOptions is the kind of SetOptions(...) annotations
SetOptions
// Ignore is an empty annotation for a line
Ignore
)

// AnyTag is the special tag used to match any other tag
Expand Down Expand Up @@ -98,6 +102,24 @@ func (a Annotation) IsMatchingAnnotation(kind AnnotationKind, tag string) bool {
return a.Kind == kind && (tag == AnyTag || (len(a.Tags) > 0 && a.Tags[0] == AnyTag) || slices.Contains(a.Tags, tag))
}

// LinePos is a simple line-file position indicator.
type LinePos struct {
Line int
File string
}

// NewLinePos returns a LinePos from a token position. The column and offset are abstracted away.
func NewLinePos(pos token.Position) LinePos {
return LinePos{
Line: pos.Line,
File: pos.Filename,
}
}

func (l LinePos) String() string {
return l.File + ":" + strconv.Itoa(l.Line)
}

// A FunctionAnnotation groups the annotations relative to a function into main annotations for the entire function
// and parameter annotations for each parameter
type FunctionAnnotation struct {
Expand Down Expand Up @@ -152,6 +174,17 @@ type ProgramAnnotations struct {
Consts map[*ssa.NamedConst][]Annotation
// Globals is the map of global variable annotations (TODO: implementation)
Globals map[*ssa.Global][]Annotation
// Positional is the map of line-file location to annotations that are not attached to a specific construct.
// There can be only one annotation per line.
Positional map[LinePos]Annotation
}

// IsIgnoredPos returns true when the given position is on the same line as an //argot:ignore annotation.
func (pa ProgramAnnotations) IsIgnoredPos(pos token.Position, tag string) bool {
if posAnnot, hasPosAnnot := pa.Positional[NewLinePos(pos)]; hasPosAnnot {
return posAnnot.Kind == Ignore && (posAnnot.Tags[0] == tag || posAnnot.Tags[0] == AnyTag)
}
return false
}

// Count returns the total number of annotations in the program
Expand All @@ -178,19 +211,20 @@ func (pa ProgramAnnotations) Iter(fx func(a Annotation)) {
for _, cst := range pa.Globals {
funcutil.Iter(cst, fx)
}
for _, cst := range pa.Positional {
fx(cst)
}
}

// CompleteFromSyntax takes a set of program annotations and adds additional non-ssa linked annotations
// to the annotations
func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []*packages.Package) {
for _, pkg := range pkgs {
for _, astFile := range pkg.Syntax {
pa.loadPackageDocAnnotations(astFile.Doc)
for _, comments := range astFile.Comments {
for _, comment := range comments.List {
if annotationContents := extractAnnotation(comment); annotationContents != nil {
pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()).String())
}
func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkg *packages.Package) {
for _, astFile := range pkg.Syntax {
pa.loadPackageDocAnnotations(astFile.Doc)
for _, comments := range astFile.Comments {
for _, comment := range comments.List {
if annotationContents := extractAnnotation(comment); annotationContents != nil {
pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()))
}
}
}
Expand All @@ -205,11 +239,12 @@ func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []
// will also print warnings when some syntactic components of the comments look like they should be an annotation.
func LoadAnnotations(logger *config.LogGroup, packages []*ssa.Package) (ProgramAnnotations, error) {
annotations := ProgramAnnotations{
Configs: map[string]map[string]string{},
Funcs: map[*ssa.Function]FunctionAnnotation{},
Types: map[*ssa.Type][]Annotation{},
Consts: map[*ssa.NamedConst][]Annotation{},
Globals: map[*ssa.Global][]Annotation{},
Configs: map[string]map[string]string{},
Funcs: map[*ssa.Function]FunctionAnnotation{},
Types: map[*ssa.Type][]Annotation{},
Consts: map[*ssa.NamedConst][]Annotation{},
Globals: map[*ssa.Global][]Annotation{},
Positional: map[LinePos]Annotation{},
}
for _, pkg := range packages {
for _, m := range pkg.Members {
Expand Down Expand Up @@ -287,21 +322,31 @@ func (pa ProgramAnnotations) loadPackageDocAnnotations(doc *ast.CommentGroup) {
// loadFileAnnotations loads the annotation that are not tied to a specific ssa node. This includes:
// - config annotations
// - positional annotations
func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position string) {
func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position token.Position) {
if len(annotationContents) <= 1 {
logger.Warnf("ignoring argot annotation with no arguments at %s", position)
return
}
switch annotationContents[0] {
case ConfigTarget:
pa.loadConfigTargetAnnotation(logger, annotationContents, position)
case IgnoreTarget:
pa.addIgnoreLineAnnotation(position, annotationContents)
}
}

// addIgnoreLineAnnotation adds a Ignore annotation at the position's line
func (pa ProgramAnnotations) addIgnoreLineAnnotation(position token.Position, annotationContents []string) {
pa.Positional[NewLinePos(position)] = Annotation{
Kind: Ignore,
Tags: []string{annotationContents[1]}, // only the tag is used
}
}

// loadConfigTargetAnnotation loads a config annotation. Config annotations look like
// "//argot:config tag SetOptions(option-name-1=value1,option-name-2=value2)" and are always linked to a specific problem
// tag.
func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position string) {
func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position token.Position) {
if len(annotationContents) <= 2 {
logger.Warnf("argot:config expects a target tag and one or more SetOptions")
logger.Warnf("a comment is likely missing something at %s", position)
Expand Down
26 changes: 26 additions & 0 deletions analysis/annotations/annotations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,35 @@ func TestLoadAnnotations(t *testing.T) {
testProgram.Prog.Build()
logger := config.NewLogGroup(testProgram.Config)
a, err := annotations.LoadAnnotations(logger, testProgram.Prog.AllPackages())
a.CompleteFromSyntax(logger, testProgram.Pkgs[0])
if err != nil {
t.Errorf("error loading annotations: %s", err)
}

// Positional annotations like //argot:ignore
t.Logf("%d positional annotations", len(a.Positional))
if !funcutil.ExistsInMap(a.Positional, func(k annotations.LinePos, _ annotations.Annotation) bool {
return k.Line == 43
}) {
t.Errorf("Expected annotation at line 43.")
}

for pos, annot := range a.Positional {
if pos.Line == 43 && (annot.Kind != annotations.Ignore || !funcutil.Contains(annot.Tags, "_")) {
t.Errorf("Expected annotation at line 43 to be //argot:ignore _ but its %v", annot)
}
}

// Config annotations like //argot:config
t.Logf("%d config annotations", len(a.Configs))
if _, hasOpt := a.Configs["_"]; !hasOpt {
t.Errorf("expected a config option for _")
}
if a.Configs["_"]["log-level"] != "3" {
t.Errorf("Expected config option log-level set to 3 by annotation.")
}

// Function annotations like //argot:function and //argot:param
t.Logf("%d functions scanned", len(a.Funcs))
for ssaFunc, functionAnnotation := range a.Funcs {
switch ssaFunc.Name() {
Expand Down
4 changes: 3 additions & 1 deletion analysis/annotations/testdata/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package main

import "fmt"

//argot:config _ @SetOptions(log-level=3) configures log-level for all problems

// sink and source annotation are relative to data categories (e.g. in this example bar, io and html)
// Sink(_) means it is always a sink

Expand All @@ -38,7 +40,7 @@ func foo() string {
//
//argot:function Sink(_)
func superSensitiveFunction(iWillPrintYouUnencrypted string) {
fmt.Println(iWillPrintYouUnencrypted)
fmt.Println(iWillPrintYouUnencrypted) //argot:ignore _
}

// sanitizerOfIo
Expand Down
5 changes: 4 additions & 1 deletion analysis/dataflow/inter_procedural.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,15 @@ func (g *InterProceduralFlowGraph) RunVisitorOnEntryPoints(visitor Visitor,
// entrypoints at once, but this would require a finer context-tracking mechanism than what the NodeWithCallStack
// implements.
// If the maximum number of alarms has been reached, stop early.
i := 0
for _, entry := range entryPoints {
if !g.AnalyzerState.TestAlarmCount() {
g.AnalyzerState.Logger.Warnf("Some entrypoints are skipped, max number of alarms reached.")
g.AnalyzerState.Logger.Warnf("%d entrypoints are skipped, max number of alarms reached.",
len(entryPoints)-i)
return
}
visitor.Visit(g.AnalyzerState, entry)
i++
}
}

Expand Down
21 changes: 18 additions & 3 deletions analysis/dataflow/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/awslabs/ar-go-tools/analysis/config"
"github.com/awslabs/ar-go-tools/analysis/lang"
"github.com/awslabs/ar-go-tools/analysis/summaries"
"github.com/awslabs/ar-go-tools/internal/analysisutil"
"github.com/awslabs/ar-go-tools/internal/pointer"
"golang.org/x/tools/go/callgraph"
"golang.org/x/tools/go/callgraph/cha"
Expand All @@ -38,7 +39,7 @@ type AnalyzerState struct {
// Annotations contains all the annotations of the program
Annotations annotations.ProgramAnnotations

// The logger used during the analysis (can be used to control output.
// The logger used during the analysis (can be used to control output).
Logger *config.LogGroup

// The configuration file for the analysis
Expand Down Expand Up @@ -120,10 +121,23 @@ func NewAnalyzerState(p *ssa.Program, pkgs []*packages.Package, l *config.LogGro
steps []func(*AnalyzerState)) (*AnalyzerState, error) {
var allContracts []Contract

// Load annotations
// Load annotations by scanning all packages' syntax
pa, err := annotations.LoadAnnotations(l, p.AllPackages())

if pkgs != nil {
pa.CompleteFromSyntax(l, pkgs)
for _, pkg := range pkgs {
analysisutil.VisitPackages(pkg, func(p *packages.Package) bool {
// Don't scan stdlib for annotations!
if summaries.IsStdPackageName(p.Name) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also avoid scanning dependencies? Not sure how to identify them though

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave a TODO and make an issue. Based on demand we can figure out how to implement it, but early on it's unlikely the dependencies will have annotations, and scanning is relatively fast compared to all the other analysis steps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue #102 for tracking this.

return false
}
// TODO: find a way to not scan dependencies if there is demand. Currently, it is unlikely that some
// dependencies will contain argot annotations.
l.Debugf("Scan %s for annotations.\n", p.PkgPath)
pa.CompleteFromSyntax(l, p)
return true
})
}
}

if err != nil {
Expand Down Expand Up @@ -217,6 +231,7 @@ func NewDefaultAnalyzer(p *ssa.Program, pkgs []*packages.Package) (*AnalyzerStat
// CopyTo copies pointers in receiver into argument (shallow copy of everything except mutex).
// Do not use two copies in separate routines.
func (s *AnalyzerState) CopyTo(b *AnalyzerState) {
b.Annotations = s.Annotations
b.BoundingInfo = s.BoundingInfo
b.Config = s.Config
b.EscapeAnalysisState = s.EscapeAnalysisState
Expand Down
25 changes: 16 additions & 9 deletions analysis/taint/dataflow_visitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,21 +140,28 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) {
// you are tracking flows to logging but don't care about integers and booleans for example).
if isFiltered(s, v.taintSpec, cur.Node) {
if _, isIf := cur.Node.(*df.IfNode); !isIf || v.taintSpec.FailOnImplicitFlow {
logger.Infof("Filtered value: %s\n", cur.Node.String())
logger.Infof("At: %s\n", cur.Node.Position(s))
// Filtered values logged at debug level -- there can be many of those.
logger.Debugf("Filtered value: %s\n", cur.Node.String())
logger.Debugf("At: %s\n", cur.Node.Position(s))
}
continue
}

// If node is sink, then we reached a sink from a source, and we must log the taint flow.
if isSink(s, v.taintSpec, cur.Node) && cur.Status.Kind == df.DefaultTracing {
if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) {
reportTaintFlow(s, v.currentSource, cur)
}
// Stop if there is a limit on number of alarms, and it has been reached.
if !s.IncrementAndTestAlarms() {
logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms)
return
// Don't report taint flow if the sink location is annotated with //argot:ignore
if s.Annotations.IsIgnoredPos(cur.Node.Position(s), v.taintSpec.Tag) {
s.Logger.Infof("//argot:ignore taint flow to %s",
cur.Node.Position(s))
} else {
if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) {
reportTaintFlow(s, v.currentSource, cur)
}
// Stop if there is a limit on number of alarms, and it has been reached.
if !s.IncrementAndTestAlarms() {
logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms)
return
}
}
// A sink does not have successors in the taint flow analysis (but other sinks can be reached
// as there are still values flowing).
Expand Down
3 changes: 2 additions & 1 deletion analysis/taint/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ func reportCoverage(coverage map[string]bool, coverageWriter io.StringWriter) {
}

// reportTaintFlow reports a taint flow by writing to a file if the configuration has the ReportPaths flag set,
// and writing in the logger
// and writing in the logger.
// The function checks the annotations to suppress reports where there are //argot:ignore annotations.
func reportTaintFlow(c *dataflow.AnalyzerState, source dataflow.NodeWithTrace, sink *dataflow.VisitorNode) {
c.Logger.Infof(" !!!! TAINT FLOW !!!!")
c.Logger.Infof(" 💀 Sink reached at %s\n", formatutil.Red(sink.Node.Position(c)))
Expand Down
1 change: 1 addition & 0 deletions analysis/taint/taint.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func Analyze(cfg *config.Config, prog *ssa.Program, pkgs []*packages.Package) (A
}
}
visitor := NewVisitor(&taintSpec)
state.Logger.Infof("Analyzing taint-tracking problem %s", taintSpec.Tag)
analysis.RunInterProcedural(state, visitor, analysis.InterProceduralParams{
// The entry points are specific to each taint tracking problem (unlike in the intra-procedural pass)
IsEntrypoint: func(node ssa.Node) bool { return IsSourceNode(state, &taintSpec, node) },
Expand Down
4 changes: 2 additions & 2 deletions analysis/taint/testdata/annotations/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
)

// Specific problem-level config options can be set in annotations
//argot:config lim SetOptions(max-alarms=2,unsafe-max-depth=2)
//argot:config lim SetOptions(max-alarms=12,unsafe-max-depth=2)

// bar
// It is also a source for the problem hybridDef also defined in the config.json
Expand Down Expand Up @@ -79,7 +79,7 @@ func main() {
sink(s) // @Sink(ex1)
sinkOnSecondArg(s, "ok") // only second argument of this is a sink
sinkOnSecondArg("ok", s) // @Sink(ex2)
sink(sanitizeSecondArg(s, "ok")) // @Sink(ex1)
sink(sanitizeSecondArg(s, "ok")) //argot:ignore _ (but should be an ex1 sink)
sink(sanitizeSecondArg("ok", s))
sink(sanitizer(s))
fmt.Println(s) // @Sink(hybridDef)
Expand Down
11 changes: 11 additions & 0 deletions doc/01_taint.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,17 @@ options:
max-alarms: 2
```

#### Finding Suppression

You may encounter false positives in the taint analysis, some of which cannot be easily resolved by making the configuration more precise or by changing the code.
When you are confident the finding is a false positive, you can suppress the findings of the taint analysis on a specific line by using the `//argot:ignore problem-tag` annotation.
For example:
```go
...
callSink(notReaalyTaintedData) //argot:ignore _
```
Will suppress findings for all taint problems. Taint problems can be associated with a `tag: tagName` in the configuration, and you can suppress findings specifically for `tagName` by using `//argot:ignore tagName`.

#### Warning Suppression
The use can set the setting `warn: false` to suppress warnings during the analysis. This means that if the analysis encounters program constructs that make it unsound, those will not be reported. This setting does not affect the soundness of the analysis, but it will cause the tool to not report when your program falls beyond the soundness guarantees.

Expand Down
39 changes: 39 additions & 0 deletions internal/analysisutil/iterators.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analysisutil

import (
"golang.org/x/tools/go/packages"
)

// VisitPackages calls f recursively on the root package provided, and then all the imports provided f returns
// true for that package. If f returns false, the imports will not be visited.
func VisitPackages(root *packages.Package, f func(p *packages.Package) bool) {
seen := map[*packages.Package]bool{}
queue := []*packages.Package{root}
for len(queue) > 0 {
cur := queue[0]
queue = queue[1:]
if seen[cur] {
continue
}
if f(cur) {
for _, importedPkg := range cur.Imports {
queue = append(queue, importedPkg)
}
}
seen[cur] = true
}
}
Loading
Loading