Skip to content

Commit

Permalink
feat(cli): Add an argument to limit the number of repositories analyzed
Browse files Browse the repository at this point in the history
  • Loading branch information
Walz committed Dec 6, 2021
1 parent d8dc700 commit d480265
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 13 deletions.
1 change: 1 addition & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ linters:
- funlen
- gocyclo
- varnamelen
- ifshort # bug: https://github.com/esimonov/ifshort/issues/23
disable-all: false
presets:
- bugs
Expand Down
18 changes: 14 additions & 4 deletions cmd/src-fingerprint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ var date = "unknown"

const MaxPipelineEvents = 100

func runExtract(pipeline *srcfingerprint.Pipeline, user string, after string) chan srcfingerprint.PipelineEvent {
func runExtract(
pipeline *srcfingerprint.Pipeline,
user string,
after string,
limit int) chan srcfingerprint.PipelineEvent {
// buffer it a bit so it won't block if this is going too fast
ch := make(chan srcfingerprint.PipelineEvent, MaxPipelineEvents)

go func(eventChannel chan srcfingerprint.PipelineEvent) {
defer close(eventChannel)
pipeline.ExtractRepositories(user, after, eventChannel)
pipeline.ExtractRepositories(user, after, eventChannel, limit)
}(ch)

return ch
Expand Down Expand Up @@ -71,6 +75,7 @@ type authorInfo struct {
}

const DefaultClonerN = 8
const DefaultLimit = 100

func main() {
cli.VersionFlag = &cli.BoolFlag{
Expand Down Expand Up @@ -152,6 +157,11 @@ func main() {
Name: "provider-url",
Usage: "base URL of the Git provider API. If not set, defaults URL are used.",
},
&cli.IntFlag{
Name: "limit",
Value: DefaultLimit,
Usage: "maximum number of repositories to analyze (0 for unlimited).",
},
},
Action: mainAction,
}
Expand All @@ -165,7 +175,7 @@ func mainAction(c *cli.Context) error {
if c.Bool("verbose") {
log.SetLevel(log.InfoLevel)
} else {
log.SetLevel(log.ErrorLevel)
log.SetLevel(log.WarnLevel)
}

output := os.Stdout
Expand Down Expand Up @@ -226,7 +236,7 @@ func mainAction(c *cli.Context) error {

ticker := time.Tick(1 * time.Second)

eventChannel := runExtract(&pipeline, c.String("object"), c.String("after"))
eventChannel := runExtract(&pipeline, c.String("object"), c.String("after"), c.Int("limit"))

// runtime stats
var (
Expand Down
9 changes: 7 additions & 2 deletions extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"os"
"os/exec"
"regexp"

log "github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -72,9 +73,13 @@ func (fe *FastExtractor) Run(path string, after string) chan *GitFile {

var gitFile GitFile

err := json.Unmarshal(line, &gitFile)
// Replace backslashes by escaped backslashes
re := regexp.MustCompile(`\\\\(.)`)
cleanedLine := re.ReplaceAll(line, []byte("\\\\$1"))

err := json.Unmarshal(cleanedLine, &gitFile)
if err != nil {
log.Warnln(err)
log.Warnln("Error while parsing", string(line), err)
}

fe.ChanGitFiles <- &gitFile
Expand Down
24 changes: 19 additions & 5 deletions pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ func (p *Pipeline) gather(
wg *sync.WaitGroup,
eventChan chan<- PipelineEvent,
user string,
output chan<- provider.GitRepository) {
output chan<- provider.GitRepository,
limit int) {
defer wg.Done()
defer close(output)

Expand All @@ -82,8 +83,21 @@ func (p *Pipeline) gather(

p.publishEvent(eventChan, RepositoryListPipelineEvent{repositories})

for _, repository := range repositories {
output <- repository
collected := 0
ignored := 0

for index, repository := range repositories {
if limit > 0 && index >= limit {
ignored++
} else {
collected++
output <- repository
}
}

if ignored > 0 {
log.Warnln("Limit reached for number of repositories")
log.Warnf("Collected %d repos, ignored %d repos.", collected, ignored)
}

log.Infoln("Done gathering repositories")
Expand Down Expand Up @@ -119,7 +133,7 @@ const (
)

// ExtractRepositories extract repositories and analyze it for a given user and provider.
func (p *Pipeline) ExtractRepositories(user string, after string, eventChan chan<- PipelineEvent) {
func (p *Pipeline) ExtractRepositories(user string, after string, eventChan chan<- PipelineEvent, limit int) {
log.Infof("Extracting user %v\n", user)

repositoryChannel := make(chan provider.GitRepository)
Expand All @@ -133,7 +147,7 @@ func (p *Pipeline) ExtractRepositories(user string, after string, eventChan chan

wg.Add(1)

go p.gather(&wg, eventChan, user, repositoryChannel)
go p.gather(&wg, eventChan, user, repositoryChannel, limit)

for i := 0; i < extractionWorkersCount; i++ {
wg.Add(1)
Expand Down
30 changes: 28 additions & 2 deletions pipeline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,33 @@ func (suite *PipelineTestSuite) TestGather() {
providerMock.On("Gather", "user").Return([]provider.GitRepository{createGitRepository("1")}, nil)

wg.Add(1)
go pipeline.gather(wg, nil, "user", outputChan)
go pipeline.gather(wg, nil, "user", outputChan, 0)

repositories := make([]provider.GitRepository, 0, 2)
for output := range outputChan {
repositories = append(repositories, output)
}
wg.Wait()

providerMock.AssertExpectations(suite.T())
assert.Equal(suite.T(), []provider.GitRepository{gitRepositoryMock{name: "1"}}, repositories)
}

func (suite *PipelineTestSuite) TestGatherWithLimit() {
outputChan := make(chan provider.GitRepository)
wg := &sync.WaitGroup{}
providerMock := &ProviderMock{}
pipeline := Pipeline{
Provider: providerMock,
}

providerMock.On("Gather", "user").Return(
[]provider.GitRepository{createGitRepository("1"), createGitRepository("2")},
nil,
)

wg.Add(1)
go pipeline.gather(wg, nil, "user", outputChan, 1)

repositories := make([]provider.GitRepository, 0, 2)
for output := range outputChan {
Expand Down Expand Up @@ -136,7 +162,7 @@ func (suite *PipelineTestSuite) TestExtractRepositories() {
go func() {
defer close(eventChan)

pipeline.ExtractRepositories("user", "", eventChan)
pipeline.ExtractRepositories("user", "", eventChan, 0)
}()

events := make([]PipelineEvent, 0)
Expand Down

0 comments on commit d480265

Please sign in to comment.