Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into k/rm-trace-spancontext
Browse files Browse the repository at this point in the history
  • Loading branch information
keegancsmith committed Aug 2, 2024
2 parents dd033e3 + acacc5e commit 9d583f6
Show file tree
Hide file tree
Showing 44 changed files with 1,151 additions and 622 deletions.
46 changes: 46 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
// VS Code debugger launch configurations for this repository.
// The two "launch" entries start Go programs under cmd/ from the workspace
// root; values written as ${input:...} are prompted for at launch time and
// are defined in the "inputs" section at the bottom of this file.
//
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
// Runs cmd/zoekt-git-index as: -index <indexPath> <path>
// Both <indexPath> and <path> are prompted for (see "inputs").
"name": "Index folder",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "cmd/zoekt-git-index",
"cwd": "${workspaceFolder}",
"args": ["-index", "${input:indexPath}", "${input:path}"]
},
{
// Runs cmd/zoekt-webserver as: -index <indexPath>
// Uses the same indexPath prompt as "Index folder".
"name": "Webserver",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "cmd/zoekt-webserver",
"cwd": "${workspaceFolder}",
"args": ["-index", "${input:indexPath}"]
},
{
// Attaches the debugger to an already-running local process instead of
// launching a new one. No process ID is configured here.
// NOTE(review): presumably VS Code then shows a process picker, as the
// name suggests — confirm against the Go extension's attach behaviour.
"name": "Attach to Process (from list)",
"type": "go",
"request": "attach",
"mode": "local"
}
],
"inputs": [
{
// Free-text prompt for the project to index; has no default value.
"id": "path",
"description": "Please enter the path to the project to index",
"default": "",
"type": "promptString"
},
{
// Free-text prompt for the index directory; defaults to .zoekt under
// the user's home directory.
"id": "indexPath",
"description": "Enter the path where indexes are stored",
"default": "${userHome}/.zoekt",
"type": "promptString"
}
]
}
6 changes: 2 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ RUN go install -ldflags "-X github.com/sourcegraph/zoekt.Version=$VERSION" ./cmd

FROM rust:alpine3.19 AS rust-builder

RUN apk update --no-cache && apk upgrade --no-cache && \
apk add --no-cache git wget musl-dev>=1.1.24-r10 build-base
RUN apk add --no-cache git wget musl-dev build-base

RUN wget -qO- https://github.com/sourcegraph/sourcegraph/archive/0c8aa18eece45922a2b56dc0f94e21b1bb533e7d.tar.gz | tar xz && mv sourcegraph-* sourcegraph

Expand All @@ -29,8 +28,7 @@ RUN cargo install --path sourcegraph/docker-images/syntax-highlighter --root /sy

FROM alpine:3.19 AS zoekt

RUN apk update --no-cache && apk upgrade --no-cache && \
apk add --no-cache git ca-certificates bind-tools tini jansson wget
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget

COPY install-ctags-alpine.sh .
RUN ./install-ctags-alpine.sh && rm install-ctags-alpine.sh
Expand Down
5 changes: 2 additions & 3 deletions Dockerfile.indexserver
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
FROM alpine:3.19

RUN apk update --no-cache && apk upgrade --no-cache && \
apk add --no-cache ca-certificates bind-tools tini 'git>=2.38.5-r0' jansson && \
apk add --upgrade --no-cache 'libcrypto1.1>=1.1.1n-r0' 'libssl1.1>=1.1.1n-r0' 'pcre2>=10.40-r0' 'e2fsprogs>=1.46.6-r0'
RUN apk add --no-cache ca-certificates bind-tools tini git jansson

# Run as non-root user sourcegraph. External volumes should be mounted under /data (which will be owned by sourcegraph).
RUN mkdir -p /home/sourcegraph
RUN addgroup -S sourcegraph && adduser -S -G sourcegraph -h /home/sourcegraph sourcegraph && mkdir -p /data && chown -R sourcegraph:sourcegraph /data
Expand Down
3 changes: 1 addition & 2 deletions Dockerfile.webserver
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
FROM alpine:3.19

RUN apk update --no-cache && apk upgrade --no-cache && \
apk add --no-cache ca-certificates bind-tools tini
RUN apk add --no-cache ca-certificates bind-tools tini

# Run as non-root user sourcegraph. External volumes should be mounted under /data (which will be owned by sourcegraph).
RUN mkdir -p /home/sourcegraph
Expand Down
14 changes: 10 additions & 4 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -946,10 +946,16 @@ type SearchOptions struct {
// will be used. This option is temporary and is only exposed for testing/tuning purposes.
DocumentRanksWeight float64

// EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
// Currently, this treats each match in a file as a term and computes an approximation to BM25.
// EXPERIMENTAL. If true, use text-search style scoring instead of the default
// scoring formula. The scoring algorithm treats each match in a file as a term
// and computes an approximation to BM25.
//
// The calculation of IDF assumes that Zoekt visits all documents containing any
// of the query terms during evaluation. This is true, for example, if all query
// terms are ORed together.
//
// When enabled, all other scoring signals are ignored, including document ranks.
UseKeywordScoring bool
UseBM25Scoring bool

// If set, the search results will contain debug information for scoring.
DebugScore bool
Expand Down Expand Up @@ -1008,7 +1014,7 @@ func (s *SearchOptions) String() string {
addBool("Whole", s.Whole)
addBool("ChunkMatches", s.ChunkMatches)
addBool("UseDocumentRanks", s.UseDocumentRanks)
addBool("UseKeywordScoring", s.UseKeywordScoring)
addBool("UseBM25Scoring", s.UseBM25Scoring)
addBool("DebugScore", s.DebugScore)

b.WriteByte('}')
Expand Down
4 changes: 2 additions & 2 deletions api_proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ func SearchOptionsFromProto(p *proto.SearchOptions) *SearchOptions {
UseDocumentRanks: p.GetUseDocumentRanks(),
DocumentRanksWeight: p.GetDocumentRanksWeight(),
DebugScore: p.GetDebugScore(),
UseKeywordScoring: p.GetUseKeywordScoring(),
UseBM25Scoring: p.GetUseBm25Scoring(),
}
}

Expand All @@ -723,6 +723,6 @@ func (s *SearchOptions) ToProto() *proto.SearchOptions {
UseDocumentRanks: s.UseDocumentRanks,
DocumentRanksWeight: s.DocumentRanksWeight,
DebugScore: s.DebugScore,
UseKeywordScoring: s.UseKeywordScoring,
UseBm25Scoring: s.UseBM25Scoring,
}
}
5 changes: 3 additions & 2 deletions bits.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func (n ngram) String() string {
type runeNgramOff struct {
ngram ngram
// index is the original index inside of the returned array of splitNGrams
index uint32
index int
}

func (a runeNgramOff) Compare(b runeNgramOff) int {
Expand Down Expand Up @@ -149,9 +149,10 @@ func splitNGrams(str []byte) []runeNgramOff {
ng := runesToNGram(runeGram)
result = append(result, runeNgramOff{
ngram: ng,
index: uint32(len(result)),
index: len(result),
})
}

return result
}

Expand Down
11 changes: 10 additions & 1 deletion build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ type Options struct {
changedOrRemovedFiles []string

LanguageMap ctags.LanguageMap

// ShardMerging is true if builder should respect compound shards. This is a
// Sourcegraph specific option.
ShardMerging bool
}

// HashOptions contains only the options in Options that, when modified, lead to an IndexState of IndexStateMismatch during the next index build.
Expand Down Expand Up @@ -194,6 +198,7 @@ func (o *Options) Flags(fs *flag.FlagSet) {

// Sourcegraph specific
fs.BoolVar(&o.DisableCTags, "disable_ctags", x.DisableCTags, "If set, ctags will not be called.")
fs.BoolVar(&o.ShardMerging, "shard_merging", x.ShardMerging, "If set, builder will respect compound shards.")
}

// Args generates command line arguments for o. It is the "inverse" of Flags.
Expand Down Expand Up @@ -233,6 +238,10 @@ func (o *Options) Args() []string {
args = append(args, "-disable_ctags")
}

if o.ShardMerging {
args = append(args, "-shard_merging")
}

return args
}

Expand Down Expand Up @@ -774,7 +783,7 @@ func (b *Builder) Finish() error {

for p := range toDelete {
// Don't delete compound shards, set tombstones instead.
if zoekt.ShardMergingEnabled() && strings.HasPrefix(filepath.Base(p), "compound-") {
if b.opts.ShardMerging && strings.HasPrefix(filepath.Base(p), "compound-") {
if !strings.HasSuffix(p, ".zoekt") {
continue
}
Expand Down
4 changes: 2 additions & 2 deletions build/builder_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
//go:build !windows
// +build !windows
//go:build !windows && !wasm
// +build !windows,!wasm

package build

Expand Down
24 changes: 12 additions & 12 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ func TestBM25(t *testing.T) {
query: &query.Substring{Pattern: "example"},
content: exampleJava,
language: "Java",
// keyword-score:1.63 (sum-tf: 6.00, length-ratio: 2.00)
wantScore: 1.63,
// bm25-score: 0.57 <- sum-termFrequencyScore: 10.00, length-ratio: 1.00
wantScore: 0.57,
}, {
// Matches only on content
fileName: "example.java",
Expand All @@ -89,25 +89,25 @@ func TestBM25(t *testing.T) {
}},
content: exampleJava,
language: "Java",
// keyword-score:5.75 (sum-tf: 56.00, length-ratio: 2.00)
wantScore: 5.75,
// bm25-score: 1.75 <- sum-termFrequencyScore: 56.00, length-ratio: 1.00
wantScore: 1.75,
},
{
// Matches only on filename
fileName: "example.java",
query: &query.Substring{Pattern: "java"},
content: exampleJava,
language: "Java",
// keyword-score:1.07 (sum-tf: 2.00, length-ratio: 2.00)
wantScore: 1.07,
// bm25-score: 0.51 <- sum-termFrequencyScore: 5.00, length-ratio: 1.00
wantScore: 0.51,
},
{
// Matches only on filename, and content is missing
fileName: "a/b/c/config.go",
query: &query.Substring{Pattern: "config.go"},
language: "Go",
// keyword-score:1.91 (sum-tf: 2.00, length-ratio: 0.00)
wantScore: 1.91,
// bm25-score: 0.60 <- sum-termFrequencyScore: 5.00, length-ratio: 0.00
wantScore: 0.60,
},
}

Expand Down Expand Up @@ -584,7 +584,7 @@ func skipIfCTagsUnavailable(t *testing.T, parserType ctags.CTagsParserType) {
}
}

func checkScoring(t *testing.T, c scoreCase, keywordScoring bool, parserType ctags.CTagsParserType) {
func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTagsParserType) {
skipIfCTagsUnavailable(t, parserType)

name := c.language
Expand Down Expand Up @@ -625,9 +625,9 @@ func checkScoring(t *testing.T, c scoreCase, keywordScoring bool, parserType cta
defer ss.Close()

srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{
UseKeywordScoring: keywordScoring,
ChunkMatches: true,
DebugScore: true})
UseBM25Scoring: useBM25,
ChunkMatches: true,
DebugScore: true})
if err != nil {
t.Fatal(err)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/zoekt-git-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func run() int {
DeltaShardNumberFallbackThreshold: *deltaShardNumberFallbackThreshold,
}

if err := gitindex.IndexGitRepo(gitOpts); err != nil {
if _, err := gitindex.IndexGitRepo(gitOpts); err != nil {
log.Printf("indexGitRepo(%s, delta=%t): %v", dir, gitOpts.BuildOptions.IsDelta, err)
exitStatus = 1
}
Expand Down
8 changes: 8 additions & 0 deletions cmd/zoekt-indexserver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ type ConfigEntry struct {
ExcludeTopics []string
Active bool
NoArchived bool
GerritFetchMetaConfig bool
GerritRepoNameFormat string
}

func randomize(entries []ConfigEntry) []ConfigEntry {
Expand Down Expand Up @@ -259,6 +261,12 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string
if c.Active {
cmd.Args = append(cmd.Args, "-active")
}
if c.GerritFetchMetaConfig {
cmd.Args = append(cmd.Args, "-fetch-meta-config")
}
if c.GerritRepoNameFormat != "" {
cmd.Args = append(cmd.Args, "-repo-name-format", c.GerritRepoNameFormat)
}
cmd.Args = append(cmd.Args, c.GerritApiURL)
} else {
log.Printf("executeMirror: ignoring config, because it does not contain any valid repository definition: %v", c)
Expand Down
2 changes: 1 addition & 1 deletion cmd/zoekt-indexserver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func periodicFetch(repoDir, indexDir string, opts *Options, pendingRepos chan<-
// fetchGitRepo runs git-fetch, and returns true if there was an
// update.
func fetchGitRepo(dir string) bool {
cmd := exec.Command("git", "--git-dir", dir, "fetch", "origin")
cmd := exec.Command("git", "--git-dir", dir, "fetch", "origin", "--prune")

output, err := cmd.CombinedOutput()
if err != nil {
Expand Down
Loading

0 comments on commit 9d583f6

Please sign in to comment.