Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Magik and PKl languages that are not handled by Linguist #790

Merged
merged 7 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions indexbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"time"
"unicode/utf8"

"github.com/go-enry/go-enry/v2"
"github.com/sourcegraph/zoekt/internal/languages"
)

var _ = log.Println
Expand Down Expand Up @@ -397,7 +397,7 @@ func (b *IndexBuilder) addSymbols(symbols []*Symbol) {

// DetermineLanguageIfUnknown sets doc.Language from the document's name and
// content when doc.Language is empty. A language that is already set is left
// untouched.
func DetermineLanguageIfUnknown(doc *Document) {
if doc.Language == "" {
// NOTE(review): these two assignments look like the removed and the added
// line of a diff hunk (enry replaced by the internal languages wrapper).
// Confirm that only the languages.GetLanguage call remains in the real file.
doc.Language = enry.GetLanguage(doc.Name, doc.Content)
doc.Language = languages.GetLanguage(doc.Name, doc.Content)
}
}

Expand Down
64 changes: 64 additions & 0 deletions internal/languages/language.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// This file wraps the logic of go-enry (https://github.com/go-enry/go-enry) to support additional languages.
// go-enry is based on Linguist (https://github.com/github/linguist),
// and sometimes a programming language may not be supported by Linguist
mmanela marked this conversation as resolved.
Show resolved Hide resolved
// or may take a while to get merged in and make it into go-enry. This wrapper
// gives us the flexibility to support languages in those cases. We list additional languages
// in this file and remove them once they make it into Linguist and go-enry.
package languages
mmanela marked this conversation as resolved.
Show resolved Hide resolved

import (
"path/filepath"
"strings"

"github.com/go-enry/go-enry/v2"
)

// unsupportedByLinguistAliasMap maps a lowercase language alias to the
// language name for languages that are missing from Linguist.
// GetLanguageByAlias consults it only after go-enry fails to resolve the alias.
var unsupportedByLinguistAliasMap = map[string]string{
// Pkl Configuration Language (https://pkl-lang.org/).
// Added to Linguist on 6/7/24; this entry can be removed once the
// go-enry package updates to that Linguist version.
"pkl": "Pkl",
// Magik language.
"magik": "Magik",
}

// unsupportedByLinguistExtensionToNameMap maps a lowercase file extension,
// including its leading dot, to the language name for languages that are
// missing from Linguist. GetLanguage consults it only when go-enry returns
// an empty language.
var unsupportedByLinguistExtensionToNameMap = map[string]string{
// Pkl Configuration Language (https://pkl-lang.org/).
".pkl": "Pkl",
// Magik language.
".magik": "Magik",
}

// GetLanguageByAlias is a replacement for enry.GetLanguageByAlias that also
// recognizes languages missing from Linguist.
//
// The alias is resolved through go-enry first. Only when go-enry does not know
// it is the lowercased alias looked up in unsupportedByLinguistAliasMap, so the
// fallback match is case-insensitive. ok reports whether a language was found;
// language is empty when none was.
func GetLanguageByAlias(alias string) (language string, ok bool) {
	if language, ok = enry.GetLanguageByAlias(alias); ok {
		return language, true
	}

	// The fallback table is keyed by lowercase aliases.
	language, ok = unsupportedByLinguistAliasMap[strings.ToLower(alias)]
	return language, ok
}

// GetLanguage is a replacement for enry.GetLanguage. It returns the most
// probable language for the given file name and content, and additionally
// recognizes languages that are missing from Linguist.
//
// go-enry's answer wins whenever it is non-empty. Otherwise the lowercased
// file extension is looked up in unsupportedByLinguistExtensionToNameMap.
// The result is empty when neither source knows the language.
func GetLanguage(filename string, content []byte) (language string) {
	if detected := enry.GetLanguage(filename, content); detected != "" {
		return detected
	}

	// go-enry had no answer: fall back on the extension table.
	rawExt := filepath.Ext(filename)
	if rawExt == "" {
		return ""
	}

	// A missing key yields the empty string, i.e. "unknown language".
	return unsupportedByLinguistExtensionToNameMap[strings.ToLower(rawExt)]
}
95 changes: 95 additions & 0 deletions internal/languages/language_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package languages

import "testing"

// TestGetLanguageByAlias runs GetLanguageByAlias over a table of aliases:
// empty, unknown, known to go-enry, and known only to the local fallback
// table (in lowercase and in mixed case).
func TestGetLanguageByAlias(t *testing.T) {
	type aliasCase struct {
		name   string
		alias  string
		want   string
		wantOk bool
	}

	cases := []aliasCase{
		{name: "empty alias", alias: "", want: "", wantOk: false},
		{name: "unknown alias", alias: "unknown", want: "", wantOk: false},
		{name: "supported alias", alias: "go", want: "Go", wantOk: true},
		{name: "unsupported by linguist alias", alias: "magik", want: "Magik", wantOk: true},
		{name: "unsupported by linguist alias normalized", alias: "mAgIk", want: "Magik", wantOk: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lang, found := GetLanguageByAlias(tc.alias)
			if lang == tc.want && found == tc.wantOk {
				return
			}
			t.Errorf("GetLanguageByAlias(%q) = %q, %t, want %q, %t", tc.alias, lang, found, tc.want, tc.wantOk)
		})
	}
}

// TestGetLanguage runs GetLanguage over a table of file names: empty, with an
// unknown extension, with an extension go-enry knows, and with an extension
// known only to the local fallback table.
func TestGetLanguage(t *testing.T) {
	type fileCase struct {
		name     string
		filename string
		content  []byte
		want     string
	}

	cases := []fileCase{
		{name: "empty filename", filename: "", content: []byte(""), want: ""},
		{name: "unknown extension", filename: "file.unknown", content: []byte(""), want: ""},
		{name: "supported extension", filename: "file.go", content: []byte("package main"), want: "Go"},
		{name: "unsupported by linguist extension", filename: "file.magik", content: []byte(""), want: "Magik"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lang := GetLanguage(tc.filename, tc.content)
			if lang == tc.want {
				return
			}
			t.Errorf("GetLanguage(%q, %q) = %q, want %q", tc.filename, tc.content, lang, tc.want)
		})
	}
}
4 changes: 2 additions & 2 deletions query/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ import (
"log"
"regexp/syntax"

"github.com/go-enry/go-enry/v2"
"github.com/grafana/regexp"
"github.com/sourcegraph/zoekt/internal/languages"
)

var _ = log.Printf
Expand Down Expand Up @@ -172,7 +172,7 @@ func parseExpr(in []byte) (Q, int, error) {
}
expr = q
case tokLang:
canonical, ok := enry.GetLanguageByAlias(text)
canonical, ok := languages.GetLanguageByAlias(text)
if !ok {
expr = &Const{false}
} else {
Expand Down
Loading