diff --git a/cmd/registry-bigquery/common/common.go b/cmd/registry-bigquery/common/common.go new file mode 100644 index 00000000..08b4a61a --- /dev/null +++ b/cmd/registry-bigquery/common/common.go @@ -0,0 +1,49 @@ +package common + +import ( + "context" + "net/http" + "time" + + "cloud.google.com/go/bigquery" + "google.golang.org/api/googleapi" +) + +// Used by subcommands, Now provides a single common timestamp for a command invocation. +var Now = time.Now() + +// Get a BigQuery dataset by name and create it if it doesn't exist. +func GetOrCreateDataset(ctx context.Context, client *bigquery.Client, name string) (*bigquery.Dataset, error) { + dataset := client.Dataset(name) + if err := dataset.Create(ctx, nil); err != nil { + switch v := err.(type) { + case *googleapi.Error: + if v.Code != http.StatusConflict { // already exists + return nil, err + } + default: + return nil, err + } + } + return dataset, nil +} + +// Get a BigQuery table by name and create it if it doesn't exist. +func GetOrCreateTable(ctx context.Context, dataset *bigquery.Dataset, name string, prototype interface{}) (*bigquery.Table, error) { + table := dataset.Table(name) + schema, err := bigquery.InferSchema(prototype) + if err != nil { + return nil, err + } + if err := table.Create(ctx, &bigquery.TableMetadata{Schema: schema}); err != nil { + switch v := err.(type) { + case *googleapi.Error: + if v.Code != http.StatusConflict { // already exists + return nil, err + } + default: + return nil, err + } + } + return table, nil +} diff --git a/cmd/registry-bigquery/index/index.go b/cmd/registry-bigquery/index/index.go index 29e94445..2b7df993 100644 --- a/cmd/registry-bigquery/index/index.go +++ b/cmd/registry-bigquery/index/index.go @@ -15,13 +15,7 @@ package index import ( - "context" - "net/http" - "time" - - "cloud.google.com/go/bigquery" "github.com/spf13/cobra" - "google.golang.org/api/googleapi" ) func Command() *cobra.Command { @@ -31,46 +25,8 @@ func Command() *cobra.Command { } 
cmd.AddCommand(infoCommand()) + cmd.AddCommand(linksCommand()) cmd.AddCommand(operationsCommand()) cmd.AddCommand(serversCommand()) return cmd } - -// Used by subcommands, now provides a single common timestap for a command invocation. -var now = time.Now() - -// Get a BigQuery dataset by name and create it if it doesn't exist. -func getOrCreateDataset(ctx context.Context, client *bigquery.Client, name string) (*bigquery.Dataset, error) { - dataset := client.Dataset(name) - if err := dataset.Create(ctx, nil); err != nil { - switch v := err.(type) { - case *googleapi.Error: - if v.Code != http.StatusConflict { // already exists - return nil, err - } - default: - return nil, err - } - } - return dataset, nil -} - -// Get a BigQuery table by name and create it if it doesn't exist. -func getOrCreateTable(ctx context.Context, dataset *bigquery.Dataset, name string, prototype interface{}) (*bigquery.Table, error) { - table := dataset.Table(name) - schema, err := bigquery.InferSchema(prototype) - if err != nil { - return nil, err - } - if err := table.Create(ctx, &bigquery.TableMetadata{Schema: schema}); err != nil { - switch v := err.(type) { - case *googleapi.Error: - if v.Code != http.StatusConflict { // already exists - return nil, err - } - default: - return nil, err - } - } - return table, nil -} diff --git a/cmd/registry-bigquery/index/info.go b/cmd/registry-bigquery/index/info.go index 5852efe8..17d07548 100644 --- a/cmd/registry-bigquery/index/info.go +++ b/cmd/registry-bigquery/index/info.go @@ -21,6 +21,7 @@ import ( "time" "cloud.google.com/go/bigquery" + "github.com/apigee/registry-experimental/cmd/registry-bigquery/common" "github.com/apigee/registry-experimental/pkg/yamlquery" "github.com/apigee/registry/pkg/connection" "github.com/apigee/registry/pkg/mime" @@ -62,11 +63,11 @@ func infoCommand() *cobra.Command { if err != nil { return err } - ds, err := getOrCreateDataset(ctx, client, dataset) + ds, err := common.GetOrCreateDataset(ctx, client, dataset) if 
err != nil { return err } - table, err := getOrCreateTable(ctx, ds, "info", info{}) + table, err := common.GetOrCreateTable(ctx, ds, "info", info{}) if err != nil { return err } @@ -161,7 +162,7 @@ func (v *infoVisitor) getOpenAPIInfo(specName names.Spec, b []byte) error { Api: specName.ApiID, Version: specName.VersionID, Spec: specName.SpecID, - Timestamp: now, + Timestamp: common.Now, }) } return nil diff --git a/cmd/registry-bigquery/index/links.go b/cmd/registry-bigquery/index/links.go new file mode 100644 index 00000000..3d6a064f --- /dev/null +++ b/cmd/registry-bigquery/index/links.go @@ -0,0 +1,155 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package index + +import ( + "context" + "fmt" + "log" + "time" + + "cloud.google.com/go/bigquery" + "github.com/apigee/registry-experimental/cmd/registry-bigquery/common" + "github.com/apigee/registry/cmd/registry/patch" + "github.com/apigee/registry/pkg/application/apihub" + "github.com/apigee/registry/pkg/connection" + "github.com/apigee/registry/pkg/mime" + "github.com/apigee/registry/pkg/names" + "github.com/apigee/registry/pkg/visitor" + "github.com/apigee/registry/rpc" + "github.com/spf13/cobra" +) + +func linksCommand() *cobra.Command { + var filter string + var project string + var dataset string + var batchSize int + cmd := &cobra.Command{ + Use: "links PATTERN", + Short: "Build a BigQuery index of links between resources", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + c, err := connection.ActiveConfig() + if err != nil { + return err + } + pattern := c.FQName(args[0]) + adminClient, err := connection.NewAdminClientWithSettings(ctx, c) + if err != nil { + return err + } + registryClient, err := connection.NewRegistryClientWithSettings(ctx, c) + if err != nil { + return err + } + if project == "" { + project = c.Project + } + client, err := bigquery.NewClient(ctx, project) + if err != nil { + return err + } + ds, err := common.GetOrCreateDataset(ctx, client, dataset) + if err != nil { + return err + } + table, err := common.GetOrCreateTable(ctx, ds, "links", link{}) + if err != nil { + return err + } + v := &linksVisitor{ + registryClient: registryClient, + } + err = visitor.Visit(ctx, v, visitor.VisitorOptions{ + RegistryClient: registryClient, + AdminClient: adminClient, + Pattern: pattern, + Filter: filter, + ImplicitProject: &rpc.Project{Name: "projects/implicit"}, + }) + if err != nil { + return err + } + u := table.Inserter() + log.Printf("uploading %d links", len(v.links)) + for start := 0; start < len(v.links); start += batchSize { + log.Printf("%d", start) + end := 
min(start+batchSize, len(v.links)) + if err := u.Put(ctx, v.links[start:end]); err != nil { + return err + } + } + return nil + }, + } + cmd.Flags().StringVar(&filter, "filter", "", "Filter selected resources") + cmd.Flags().StringVar(&project, "project", "", "Project to use for BigQuery upload (defaults to registry project)") + cmd.Flags().StringVar(&dataset, "dataset", "registry", "BigQuery dataset name") + cmd.Flags().IntVar(&batchSize, "batch-size", 10000, "Batch size to use when uploading records to BigQuery") + return cmd +} + +type link struct { + Source string + Target string + Kind string + Timestamp time.Time +} + +type linksVisitor struct { + visitor.Unsupported + registryClient connection.RegistryClient + links []*link +} + +func (v *linksVisitor) ArtifactHandler() visitor.ArtifactHandler { + return func(ctx context.Context, message *rpc.Artifact) error { + artifactName, err := names.ParseArtifact(message.Name) + if err != nil { + return err + } + kind := mime.KindForMimeType(message.MimeType) + if kind != "ReferenceList" { + return nil // skip it + } + m := &apihub.ReferenceList{} + err = visitor.FetchArtifactContents(ctx, v.registryClient, message) + if err != nil { + return err + } + if err := patch.UnmarshalContents(message.GetContents(), message.GetMimeType(), m); err != nil { + return err + } + for _, l := range m.References { + if l.Resource != "" { + n, err := names.ParseApi(l.Resource) + if err != nil { + continue + } + fmt.Printf("%s -->%s (%s)\n", artifactName.ApiID(), n.ApiID, l.Category) + v.links = append(v.links, + &link{ + Source: artifactName.ApiID(), + Target: n.ApiID, + Kind: l.Category, + Timestamp: common.Now, + }) + } + } + return nil + } +} diff --git a/cmd/registry-bigquery/index/operations.go b/cmd/registry-bigquery/index/operations.go index 3e4ad66d..e28232ea 100644 --- a/cmd/registry-bigquery/index/operations.go +++ b/cmd/registry-bigquery/index/operations.go @@ -22,6 +22,7 @@ import ( "time" "cloud.google.com/go/bigquery" + 
"github.com/apigee/registry-experimental/cmd/registry-bigquery/common" "github.com/apigee/registry-experimental/pkg/yamlquery" "github.com/apigee/registry/pkg/connection" "github.com/apigee/registry/pkg/mime" @@ -63,11 +64,11 @@ func operationsCommand() *cobra.Command { if err != nil { return err } - ds, err := getOrCreateDataset(ctx, client, dataset) + ds, err := common.GetOrCreateDataset(ctx, client, dataset) if err != nil { return err } - table, err := getOrCreateTable(ctx, ds, "operations", operation{}) + table, err := common.GetOrCreateTable(ctx, ds, "operations", operation{}) if err != nil { return err } @@ -175,7 +176,7 @@ func (v *operationsVisitor) getOpenAPIOperations(specName names.Spec, b []byte) Api: specName.ApiID, Version: specName.VersionID, Spec: specName.SpecID, - Timestamp: now, + Timestamp: common.Now, }) } diff --git a/cmd/registry-bigquery/index/servers.go b/cmd/registry-bigquery/index/servers.go index f48223a0..cbffbb41 100644 --- a/cmd/registry-bigquery/index/servers.go +++ b/cmd/registry-bigquery/index/servers.go @@ -21,6 +21,7 @@ import ( "time" "cloud.google.com/go/bigquery" + "github.com/apigee/registry-experimental/cmd/registry-bigquery/common" "github.com/apigee/registry-experimental/pkg/yamlquery" "github.com/apigee/registry/pkg/connection" "github.com/apigee/registry/pkg/mime" @@ -62,11 +63,11 @@ func serversCommand() *cobra.Command { if err != nil { return err } - ds, err := getOrCreateDataset(ctx, client, dataset) + ds, err := common.GetOrCreateDataset(ctx, client, dataset) if err != nil { return err } - table, err := getOrCreateTable(ctx, ds, "servers", server{}) + table, err := common.GetOrCreateTable(ctx, ds, "servers", server{}) if err != nil { return err } @@ -162,7 +163,7 @@ func (v *serversVisitor) getOpenAPIServers(specName names.Spec, b []byte) error Api: specName.ApiID, Version: specName.VersionID, Spec: specName.SpecID, - Timestamp: now, + Timestamp: common.Now, }) } } @@ -180,7 +181,7 @@ func (v *serversVisitor) 
getOpenAPIServers(specName names.Spec, b []byte) error Api: specName.ApiID, Version: specName.VersionID, Spec: specName.SpecID, - Timestamp: now, + Timestamp: common.Now, }) } return nil diff --git a/cmd/registry-bigquery/main.go b/cmd/registry-bigquery/main.go index d1a64ea8..1f37ccbe 100644 --- a/cmd/registry-bigquery/main.go +++ b/cmd/registry-bigquery/main.go @@ -20,6 +20,7 @@ import ( "os" "github.com/apigee/registry-experimental/cmd/registry-bigquery/index" + "github.com/apigee/registry-experimental/cmd/registry-bigquery/match" "github.com/apigee/registry/pkg/log" "github.com/google/uuid" "github.com/spf13/cobra" @@ -33,6 +34,7 @@ func main() { cmd := &cobra.Command{} cmd.AddCommand(index.Command()) + cmd.AddCommand(match.Command()) if err := cmd.ExecuteContext(ctx); err != nil { os.Exit(1) } diff --git a/cmd/registry-bigquery/match/match.go b/cmd/registry-bigquery/match/match.go new file mode 100644 index 00000000..8b7285fc --- /dev/null +++ b/cmd/registry-bigquery/match/match.go @@ -0,0 +1,216 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package match + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "cloud.google.com/go/bigquery" + "github.com/apigee/registry-experimental/pkg/yamlquery" + "github.com/apigee/registry/pkg/connection" + "github.com/apigee/registry/pkg/mime" + "github.com/apigee/registry/pkg/names" + "github.com/apigee/registry/pkg/visitor" + "github.com/apigee/registry/rpc" + "github.com/spf13/cobra" + "google.golang.org/api/iterator" + "gopkg.in/yaml.v3" +) + +func Command() *cobra.Command { + var filter string + var project string + var dataset string + var batchSize int + cmd := &cobra.Command{ + Use: "match PATTERN", + Short: "Match API specs with a BigQuery index of API information", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + c, err := connection.ActiveConfig() + if err != nil { + return err + } + pattern := c.FQName(args[0]) + adminClient, err := connection.NewAdminClientWithSettings(ctx, c) + if err != nil { + return err + } + registryClient, err := connection.NewRegistryClientWithSettings(ctx, c) + if err != nil { + return err + } + if project == "" { + project = c.Project + } + client, err := bigquery.NewClient(ctx, project) + if err != nil { + return err + } + v := &matchVisitor{ + registryClient: registryClient, + bigQueryClient: client, + } + err = visitor.Visit(ctx, v, visitor.VisitorOptions{ + RegistryClient: registryClient, + AdminClient: adminClient, + Pattern: pattern, + Filter: filter, + ImplicitProject: &rpc.Project{Name: "projects/implicit"}, + }) + if err != nil { + return err + } + + return nil + }, + } + cmd.Flags().StringVar(&filter, "filter", "", "Filter selected resources") + cmd.Flags().StringVar(&project, "project", "", "Project to use for BigQuery upload (defaults to registry project)") + cmd.Flags().StringVar(&dataset, "dataset", "registry", "BigQuery dataset name") + cmd.Flags().IntVar(&batchSize, "batch-size", 10000, "Batch size to use when uploading records to 
BigQuery") + return cmd +} + +type matchVisitor struct { + visitor.Unsupported + registryClient connection.RegistryClient + bigQueryClient *bigquery.Client +} + +func (v *matchVisitor) SpecHandler() visitor.SpecHandler { + return func(ctx context.Context, message *rpc.ApiSpec) error { + fmt.Printf("%s\n", message.Name) + specName, err := names.ParseSpec(message.Name) + if err != nil { + return err + } + return visitor.GetSpec(ctx, v.registryClient, specName, true, + func(ctx context.Context, spec *rpc.ApiSpec) error { + if mime.IsOpenAPIv2(spec.MimeType) || mime.IsOpenAPIv3(spec.MimeType) { + err := v.matchOpenAPI(ctx, specName, spec.Contents) + if err != nil { + return err + } + } + return nil + }) + } +} + +func (v *matchVisitor) matchOpenAPI(ctx context.Context, specName names.Spec, b []byte) error { + var doc yaml.Node + err := yaml.Unmarshal(b, &doc) + if err != nil { + return err + } + operations := make([]*operation, 0) + paths := yamlquery.QueryNode(&doc, "paths") + if paths != nil { + for i := 0; i < len(paths.Content); i += 2 { + path := paths.Content[i].Value + fields := paths.Content[i+1] + for j := 0; j < len(fields.Content); j += 2 { + fieldName := fields.Content[j].Value + // Skip any fields (summary, description, etc) that aren't methods. 
+ if fieldName != "get" && + fieldName != "put" && + fieldName != "post" && + fieldName != "delete" && + fieldName != "options" && + fieldName != "patch" { + continue + } + method := strings.ToUpper(fieldName) + operations = append(operations, + &operation{ + Method: method, + Path: path, + Api: specName.ApiID, + Version: specName.VersionID, + Spec: specName.SpecID, + }) + } + } + } + + counts := make(map[string]int) + total := len(operations) + fmt.Printf("%d total operations\n", total) + for i, op := range operations { + fmt.Printf("%d: %s %s\n", i, op.Method, op.Path) + pattern := strings.ReplaceAll(op.Path, "*", "%") + query := fmt.Sprintf( + `SELECT * FROM registry.operations WHERE path like "%s" and method = "%s"`, + pattern, + op.Method) + q := v.bigQueryClient.Query(query) + it, err := q.Read(ctx) + if err != nil { + return err + } + for { + var match operation + err = it.Next(&match) + if err == iterator.Done { + break + } + if err != nil { + return err + } + name := fmt.Sprintf("apis/%s/versions/%s/specs/%s", match.Api, match.Version, match.Spec) + counts[name]++ + } + } + + apis := make([]string, 0) + for k := range counts { + apis = append(apis, k) + } + sort.Slice(apis, func(i int, j int) bool { + api_i := apis[i] + api_j := apis[j] + count_i := counts[api_i] + count_j := counts[api_j] + if count_i > count_j { + return true + } else if count_i < count_j { + return false + } + // apis with equal counts are alphabetized + return api_i < api_j + }) + fmt.Println("") + // print the array backwards so the best match is last + for i := len(apis) - 1; i >= 0; i-- { + api := apis[i] + fmt.Printf("%0.5f\t%s\n", 1.0*float32(counts[api])/float32(total), api) + } + return nil +} + +type operation struct { + Path string + Method string + Api string + Version string + Spec string + Timestamp time.Time +}