Skip to content

Commit

Permalink
tool: implement a detector for new fields using proto parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jingyih committed Dec 10, 2024
1 parent 1527577 commit 5a20db7
Show file tree
Hide file tree
Showing 8 changed files with 682 additions and 6 deletions.
2 changes: 2 additions & 0 deletions dev/tools/controllerbuilder/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"fmt"
"os"

"github.com/GoogleCloudPlatform/k8s-config-connector/dev/tools/controllerbuilder/pkg/commands/detectnewfields"
"github.com/GoogleCloudPlatform/k8s-config-connector/dev/tools/controllerbuilder/pkg/commands/exportcsv"
"github.com/GoogleCloudPlatform/k8s-config-connector/dev/tools/controllerbuilder/pkg/commands/generatecontroller"
"github.com/GoogleCloudPlatform/k8s-config-connector/dev/tools/controllerbuilder/pkg/commands/generatedirectreconciler"
Expand All @@ -44,6 +45,7 @@ func Execute() {
rootCmd.AddCommand(updatetypes.BuildCommand(&generateOptions))
rootCmd.AddCommand(exportcsv.BuildCommand(&generateOptions))
rootCmd.AddCommand(exportcsv.BuildPromptCommand(&generateOptions))
rootCmd.AddCommand(detectnewfields.BuildCommand(&generateOptions))

if err := rootCmd.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
Expand Down
4 changes: 2 additions & 2 deletions dev/tools/controllerbuilder/pkg/codegen/mappergenerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@ func (v *MapperGenerator) GenerateMappers() error {
}
out := v.getOutputFile(k)
out.packageName = lastGoComponent(goPackage)
{

{
pbPackage := pair.ProtoGoPackage
krmPackage := pair.KRMType.GoPackage

Expand Down
8 changes: 4 additions & 4 deletions dev/tools/controllerbuilder/pkg/codegen/typegenerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (g *TypeGenerator) visitMessage(messageDescriptor protoreflect.MessageDescr

g.visitedMessages = append(g.visitedMessages, messageDescriptor)

msgs, err := findDependenciesForMessage(messageDescriptor)
msgs, err := FindDependenciesForMessage(messageDescriptor, nil) // TODO: explicitly set ignored fields when generating Go types
if err != nil {
return err
}
Expand Down Expand Up @@ -367,12 +367,12 @@ func isAcronym(s string) bool {
}
}

// findDependenciesForMessage recursively explores the dependent proto messages of the given message.
func findDependenciesForMessage(message protoreflect.MessageDescriptor) ([]protoreflect.MessageDescriptor, error) {
// FindDependenciesForMessage recursively explores the dependent proto messages of the given message.
func FindDependenciesForMessage(message protoreflect.MessageDescriptor, ignoredFields sets.String) ([]protoreflect.MessageDescriptor, error) {
msgs := make(map[string]protoreflect.MessageDescriptor)
for i := 0; i < message.Fields().Len(); i++ {
field := message.Fields().Get(i)
FindDependenciesForField(field, msgs, nil) // TODO: explicitly set ignored fields when generating Go types
FindDependenciesForField(field, msgs, ignoredFields)
}

RemoveNotMappedToGoStruct(msgs)
Expand Down
50 changes: 50 additions & 0 deletions dev/tools/controllerbuilder/pkg/commands/detectnewfields/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Field Detection Tool

This tool identifies changes in GCP API proto definitions by comparing the pinned version specified in the `mockgcp/git.versions` file with the latest version at HEAD.

It can identify:
- New fields **added** to messages
- Fields **removed** from messages
- Fields that **changed type**

## Usage

```bash
# Basic usage - checks all proto messages used in "generate.sh"
$ go run . detect-new-fields

# Check specific messages
$ go run . detect-new-fields \
--target-messages="google.cloud.bigquery.datatransfer.v1.TransferConfig"

# Ignore specific fields using a config file
$ go run . detect-new-fields \
--ignored-fields-file=config/ignored_fields.yaml
```

An example `ignored_fields.yaml`:
```yaml
google.cloud.bigquery.connection.v1:
  Connection:
    - salesforceDataCloud
google.api.apikeys.v2:
  Key:
    - createTime
    - updateTime
```
## Example Output
```
Changes detected in message: google.cloud.bigquery.datatransfer.v1.TransferConfig
  New field: schedule_options_v2
  New field: error
Changes detected in message: google.cloud.discoveryengine.v1.DataStore
  New field: billing_estimation
  New field: workspace_config
Changes detected in message: google.cloud.discoveryengine.v1.Engine
  New field: disable_analytics
Changes detected in message: google.spanner.admin.instance.v1.Instance
  New field: default_backup_schedule_type
  New field: replica_compute_capacity
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package detectnewfields

import (
"context"
"fmt"
"os"
"sort"
"strings"

"github.com/GoogleCloudPlatform/k8s-config-connector/dev/tools/controllerbuilder/pkg/newfieldsdetector"
"github.com/GoogleCloudPlatform/k8s-config-connector/dev/tools/controllerbuilder/pkg/options"
"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)

// DetectNewFieldsOptions holds the settings of the detect-new-fields command:
// the shared generator options plus the values of this command's own flags.
type DetectNewFieldsOptions struct {
// Shared options handed to BuildCommand by the root command.
*options.GenerateOptions

targetMessages string // comma-separated list of fully qualified proto message names (--target-messages)
ignoredFieldsFile string // path to the ignored fields YAML file (--ignored-fields-file); empty means nothing is ignored
outputFormat string // "text" (the default), "json" or "yaml"; only "text" is implemented so far
}

// InitDefaults fills in the option defaults; it is meant to run before the
// flags are bound so that the defaults show up as flag default values.
//
// The output format defaults to "text". The repository root is looked up and
// a lookup failure is returned to the caller, but the resolved path is not
// used yet: the default ignored-fields file location stays unset until that
// file exists (see the TODO below).
func (o *DetectNewFieldsOptions) InitDefaults() error {
	o.outputFormat = "text"

	// Only the lookup error matters for now; the root path itself is discarded.
	if _, err := options.RepoRoot(); err != nil {
		return err
	}
	// TODO: create this file
	// o.ignoredFieldsFile = filepath.Join(repoRoot, "dev", "tools", "controllerbuilder", "config", "ignored_fields.yaml")

	return nil
}

// BindFlags registers this command's flags on cmd. Each flag writes into the
// matching option field and uses the field's current value as its default.
func (o *DetectNewFieldsOptions) BindFlags(cmd *cobra.Command) {
	flagSet := cmd.Flags()

	flagSet.StringVar(&o.targetMessages, "target-messages", o.targetMessages,
		"Comma-separated list of target fully qualified proto message names to check")
	flagSet.StringVar(&o.ignoredFieldsFile, "ignored-fields-file", o.ignoredFieldsFile,
		"Path to YAML file containing ignored fields configuration")
	flagSet.StringVar(&o.outputFormat, "output-format", o.outputFormat,
		"Output format: text, json, or yaml")
}

// BuildCommand constructs the "detect-new-fields" cobra command.
//
// baseOptions are the shared generator options; they are embedded in the
// command's own option struct. If the defaults cannot be initialized the
// error is written to stderr and the process exits with status 1.
func BuildCommand(baseOptions *options.GenerateOptions) *cobra.Command {
	opt := &DetectNewFieldsOptions{GenerateOptions: baseOptions}
	if err := opt.InitDefaults(); err != nil {
		fmt.Fprintf(os.Stderr, "Error initializing defaults: %v\n", err)
		os.Exit(1)
	}

	detectCmd := &cobra.Command{
		Use:   "detect-new-fields",
		Short: "Detect new fields between pinned and HEAD versions of proto definitions",
		Long: `Detect new fields by comparing the pinned version of proto definitions with the current HEAD version.
The pinned version is determined by the version specified in mockgcp/git.versions.`,
		// Positional arguments are not used by this command.
		RunE: func(cmd *cobra.Command, _ []string) error {
			return runNewFieldDetector(cmd.Context(), opt)
		},
	}
	opt.BindFlags(detectCmd)

	return detectCmd
}

// runNewFieldDetector executes the detection described by opt and prints the
// result in the requested output format.
//
// It loads the ignored-fields configuration, builds the detector for the
// requested target messages, runs it, and hands the diffs to outputResults.
// Every failure is wrapped with the step that produced it. ctx is accepted
// for the command plumbing but is not consulted by the steps below.
func runNewFieldDetector(ctx context.Context, opt *DetectNewFieldsOptions) error {
	ignoredFields, err := newfieldsdetector.LoadIgnoredFields(opt.ignoredFieldsFile)
	if err != nil {
		return fmt.Errorf("loading ignored fields: %w", err)
	}

	// An empty set is passed when no usable message name was given.
	targetMessages := sets.NewString(parseTargetMessages(opt.targetMessages)...)

	newFieldDetector, err := newfieldsdetector.NewFieldDetector(&newfieldsdetector.DetectorOptions{
		TargetMessages: targetMessages,
		IgnoredFields:  ignoredFields,
	})
	if err != nil {
		return fmt.Errorf("creating new field detector: %w", err)
	}

	diffs, err := newFieldDetector.DetectNewFields()
	if err != nil {
		return fmt.Errorf("detecting new fields: %w", err)
	}

	return outputResults(diffs, opt.outputFormat)
}

// parseTargetMessages splits a comma-separated flag value into message names,
// trimming surrounding whitespace and dropping empty entries so that inputs
// such as "a, b" or "a,b," do not produce names that can never match.
func parseTargetMessages(raw string) []string {
	var names []string
	for _, part := range strings.Split(raw, ",") {
		if name := strings.TrimSpace(part); name != "" {
			names = append(names, name)
		}
	}
	return names
}

// outputResults reports the detected diffs in the given format.
//
// When diffs is empty a log message is emitted and nil is returned regardless
// of format. Otherwise diffs is sorted in place by message name and written
// to stdout. Only the "text" format is implemented; "json" and "yaml" return
// a not-yet-implemented error and any other value is rejected as unsupported.
func outputResults(diffs []newfieldsdetector.MessageDiff, format string) error {
	if len(diffs) == 0 {
		klog.Info("No changes detected in the fields")
		return nil
	}

	sort.Slice(diffs, func(i, j int) bool {
		return diffs[i].MessageName < diffs[j].MessageName
	})

	switch format {
	case "text":
		for _, diff := range diffs {
			fmt.Printf("Changes detected in message: %s\n", diff.MessageName)
			for _, field := range diff.NewFields {
				fmt.Printf("  New field: %v\n", field)
			}
			for _, field := range diff.RemovedFields {
				fmt.Printf("  Removed field: %v\n", field)
			}

			// Map iteration order is random; sort the field names so the
			// report is identical from run to run.
			changedNames := make([]string, 0, len(diff.ChangedFields))
			for field := range diff.ChangedFields {
				changedNames = append(changedNames, field)
			}
			sort.Strings(changedNames)
			for _, field := range changedNames {
				change := diff.ChangedFields[field]
				fmt.Printf("  Changed field %s: %v -> %v (repeated: %v)\n",
					field, change.OldType, change.NewType, change.IsRepeated)
			}
		}
	case "json":
		// TODO
		return fmt.Errorf("JSON output not yet implemented")
	case "yaml":
		// TODO
		return fmt.Errorf("YAML output not yet implemented")
	default:
		return fmt.Errorf("unsupported output format: %s", format)
	}

	return nil
}
79 changes: 79 additions & 0 deletions dev/tools/controllerbuilder/pkg/newfieldsdetector/ignoredfields.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package newfieldsdetector

import (
"fmt"
"os"

"gopkg.in/yaml.v2"
"k8s.io/apimachinery/pkg/util/sets"
)

// IgnoredFieldsConfig represents the structure of the ignored fields YAML file:
// a mapping from proto package name to message name to the list of field
// names that should be ignored in that message.
//
// Example YAML:
//
//	google.cloud.bigquery.connection.v1:
//	  Connection:
//	    - salesforceDataCloud
//	google.api.apikeys.v2:
//	  Key:
//	    - createTime
//	    - updateTime
type IgnoredFieldsConfig struct {
// key is proto package name (e.g., "google.cloud.compute.v1").
// The field is inlined, so the package names are the top-level keys of the
// YAML document, as in the example above.
ProtoPackages map[string]MessageFieldIgnores `yaml:",inline"`
}

// MessageFieldIgnores lists, for a single proto package, the fields to ignore
// in each of its messages.
type MessageFieldIgnores struct {
// key is proto message name (e.g. "Instance")
// value is list of field names to be ignored in the message.
// The field is inlined, so the message names appear directly under the
// package key in the YAML document.
Messages map[string][]string `yaml:",inline"`
}

// LoadIgnoredFields reads the ignored fields YAML file at configPath and
// flattens it into a set of fully qualified field names of the form
// "<package>.<message>.<field>", e.g. "google.cloud.compute.v1.Instance.id".
//
// An empty configPath is not an error and yields an empty set. A file that
// cannot be read or parsed yields a nil set and a wrapped error.
func LoadIgnoredFields(configPath string) (sets.String, error) {
	result := sets.NewString()
	if configPath == "" {
		return result, nil
	}

	raw, err := os.ReadFile(configPath)
	if err != nil {
		return nil, fmt.Errorf("reading ignored fields config: %w", err)
	}

	var cfg IgnoredFieldsConfig
	if err = yaml.Unmarshal(raw, &cfg); err != nil {
		return nil, fmt.Errorf("parsing ignored fields config: %w", err)
	}

	for pkg, perPackage := range cfg.ProtoPackages {
		for msg, fieldNames := range perPackage.Messages {
			// Shared "<package>.<message>." prefix for every field of this message.
			prefix := pkg + "." + msg + "."
			for _, name := range fieldNames {
				result.Insert(prefix + name)
			}
		}
	}
	return result, nil
}

// IsFieldIgnored reports whether ignoredFields contains the field identified
// by joining fullyQualifiedMessageName and fieldName with a dot.
func IsFieldIgnored(ignoredFields sets.String, fullyQualifiedMessageName, fieldName string) bool {
	return ignoredFields.Has(fullyQualifiedMessageName + "." + fieldName)
}
Loading

0 comments on commit 5a20db7

Please sign in to comment.