-
Notifications
You must be signed in to change notification settings - Fork 63
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PythonMutator: add diagnostics #1531
Changes from 3 commits
ca3bdfd
cf32e36
6442bc7
4bb07a4
8f6a597
268f6fe
1b41c4e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
package python | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"io" | ||
|
||
"github.com/databricks/cli/libs/diag" | ||
"github.com/databricks/cli/libs/dyn" | ||
) | ||
|
||
type pythonDiagnostic struct { | ||
Severity pythonSeverity `json:"severity"` | ||
Summary string `json:"summary"` | ||
Detail string `json:"detail,omitempty"` | ||
Location pythonDiagnosticLocation `json:"location,omitempty"` | ||
Path string `json:"path,omitempty"` | ||
} | ||
|
||
type pythonDiagnosticLocation struct { | ||
File string `json:"file"` | ||
Line int `json:"line"` | ||
Column int `json:"column"` | ||
} | ||
|
||
type pythonSeverity = string | ||
|
||
const ( | ||
pythonError pythonSeverity = "error" | ||
pythonWarning pythonSeverity = "warning" | ||
) | ||
|
||
// parsePythonDiagnostics parses diagnostics from the Python mutator. | ||
// | ||
// diagnostics file is newline-separated JSON objects with pythonDiagnostic structure. | ||
func parsePythonDiagnostics(input io.Reader) (diag.Diagnostics, error) { | ||
diags := diag.Diagnostics{} | ||
decoder := json.NewDecoder(input) | ||
|
||
for decoder.More() { | ||
var parsedLine pythonDiagnostic | ||
|
||
err := decoder.Decode(&parsedLine) | ||
|
||
if err != nil { | ||
pietern marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return nil, fmt.Errorf("failed to parse diags: %s", err) | ||
} | ||
|
||
severity, err := convertPythonSeverity(parsedLine.Severity) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to parse severity: %s", err) | ||
} | ||
|
||
path, err := convertPythonPath(parsedLine.Path) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to parse path: %s", err) | ||
} | ||
|
||
diag := diag.Diagnostic{ | ||
Severity: severity, | ||
Summary: parsedLine.Summary, | ||
Detail: parsedLine.Detail, | ||
Location: convertPythonLocation(parsedLine.Location), | ||
Path: path, | ||
} | ||
|
||
diags = diags.Append(diag) | ||
} | ||
|
||
return diags, nil | ||
} | ||
|
||
func convertPythonPath(path string) (dyn.Path, error) { | ||
if path == "" { | ||
return nil, nil | ||
} | ||
|
||
return dyn.NewPathFromString(path) | ||
} | ||
|
||
func convertPythonSeverity(severity pythonSeverity) (diag.Severity, error) { | ||
switch severity { | ||
case pythonError: | ||
return diag.Error, nil | ||
case pythonWarning: | ||
return diag.Warning, nil | ||
default: | ||
return 0, fmt.Errorf("unexpected value: %s", severity) | ||
} | ||
} | ||
|
||
func convertPythonLocation(location pythonDiagnosticLocation) dyn.Location { | ||
return dyn.Location{ | ||
File: location.File, | ||
Line: location.Line, | ||
Column: location.Column, | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package python | ||
|
||
import ( | ||
"bytes" | ||
"testing" | ||
|
||
"github.com/databricks/cli/libs/diag" | ||
"github.com/databricks/cli/libs/dyn" | ||
assert "github.com/databricks/cli/libs/dyn/dynassert" | ||
) | ||
|
||
func TestConvertPythonLocation(t *testing.T) { | ||
location := convertPythonLocation(pythonDiagnosticLocation{ | ||
File: "src/examples/file.py", | ||
Line: 1, | ||
Column: 2, | ||
}) | ||
|
||
assert.Equal(t, dyn.Location{ | ||
File: "src/examples/file.py", | ||
Line: 1, | ||
Column: 2, | ||
}, location) | ||
} | ||
|
||
type parsePythonDiagnosticsTest struct { | ||
name string | ||
input string | ||
expected diag.Diagnostics | ||
} | ||
|
||
func TestParsePythonDiagnostics(t *testing.T) { | ||
|
||
testCases := []parsePythonDiagnosticsTest{ | ||
{ | ||
name: "short error with location", | ||
input: `{"severity": "error", "summary": "error summary", "location": {"file": "src/examples/file.py", "line": 1, "column": 2}}`, | ||
expected: diag.Diagnostics{ | ||
{ | ||
Severity: diag.Error, | ||
Summary: "error summary", | ||
Location: dyn.Location{ | ||
File: "src/examples/file.py", | ||
Line: 1, | ||
Column: 2, | ||
}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "short error with path", | ||
input: `{"severity": "error", "summary": "error summary", "path": "resources.jobs.job0.name"}`, | ||
expected: diag.Diagnostics{ | ||
{ | ||
Severity: diag.Error, | ||
Summary: "error summary", | ||
Path: dyn.MustPathFromString("resources.jobs.job0.name"), | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "empty file", | ||
input: "", | ||
expected: diag.Diagnostics{}, | ||
}, | ||
{ | ||
name: "newline file", | ||
input: "\n", | ||
expected: diag.Diagnostics{}, | ||
}, | ||
{ | ||
name: "warning with detail", | ||
input: `{"severity": "warning", "summary": "warning summary", "detail": "warning detail"}`, | ||
expected: diag.Diagnostics{ | ||
{ | ||
Severity: diag.Warning, | ||
Summary: "warning summary", | ||
Detail: "warning detail", | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "multiple errors", | ||
input: `{"severity": "error", "summary": "error summary (1)"}` + "\n" + | ||
`{"severity": "error", "summary": "error summary (2)"}`, | ||
expected: diag.Diagnostics{ | ||
{ | ||
Severity: diag.Error, | ||
Summary: "error summary (1)", | ||
}, | ||
{ | ||
Severity: diag.Error, | ||
Summary: "error summary (2)", | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
for _, tc := range testCases { | ||
t.Run(tc.name, func(t *testing.T) { | ||
diags, err := parsePythonDiagnostics(bytes.NewReader([]byte(tc.input))) | ||
|
||
assert.NoError(t, err) | ||
assert.Equal(t, tc.expected, diags) | ||
}) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ package python | |
import ( | ||
"context" | ||
"encoding/json" | ||
"errors" | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
|
@@ -87,6 +88,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno | |
return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set") | ||
} | ||
|
||
// mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics' | ||
var mutateDiags diag.Diagnostics | ||
var mutateDiagsHasError = errors.New("unexpected error") | ||
|
||
err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) { | ||
pythonPath := interpreterPath(experimental.PyDABs.VEnvPath) | ||
|
||
|
@@ -103,9 +108,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno | |
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) | ||
} | ||
|
||
rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot) | ||
if err != nil { | ||
return dyn.InvalidValue, err | ||
rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot) | ||
mutateDiags = diags | ||
if diags.HasError() { | ||
return dyn.InvalidValue, mutateDiagsHasError | ||
} | ||
|
||
visitor, err := createOverrideVisitor(ctx, m.phase) | ||
|
@@ -116,7 +122,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno | |
return merge.Override(leftRoot, rightRoot, visitor) | ||
}) | ||
|
||
return diag.FromErr(err) | ||
if err == mutateDiagsHasError { | ||
if !mutateDiags.HasError() { | ||
panic("mutateDiags has no error, but error is expected") | ||
} | ||
|
||
return mutateDiags | ||
} | ||
|
||
return mutateDiags.Extend(diag.FromErr(err)) | ||
} | ||
|
||
func createCacheDir(ctx context.Context) (string, error) { | ||
|
@@ -138,9 +152,10 @@ func createCacheDir(ctx context.Context) (string, error) { | |
return os.MkdirTemp("", "-pydabs") | ||
} | ||
|
||
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) { | ||
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { | ||
inputPath := filepath.Join(cacheDir, "input.json") | ||
outputPath := filepath.Join(cacheDir, "output.json") | ||
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") | ||
|
||
args := []string{ | ||
pythonPath, | ||
|
@@ -152,42 +167,71 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r | |
inputPath, | ||
"--output", | ||
outputPath, | ||
"--diagnostics", | ||
diagnosticsPath, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a plan to keep the pyDABs version consistent with the CLI version? We don't need one immediately it'll likely be important down the line given there's an implicit dependency on the types defined in the CLI and pyDABs ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't have to be consistent because changes need to be backward and forward-compatible. If we make a breaking change, we raise the minimum required version. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is the minimum required version stored/communicated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't. For now, we can add a minimal required version of CLI as the configuration in the template, and document it. We can also check in VerifyCliVersion, but right now, it's executed before we run the Python mutator if we want to append a different minimum required version there. Another difficulty is that this configuration parameter might already be used. Another way is to pass the current CLI version into Python code and check it there. We can make it a part of the effective bundle configuration. |
||
} | ||
|
||
// we need to marshal dyn.Value instead of bundle.Config to JSON to support | ||
// non-string fields assigned with bundle variables | ||
rootConfigJson, err := json.Marshal(root.AsAny()) | ||
if err != nil { | ||
return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err) | ||
} | ||
|
||
err = os.WriteFile(inputPath, rootConfigJson, 0600) | ||
if err != nil { | ||
return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err) | ||
if err := writeInputFile(inputPath, root); err != nil { | ||
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) | ||
} | ||
|
||
stderrWriter := newLogWriter(ctx, "stderr: ") | ||
stdoutWriter := newLogWriter(ctx, "stdout: ") | ||
|
||
_, err = process.Background( | ||
_, processErr := process.Background( | ||
ctx, | ||
args, | ||
process.WithDir(rootPath), | ||
process.WithStderrWriter(stderrWriter), | ||
process.WithStdoutWriter(stdoutWriter), | ||
) | ||
|
||
pythonDiagnostics, pythonDiagnosticsErr := loadDiagnosticsFile(diagnosticsPath) | ||
|
||
// if diagnostics file exists, it gives the most descriptive errors | ||
// if there is any error, we treat it as fatal error, and stop processing | ||
if pythonDiagnostics.HasError() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd recommend rolling up There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The idea behind
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The error handling flow is not super straightforward. Could you include a few debug log lines to capture error conditions even if we don't act on them? When this goes wrong I'd like to be able to analyse why/how/etc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree. I added logging just after each error is created |
||
return dyn.InvalidValue, pythonDiagnostics | ||
} | ||
|
||
// process can fail without reporting errors in diagnostics file or creating it, for instance, | ||
// venv doesn't have PyDABs library installed | ||
if processErr != nil { | ||
return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr) | ||
} | ||
|
||
// or we can fail to read diagnostics file, that should always be created | ||
if pythonDiagnosticsErr != nil { | ||
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) | ||
} | ||
|
||
output, err := loadOutputFile(rootPath, outputPath) | ||
if err != nil { | ||
return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err) | ||
} | ||
|
||
// we pass through pythonDiagnostic because it contains warnings | ||
return output, pythonDiagnostics | ||
} | ||
|
||
func writeInputFile(inputPath string, input dyn.Value) error { | ||
// we need to marshal dyn.Value instead of bundle.Config to JSON to support | ||
// non-string fields assigned with bundle variables | ||
rootConfigJson, err := json.Marshal(input.AsAny()) | ||
if err != nil { | ||
return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err) | ||
return fmt.Errorf("failed to marshal input: %w", err) | ||
} | ||
|
||
return os.WriteFile(inputPath, rootConfigJson, 0600) | ||
} | ||
|
||
func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { | ||
outputFile, err := os.Open(outputPath) | ||
if err != nil { | ||
return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err) | ||
return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err) | ||
} | ||
|
||
defer func() { | ||
_ = outputFile.Close() | ||
}() | ||
defer outputFile.Close() | ||
|
||
// we need absolute path because later parts of pipeline assume all paths are absolute | ||
// and this file will be used as location to resolve relative paths. | ||
|
@@ -204,24 +248,38 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r | |
|
||
generated, err := yamlloader.LoadYAML(virtualPath, outputFile) | ||
if err != nil { | ||
return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err) | ||
return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err) | ||
} | ||
|
||
normalized, diagnostic := convert.Normalize(config.Root{}, generated) | ||
if diagnostic.Error() != nil { | ||
return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %w", diagnostic.Error()) | ||
return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error()) | ||
} | ||
|
||
// warnings shouldn't happen because output should be already normalized | ||
// when it happens, it's a bug in the mutator, and should be treated as an error | ||
|
||
for _, d := range diagnostic.Filter(diag.Warning) { | ||
return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %s", d.Summary) | ||
return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary) | ||
} | ||
|
||
return normalized, nil | ||
} | ||
|
||
// loadDiagnosticsFile loads diagnostics from a file. | ||
// | ||
// It contains a list of warnings and errors that we should print to users. | ||
func loadDiagnosticsFile(path string) (diag.Diagnostics, error) { | ||
file, err := os.Open(path) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it guaranteed to exist, even if there are no diagnostics? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think it will be less error-prone like that, so it doesn't happen that integration suddenly breaks and stops to print warnings and errors There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please capture this intent in the comment. The next person looking at this code could add a not-exists check not being aware of this intent. |
||
if err != nil { | ||
return nil, fmt.Errorf("failed to open diagnostics file: %w", err) | ||
} | ||
|
||
defer file.Close() | ||
|
||
return parsePythonDiagnostics(file) | ||
} | ||
|
||
func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) { | ||
switch phase { | ||
case PythonMutatorPhaseLoad: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we serialize the diagnostics on the Python side in JSON instead of using the string representation for
path
andlocation
?That way we'll not need all the deserialization logic here to convert strings for location and path back to a
diag.Dianostic
. We could directly usejson.Unmarshal
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc: @pietern for thoughts on the serialzation format for diagnostics. Separately customers have asked for machine readable warnings, and the format we decide on here should be consistent with that.
I'd recommend the full JSON body because it's the simplest to maintain as well as is easy to parse for consumers of the JSON representation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did research on how other tools report warnings:
They use JSON for source locations.
All k8s tools report the path as a string. I will change it to do the same.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also note that this format is purely internal, and we wouldn't show this to users, occasionally, we might need to read it for debugging.
If we ever want CLI to report warnings in a structured format, it can use any format not necessarily the same as here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 -- I wouldn't want to couple them as they might need to evolve independently.