databricks · pietern · Jul 2, 2024 · Jun 26, 2024 · Jun 27, 2024 · Jul 1, 2024
@@ -0,0 +1,98 @@
+package python
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+)
+
+type pythonDiagnostic struct {
+	Severity pythonSeverity           `json:"severity"`
+	Summary  string                   `json:"summary"`
+	Detail   string                   `json:"detail,omitempty"`
+	Location pythonDiagnosticLocation `json:"location,omitempty"`
+	Path     string                   `json:"path,omitempty"`
+}
+
+type pythonDiagnosticLocation struct {
+	File   string `json:"file"`
+	Line   int    `json:"line"`
+	Column int    `json:"column"`
+}
+
+type pythonSeverity = string
+
+const (
+	pythonError   pythonSeverity = "error"
+	pythonWarning pythonSeverity = "warning"
+)
+
+// parsePythonDiagnostics parses diagnostics from the Python mutator.
+//
+// diagnostics file is newline-separated JSON objects with pythonDiagnostic structure.
+func parsePythonDiagnostics(input io.Reader) (diag.Diagnostics, error) {
+	diags := diag.Diagnostics{}
+	decoder := json.NewDecoder(input)
+
+	for decoder.More() {
+		var parsedLine pythonDiagnostic
+
+		err := decoder.Decode(&parsedLine)
+
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse diags: %s", err)
+		}
+
+		severity, err := convertPythonSeverity(parsedLine.Severity)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse severity: %s", err)
+		}
+
+		path, err := convertPythonPath(parsedLine.Path)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse path: %s", err)
+		}
+
+		diag := diag.Diagnostic{
+			Severity: severity,
+			Summary:  parsedLine.Summary,
+			Detail:   parsedLine.Detail,
+			Location: convertPythonLocation(parsedLine.Location),
+			Path:     path,
+		}
+
+		diags = diags.Append(diag)
+	}
+
+	return diags, nil
+}
+
+func convertPythonPath(path string) (dyn.Path, error) {
+	if path == "" {
+		return nil, nil
+	}
+
+	return dyn.NewPathFromString(path)
+}
+
+func convertPythonSeverity(severity pythonSeverity) (diag.Severity, error) {
+	switch severity {
+	case pythonError:
+		return diag.Error, nil
+	case pythonWarning:
+		return diag.Warning, nil
+	default:
+		return 0, fmt.Errorf("unexpected value: %s", severity)
+	}
+}
+
+func convertPythonLocation(location pythonDiagnosticLocation) dyn.Location {
+	return dyn.Location{
+		File:   location.File,
+		Line:   location.Line,
+		Column: location.Column,
+	}
+}
@@ -0,0 +1,107 @@
+package python
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+	assert "github.com/databricks/cli/libs/dyn/dynassert"
+)
+
+func TestConvertPythonLocation(t *testing.T) {
+	location := convertPythonLocation(pythonDiagnosticLocation{
+		File:   "src/examples/file.py",
+		Line:   1,
+		Column: 2,
+	})
+
+	assert.Equal(t, dyn.Location{
+		File:   "src/examples/file.py",
+		Line:   1,
+		Column: 2,
+	}, location)
+}
+
+type parsePythonDiagnosticsTest struct {
+	name     string
+	input    string
+	expected diag.Diagnostics
+}
+
+func TestParsePythonDiagnostics(t *testing.T) {
+
+	testCases := []parsePythonDiagnosticsTest{
+		{
+			name:  "short error with location",
+			input: `{"severity": "error", "summary": "error summary", "location": {"file": "src/examples/file.py", "line": 1, "column": 2}}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Error,
+					Summary:  "error summary",
+					Location: dyn.Location{
+						File:   "src/examples/file.py",
+						Line:   1,
+						Column: 2,
+					},
+				},
+			},
+		},
+		{
+			name:  "short error with path",
+			input: `{"severity": "error", "summary": "error summary", "path": "resources.jobs.job0.name"}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Error,
+					Summary:  "error summary",
+					Path:     dyn.MustPathFromString("resources.jobs.job0.name"),
+				},
+			},
+		},
+		{
+			name:     "empty file",
+			input:    "",
+			expected: diag.Diagnostics{},
+		},
+		{
+			name:     "newline file",
+			input:    "\n",
+			expected: diag.Diagnostics{},
+		},
+		{
+			name:  "warning with detail",
+			input: `{"severity": "warning", "summary": "warning summary", "detail": "warning detail"}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Warning,
+					Summary:  "warning summary",
+					Detail:   "warning detail",
+				},
+			},
+		},
+		{
+			name: "multiple errors",
+			input: `{"severity": "error", "summary": "error summary (1)"}` + "\n" +
+				`{"severity": "error", "summary": "error summary (2)"}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Error,
+					Summary:  "error summary (1)",
+				},
+				{
+					Severity: diag.Error,
+					Summary:  "error summary (2)",
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			diags, err := parsePythonDiagnostics(bytes.NewReader([]byte(tc.input)))
+
+			assert.NoError(t, err)
+			assert.Equal(t, tc.expected, diags)
+		})
+	}
+}
@@ -3,6 +3,7 @@ package python
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -87,6 +88,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
 		return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set")
 	}
 
+	// mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics'
+	var mutateDiags diag.Diagnostics
+	var mutateDiagsHasError = errors.New("unexpected error")
+
 	err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) {
 		pythonPath := interpreterPath(experimental.PyDABs.VEnvPath)
 
@@ -103,9 +108,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
 			return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
 		}
 
-		rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
-		if err != nil {
-			return dyn.InvalidValue, err
+		rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
+		mutateDiags = diags
+		if diags.HasError() {
+			return dyn.InvalidValue, mutateDiagsHasError
 		}
 
 		visitor, err := createOverrideVisitor(ctx, m.phase)
@@ -116,7 +122,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
 		return merge.Override(leftRoot, rightRoot, visitor)
 	})
 
-	return diag.FromErr(err)
+	if err == mutateDiagsHasError {
+		if !mutateDiags.HasError() {
+			panic("mutateDiags has no error, but error is expected")
+		}
+
+		return mutateDiags
+	}
+
+	return mutateDiags.Extend(diag.FromErr(err))
 }
 
 func createCacheDir(ctx context.Context) (string, error) {
@@ -138,9 +152,10 @@ func createCacheDir(ctx context.Context) (string, error) {
 	return os.MkdirTemp("", "-pydabs")
 }
 
-func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) {
+func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
 	inputPath := filepath.Join(cacheDir, "input.json")
 	outputPath := filepath.Join(cacheDir, "output.json")
+	diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
 
 	args := []string{
 		pythonPath,
@@ -152,42 +167,71 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
 		inputPath,
 		"--output",
 		outputPath,
+		"--diagnostics",
+		diagnosticsPath,
 	}
 
-	// we need to marshal dyn.Value instead of bundle.Config to JSON to support
-	// non-string fields assigned with bundle variables
-	rootConfigJson, err := json.Marshal(root.AsAny())
-	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err)
-	}
-
-	err = os.WriteFile(inputPath, rootConfigJson, 0600)
-	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err)
+	if err := writeInputFile(inputPath, root); err != nil {
+		return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
 	}
 
 	stderrWriter := newLogWriter(ctx, "stderr: ")
 	stdoutWriter := newLogWriter(ctx, "stdout: ")
 
-	_, err = process.Background(
+	_, processErr := process.Background(
 		ctx,
 		args,
 		process.WithDir(rootPath),
 		process.WithStderrWriter(stderrWriter),
 		process.WithStdoutWriter(stdoutWriter),
 	)
+
+	pythonDiagnostics, pythonDiagnosticsErr := loadDiagnosticsFile(diagnosticsPath)
+
+	// if diagnostics file exists, it gives the most descriptive errors
+	// if there is any error, we treat it as fatal error, and stop processing
+	if pythonDiagnostics.HasError() {
+		return dyn.InvalidValue, pythonDiagnostics
+	}
+
+	// process can fail without reporting errors in diagnostics file or creating it, for instance,
+	// venv doesn't have PyDABs library installed
+	if processErr != nil {
+		return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr)
+	}
+
+	// or we can fail to read diagnostics file, that should always be created
+	if pythonDiagnosticsErr != nil {
+		return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
+	}
+
+	output, err := loadOutputFile(rootPath, outputPath)
+	if err != nil {
+		return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err)
+	}
+
+	// we pass through pythonDiagnostic because it contains warnings
+	return output, pythonDiagnostics
+}
+
+func writeInputFile(inputPath string, input dyn.Value) error {
+	// we need to marshal dyn.Value instead of bundle.Config to JSON to support
+	// non-string fields assigned with bundle variables
+	rootConfigJson, err := json.Marshal(input.AsAny())
 	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err)
+		return fmt.Errorf("failed to marshal input: %w", err)
 	}
 
+	return os.WriteFile(inputPath, rootConfigJson, 0600)
+}
+
+func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) {
 	outputFile, err := os.Open(outputPath)
 	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err)
+		return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err)
 	}
 
-	defer func() {
-		_ = outputFile.Close()
-	}()
+	defer outputFile.Close()
 
 	// we need absolute path because later parts of pipeline assume all paths are absolute
 	// and this file will be used as location to resolve relative paths.
@@ -204,24 +248,38 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
 
 	generated, err := yamlloader.LoadYAML(virtualPath, outputFile)
 	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err)
+		return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err)
 	}
 
 	normalized, diagnostic := convert.Normalize(config.Root{}, generated)
 	if diagnostic.Error() != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %w", diagnostic.Error())
+		return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error())
 	}
 
 	// warnings shouldn't happen because output should be already normalized
 	// when it happens, it's a bug in the mutator, and should be treated as an error
 
 	for _, d := range diagnostic.Filter(diag.Warning) {
-		return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %s", d.Summary)
+		return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary)
 	}
 
 	return normalized, nil
 }
 
+// loadDiagnosticsFile loads diagnostics from a file.
+//
+// It contains a list of warnings and errors that we should print to users.
+func loadDiagnosticsFile(path string) (diag.Diagnostics, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open diagnostics file: %w", err)
+	}
+
+	defer file.Close()
+
+	return parsePythonDiagnostics(file)
+}
+
 func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) {
 	switch phase {
 	case PythonMutatorPhaseLoad: