Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

[flytepropeller] Support attribute access on promises #615

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,4 @@ require (
)

replace github.com/aws/amazon-sagemaker-operator-for-k8s => github.com/aws/amazon-sagemaker-operator-for-k8s v1.0.1-0.20210303003444-0fb33b1fd49d
replace github.com/flyteorg/flyteidl => ../flyteidl
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ByronHsu can you update this?

20 changes: 20 additions & 0 deletions pkg/compiler/validators/bindings.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,26 @@ func validateBinding(w c.WorkflowBuilder, nodeID c.NodeID, nodeParam string, bin
}
}

// If the type is a struct (e.g. dataclass) and the attribute path is longer than 0,
// We skip the type check and let it fail at runtime because we don't know the type of struct field
if sourceType.GetSimple() == flyte.SimpleType_STRUCT && len(val.Promise.AttrPath) > 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wild-endeavor is this necessarily true? If we have dataclass_json, do we know the type?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ByronHsu would you mind just trying to fill this in a bit? If it's really not doable, or if it has a bunch of edge cases, I think it's okay.

Walking through some examples. Let's say the simple case of

@task
def t1() -> List[str]:
    ...

@task
def t2(needs_str: str):
    ...

@workflow
def wf():
    res = t1()
    t2(needs_str=res[5])

In this case t1 will have a TypedInterface output of

        value {
          type {
            collection_type {
              simple: STRING
            }
          }
          description: "o0"
        }

and because it's a string it will type check against the string in t2. The same can be applied for nested lists and dictionaries. As long as they are expressed via collection_type and map_value_type they are relatively easily type-checkable. I assume this is what the code below is doing?

The dataclass case is more complicated

@dataclass_json
@dataclass
class MyDC(object):
    snapshotDate: datetime
    region: str

@task
def t1(needs_dt: datetime):
    ...

@task
def t2(needs_str: str):
    ...

@workflow
def wf(a: MyDC):
    t1(needs_dt=a.snapshotDate)
    t2(needs_str=a.region)

The reason it's more complicated is because the dataclass types are completely obscured (esp. since flyte idl currently doesn't support multi-variate map types). I assume this is why you're skipping checking in the simple/struct case.

Could you see if it's possible though to capture it? Can we

  • Add a new field to LiteralType that is only relevant for the scalar case. Maybe just in LiteralType or in TypeStructure.
  • Add a nested literal map of the types that's only present in the dataclass case. Just iterate through the fields in dataclass and recursively call the TypeEngine.
  • Add the same checking logic in propeller as the normal map_value_type if it's a simple_struct and this new field is present.

What do you think? It will add to the correctness of this new feature. And it will make Dan happy. And in the end, that's what we're all really about.

return param.GetType(), []c.NodeID{val.Promise.NodeId}, true
}

// If the variable has an attribute path. Extract the type of the last attribute.
for range val.Promise.AttrPath {
if sourceType.GetCollectionType() != nil {
sourceType = sourceType.GetCollectionType()
}
if sourceType.GetMapValueType() != nil {
sourceType = sourceType.GetMapValueType()
}
// If the current type is struct, skip the type check because we don't know the type of struct field
if sourceType.GetSimple() == flyte.SimpleType_STRUCT {
return param.GetType(), []c.NodeID{val.Promise.NodeId}, true
}
}

if !validateParamTypes || AreTypesCastable(sourceType, expectedType) {
val.Promise.NodeId = upNode.GetId()
return param.GetType(), []c.NodeID{val.Promise.NodeId}, true
Expand Down
145 changes: 145 additions & 0 deletions pkg/compiler/validators/bindings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"

c "github.com/flyteorg/flytepropeller/pkg/compiler/common"
structpb "github.com/golang/protobuf/ptypes/struct"

"github.com/flyteorg/flyteidl/clients/go/coreutils"
"github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core"
Expand Down Expand Up @@ -305,6 +306,150 @@ func TestValidateBindings(t *testing.T) {
}
})

t.Run("List/Dict Promises with attribute path", func(t *testing.T) {
// List/Dict with attribute path should conduct validation

n := &mocks.NodeBuilder{}
n.OnGetId().Return("node1")
n.OnGetInterface().Return(&core.TypedInterface{
Inputs: &core.VariableMap{
Variables: map[string]*core.Variable{},
},
Outputs: &core.VariableMap{
Variables: map[string]*core.Variable{},
},
})

n2 := &mocks.NodeBuilder{}
n2.OnGetId().Return("node2")
n2.OnGetOutputAliases().Return(nil)
n2.OnGetInterface().Return(&core.TypedInterface{
Inputs: &core.VariableMap{
Variables: map[string]*core.Variable{},
},
Outputs: &core.VariableMap{
Variables: map[string]*core.Variable{
"n2_out": {
Type: LiteralTypeForLiteral(coreutils.MustMakeLiteral(map[string]interface{}{"x": []interface{}{1, 3, 4}})),
},
},
},
})

wf := &mocks.WorkflowBuilder{}
wf.OnGetNode("n2").Return(n2, true)
wf.On("AddExecutionEdge", mock.Anything, mock.Anything).Return(nil)

bindings := []*core.Binding{
{
Var: "x",
Binding: &core.BindingData{
Value: &core.BindingData_Promise{
Promise: &core.OutputReference{
Var: "n2_out",
NodeId: "n2",
AttrPath: []*core.PromiseAttribute{
{
Value: &core.PromiseAttribute_StringValue{"x"},
},
{
Value: &core.PromiseAttribute_IntValue{0},
},
},
},
},
},
},
}

vars := &core.VariableMap{
Variables: map[string]*core.Variable{
"x": {
Type: LiteralTypeForLiteral(coreutils.MustMakeLiteral(1)),
},
},
}

compileErrors := compilerErrors.NewCompileErrors()
_, ok := ValidateBindings(wf, n, bindings, vars, true, c.EdgeDirectionBidirectional, compileErrors)
assert.True(t, ok)
if compileErrors.HasErrors() {
assert.NoError(t, compileErrors)
}
})

t.Run("pb.Struct Promises with attribute path", func(t *testing.T) {
// Dataclass with attribute path should skip validation

n := &mocks.NodeBuilder{}
n.OnGetId().Return("node1")
n.OnGetInterface().Return(&core.TypedInterface{
Inputs: &core.VariableMap{
Variables: map[string]*core.Variable{},
},
Outputs: &core.VariableMap{
Variables: map[string]*core.Variable{},
},
})

n2 := &mocks.NodeBuilder{}
n2.OnGetId().Return("node2")
n2.OnGetOutputAliases().Return(nil)
n2.OnGetInterface().Return(&core.TypedInterface{
Inputs: &core.VariableMap{
Variables: map[string]*core.Variable{},
},
Outputs: &core.VariableMap{
Variables: map[string]*core.Variable{
"n2_out": {
Type: LiteralTypeForLiteral(coreutils.MustMakeLiteral(&structpb.Struct{})),
},
},
},
})

wf := &mocks.WorkflowBuilder{}
wf.OnGetNode("n2").Return(n2, true)
wf.On("AddExecutionEdge", mock.Anything, mock.Anything).Return(nil)

bindings := []*core.Binding{
{
Var: "x",
Binding: &core.BindingData{
Value: &core.BindingData_Promise{
Promise: &core.OutputReference{
Var: "n2_out",
NodeId: "n2",
AttrPath: []*core.PromiseAttribute{
{
Value: &core.PromiseAttribute_StringValue{"x"},
},
{
Value: &core.PromiseAttribute_IntValue{0},
},
},
},
},
},
},
}

vars := &core.VariableMap{
Variables: map[string]*core.Variable{
"x": {
Type: LiteralTypeForLiteral(coreutils.MustMakeLiteral(1)),
},
},
}

compileErrors := compilerErrors.NewCompileErrors()
_, ok := ValidateBindings(wf, n, bindings, vars, true, c.EdgeDirectionBidirectional, compileErrors)
assert.True(t, ok)
if compileErrors.HasErrors() {
assert.NoError(t, compileErrors)
}
})

t.Run("Nil Binding Value", func(t *testing.T) {
n := &mocks.NodeBuilder{}
n.OnGetId().Return("node1")
Expand Down
156 changes: 156 additions & 0 deletions pkg/controller/nodes/attr_path_resolver.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package nodes

import (
"context"

"github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core"
"github.com/flyteorg/flytepropeller/pkg/controller/nodes/errors"
"google.golang.org/protobuf/types/known/structpb"
)

// resolveAttrPathInPromise resolves a literal against an attribute path.
// If the promise is chained with attributes (e.g. promise.a["b"][0]), the path is
// walked one attribute at a time:
//   - a map literal is indexed by the attribute's string value,
//   - a collection literal is indexed by the attribute's int value,
//   - anything else ends the walk.
//
// If the walk ends on a generic (protobuf struct) scalar, the attributes that were
// not consumed by the map/collection walk are resolved inside that struct via
// resolveAttrPathInPbStruct.
//
// It returns a PromiseAttributeResolveError when a map key is missing or a
// collection index is negative or past the end of the collection.
func resolveAttrPathInPromise(ctx context.Context, nodeID string, literal *core.Literal, bindAttrPath []*core.PromiseAttribute) (*core.Literal, error) {
	currVal := literal
	// consumed counts the leading attributes resolved against map/collection literals.
	consumed := 0

walk:
	for _, attr := range bindAttrPath {
		switch currVal.GetValue().(type) {
		case *core.Literal_Map:
			literals := currVal.GetMap().GetLiterals()
			next, exist := literals[attr.GetStringValue()]
			if !exist {
				return nil, errors.Errorf(errors.PromiseAttributeResolveError, nodeID, "key [%v] does not exist in literal %v", attr.GetStringValue(), literals)
			}
			currVal = next
			consumed++
		case *core.Literal_Collection:
			literals := currVal.GetCollection().GetLiterals()
			idx := int(attr.GetIntValue())
			// Reject negative indices as well; indexing with one would panic.
			if idx < 0 || idx >= len(literals) {
				return nil, errors.Errorf(errors.PromiseAttributeResolveError, nodeID, "index [%v] is out of range of %v", attr.GetIntValue(), literals)
			}
			currVal = literals[idx]
			consumed++
		default:
			// A scalar (or an unset value) is always the leaf of the literal walk.
			// A labeled break is required: a bare break would only leave the switch.
			break walk
		}
	}

	// resolve dataclass: the remaining attributes (from index "consumed") address
	// fields inside the protobuf struct.
	if st := currVal.GetScalar().GetGeneric(); st != nil {
		resolved, err := resolveAttrPathInPbStruct(ctx, nodeID, st, bindAttrPath[consumed:])
		if err != nil {
			return nil, err
		}
		return resolved, nil
	}

	// NOTE(review): if the walk stopped on a non-generic scalar while attributes
	// remain, those attributes are silently ignored — confirm this is intended.
	return currVal, nil
}

// resolveAttrPathInPbStruct resolves the protobuf struct (e.g. dataclass) with attribute path
func resolveAttrPathInPbStruct(ctx context.Context, nodeID string, st *structpb.Struct, bindAttrPath []*core.PromiseAttribute) (*core.Literal, error) {
ByronHsu marked this conversation as resolved.
Show resolved Hide resolved

var currVal interface{}
var tmpVal interface{}
var exist bool

currVal = st.AsMap()

// Turn the current value to a map so it can be resolved more easily
for _, attr := range bindAttrPath {
switch currVal.(type) {
ByronHsu marked this conversation as resolved.
Show resolved Hide resolved
// map
case map[string]interface{}:
tmpVal, exist = currVal.(map[string]interface{})[attr.GetStringValue()]
if exist == false {
return nil, errors.Errorf(errors.PromiseAttributeResolveError, nodeID, "key [%v] does not exist in literal %v", attr.GetStringValue(), currVal)
}
currVal = tmpVal
// list
case []interface{}:
if int(attr.GetIntValue()) >= len(currVal.([]interface{})) {
return nil, errors.Errorf(errors.PromiseAttributeResolveError, nodeID, "index [%v] is out of range of %v", attr.GetIntValue(), currVal)
}
currVal = currVal.([]interface{})[attr.GetIntValue()]
}
}

// After resolve, convert the interface to literal
literal, err := convertInterfaceToLiteral(ctx, nodeID, currVal)

return literal, err
}

// convertInterfaceToLiteral converts a plain Go value taken from a protobuf struct
// (e.g. dataclass) to a literal.
//
//   - map[string]interface{} becomes a generic (protobuf struct) scalar,
//   - []interface{} becomes a literal collection, converting each element recursively,
//   - nil yields a literal whose Value is left unset,
//   - any other value is converted by convertInterfaceToLiteralScalar.
//
// An error from structpb.NewStruct or from converting an element/scalar is returned
// as-is together with a nil literal.
func convertInterfaceToLiteral(ctx context.Context, nodeID string, obj interface{}) (*core.Literal, error) {
	literal := &core.Literal{}

	switch v := obj.(type) {
	case map[string]interface{}:
		newSt, err := structpb.NewStruct(v)
		if err != nil {
			return nil, err
		}
		literal.Value = &core.Literal_Scalar{
			Scalar: &core.Scalar{
				Value: &core.Scalar_Generic{
					Generic: newSt,
				},
			},
		}
	case []interface{}:
		literals := make([]*core.Literal, 0, len(v))
		for _, item := range v {
			// recursively convert the interface to literal
			element, err := convertInterfaceToLiteral(ctx, nodeID, item)
			if err != nil {
				return nil, err
			}
			literals = append(literals, element)
		}
		literal.Value = &core.Literal_Collection{
			Collection: &core.LiteralCollection{
				Literals: literals,
			},
		}
	case nil:
		// A nil interface matches no concrete type; the literal is returned with
		// its Value unset.
		// NOTE(review): consider mapping this to an explicit none-type scalar — confirm
		// what downstream consumers expect.
	default:
		scalar, err := convertInterfaceToLiteralScalar(ctx, nodeID, v)
		if err != nil {
			return nil, err
		}
		literal.Value = scalar
	}

	return literal, nil
}

// convertInterfaceToLiteralScalar converts the a single value to a literal scalar
func convertInterfaceToLiteralScalar(ctx context.Context, nodeID string, obj interface{}) (*core.Literal_Scalar, error) {
value := &core.Primitive{}

switch obj.(type) {
case string:
value.Value = &core.Primitive_StringValue{StringValue: obj.(string)}
case int:
value.Value = &core.Primitive_Integer{Integer: int64(obj.(int))}
case float64:
value.Value = &core.Primitive_FloatValue{FloatValue: obj.(float64)}
case bool:
ByronHsu marked this conversation as resolved.
Show resolved Hide resolved
value.Value = &core.Primitive_Boolean{Boolean: obj.(bool)}
default:
return nil, errors.Errorf(errors.PromiseAttributeResolveError, nodeID, "Failed to resolve interface to literal scalar")
}

return &core.Literal_Scalar{
Scalar: &core.Scalar{
Value: &core.Scalar_Primitive{
Primitive: value,
},
},
}, nil
}
Loading
Loading