Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor TaskEntity to TaskEntity<TState> #175

Merged
merged 9 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 37 additions & 164 deletions src/Abstractions/Entities/TaskEntity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Licensed under the MIT License.

using System.Reflection;
using System.Threading.Tasks;

namespace Microsoft.DurableTask.Entities;

Expand All @@ -12,9 +11,7 @@ namespace Microsoft.DurableTask.Entities;
/// <remarks>
/// <para><b>Entity State</b></para>
/// <para>
/// All entity implementations are required to be serializable by the configured <see cref="DataConverter"/>. An entity
/// will have its state deserialized before executing an operation, and then the new state will be the serialized value
/// of the <see cref="ITaskEntity"/> implementation instance post-operation.
/// The state of an entity can be retrieved and updated via <see cref="TaskEntityOperation.Context"/>.
/// </para>
/// </remarks>
public interface ITaskEntity
Expand All @@ -30,6 +27,7 @@ public interface ITaskEntity
/// <summary>
/// An <see cref="ITaskEntity"/> which dispatches its operations to public instance methods or properties.
/// </summary>
/// <typeparam name="TState">The state type held by this entity.</typeparam>
/// <remarks>
/// <para><b>Method Binding</b></para>
/// <para>
Expand Down Expand Up @@ -71,176 +69,51 @@ public interface ITaskEntity
///
/// <para><b>Entity State</b></para>
/// <para>
/// Unchanged from <see cref="ITaskEntity"/>. Entity state is the serialized value of the entity after an operation
/// completes.
/// Entity state will be hydrated into the <see cref="TaskEntity{TState}.State"/> property. The contents of this
/// property will be persisted to <see cref="TaskEntityContext.SetState(object?)"/> when the operation has completed.
/// Deleting entity state can be accomplished by setting to default(<typeparamref name="TState"/>).
/// </para>
/// </remarks>
public abstract class TaskEntity : ITaskEntity
public abstract class TaskEntity<TState> : ITaskEntity
{
/**
* TODO:
* 1. Consider caching a compiled delegate for a given operation name.
*/
static readonly BindingFlags InstanceBindingFlags
= BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase;

/// <inheritdoc/>
public ValueTask<object?> RunAsync(TaskEntityOperation operation)
{
Check.NotNull(operation);
if (!this.TryDispatchMethod(operation, out object? result, out Type returnType))
{
throw new NotSupportedException($"No suitable method found for entity operation '{operation}'.");
}

if (typeof(Task).IsAssignableFrom(returnType))
{
// Task or Task<T>
return new(AsGeneric((Task)result!, returnType)); // we assume a declared Task return type is never null.
}

if (returnType == typeof(ValueTask))
{
// ValueTask
return AsGeneric((ValueTask)result!); // we assume a declared ValueTask return type is never null.
}

if (returnType.IsGenericType && returnType.GetGenericTypeDefinition() == typeof(ValueTask<>))
{
// ValueTask<T>
return AsGeneric(result!, returnType); // No inheritance, have to do purely via reflection.
}

return new(result);
}

static bool TryGetInput(ParameterInfo parameter, TaskEntityOperation operation, out object? input)
{
if (!operation.HasInput)
{
if (parameter.HasDefaultValue)
{
input = parameter.DefaultValue;
return true;
}

input = null;
return false;
}

input = operation.GetInput(parameter.ParameterType);
return true;
}

static async Task<object?> AsGeneric(Task task, Type declared)
{
await task;
if (declared.IsGenericType && declared.GetGenericTypeDefinition() == typeof(Task<>))
{
return declared.GetProperty("Result", BindingFlags.Public | BindingFlags.Instance).GetValue(task);
}

return null;
}

static ValueTask<object?> AsGeneric(ValueTask t)
{
static async Task<object?> Await(ValueTask t)
{
await t;
return null;
}
/// <summary>
/// Gets a value indicating whether dispatching operations to <see cref="State"/> is allowed. State dispatch will
/// only be attempted if entity-level dispatch does not succeed. Default is <c>true</c>.
/// </summary>
protected bool AllowStateDispatch => true;

if (t.IsCompletedSuccessfully)
{
return default;
}
/// <summary>
/// Gets or sets the state for this entity.
/// </summary>
/// <remarks>
/// This will be hydrated as part of <see cref="RunAsync(TaskEntityOperation)"/>. The contents of this property
/// will be persisted to <see cref="TaskEntityContext.SetState(object?)"/> when the operation completes. Deleting
/// entity state can be accomplished by setting this to default(<typeparamref name="TState"/>).
/// </remarks>
protected TState State { get; set; } = default!; // leave null-checks to end implementation.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not super familiar with how ! is intended to be used. I thought it means "I know for sure this is not null, so you don't need to try to prove it" but your use here seems to suggest that it can also be used to say "I don't care whether this is null or not". Is this an o.k. way to use the !?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes the ! operator is for suppressing nullability warnings. You can generalize it as appropriate for use when static analysis does not work for your scenario. In our case, it doesn't work, we don't know the deserialization logic of TState, we don't know if they want to throw on null or not.

Although, after some thought I think TState? is appropriate here. Because we will initialize to default(TState). I just want to verify it works as intended with structs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am undecided if TState or TState? is better here. The common scenario is for TState to be non-null for most operation dispatches (since we initialize via Activator.CreateInstance now). However, there are a couple scenarios where null is okay when interacting with this property:

  1. Deleting state would be done via this.State = default!; (which is null for reference types).
  2. Overriding InitializeState and setting to null is also allowed - it is then up to users to decide how null state behaves.

So, which is better? TState? to show that it can be null, and then users need to use !. everywhere. OR TState to avoid the ! because it is typically not null.

Copy link
Member

@sebastianburckhardt sebastianburckhardt Aug 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about Optional<TState>?

Since the backend really does make a distinction between "not present" (meaning there is no record for this entity in storage) and "default value" it may be just be safer to maintain that distinction throughout the API. Otherwise weird things can happen, especially for types where the default value is not null.

We could also just copy how Optional<T> is implemented and have a boolean HasState

Copy link
Member

@sebastianburckhardt sebastianburckhardt Aug 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see that Optional<TState> would be readonly so we can't just use that one directly. But here is something similar that could work I think:

  1. a boolean property HasState which determines whether the entity has state or not, and
  2. an always non-null TState State
  3. An InitializeState that returns an always non-null state
    and then call InitializeState implicitly whenever users access the State getter and HasState=false

Copy link
Member Author

@jviau jviau Aug 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per our offline discussion, I have switched back to TState from TState?. We will not use an optional type and have an experience of only deleting state when it is null (and not default of a struct).

We will add a future implicit delete operation which will delete state for reference or value type.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will add a future implicit delete operation which will delete state for reference or value type.

Can we add this operation now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have the implicit delete code ready for another PR.


return new(Await(t));
}
/// <summary>
/// Gets the entity operation.
/// </summary>
protected TaskEntityOperation Operation { get; private set; } = null!;

static ValueTask<object?> AsGeneric(object result, Type type)
{
// result and type here must be some form of ValueTask<T>.
if ((bool)type.GetProperty("IsCompletedSuccessfully").GetValue(result))
{
return new(type.GetProperty("Result").GetValue(result));
}
else
{
Task t = (Task)type.GetMethod("AsTask", BindingFlags.Instance | BindingFlags.Public)
.Invoke(result, null);
return new(t.ToGeneric<object?>());
}
}
/// <summary>
/// Gets the entity context.
/// </summary>
protected TaskEntityContext Context => this.Operation.Context;

bool TryDispatchMethod(TaskEntityOperation operation, out object? result, out Type returnType)
/// <inheritdoc/>
public ValueTask<object?> RunAsync(TaskEntityOperation operation)
{
Type t = this.GetType();

// Will throw AmbiguousMatchException if more than 1 overload for the method name exists.
MethodInfo? method = t.GetMethod(operation.Name, InstanceBindingFlags);
if (method is null)
{
result = null;
returnType = typeof(void);
return false;
}

ParameterInfo[] parameters = method.GetParameters();
object?[] inputs = new object[parameters.Length];

int i = 0;
ParameterInfo? inputResolved = null;
ParameterInfo? contextResolved = null;
ParameterInfo? operationResolved = null;
foreach (ParameterInfo parameter in parameters)
this.Operation = Check.NotNull(operation);
object? state = operation.Context.GetState(typeof(TState));
this.State = state is null ? default! : (TState)state;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there is an important feature still missing here, regarding automatic first initialization of the entity state.

For most applications, the first time an entity is accessed, its state has to be somehow constructed/initialized.

Of course one could write code for this in every operation, but that would be very inconvenient and error-prone for the user. With the design proposed here, users would be essentially forced to do this (or add their own extensions that do it).

For reference, in the current DF interface we accommodate this "convenient API for initial creation" by

a. (for the function-based API) support specifying an initializer when calling GetState, and
b. (for the object-based API) automatically call the default constructor if the entity is null.

I think we need something to support convenient initial creation in this API as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a virtual method to TaskEntity to initialize state. This should cover the scenario.

if (!operation.TryDispatch(this, out object? result, out Type returnType)
&& (this.AllowStateDispatch && !operation.TryDispatch(this.State, out result, out returnType)))
jviau marked this conversation as resolved.
Show resolved Hide resolved
{
if (parameter.ParameterType == typeof(TaskEntityContext))
{
ThrowIfDuplicateBinding(contextResolved, parameter, "context", operation);
inputs[i] = operation.Context;
contextResolved = parameter;
}
else if (parameter.ParameterType == typeof(TaskEntityOperation))
{
ThrowIfDuplicateBinding(operationResolved, parameter, "operation", operation);
inputs[i] = operation;
operationResolved = parameter;
}
else
{
ThrowIfDuplicateBinding(inputResolved, parameter, "input", operation);
if (TryGetInput(parameter, operation, out object? input))
{
inputs[i] = input;
inputResolved = parameter;
}
else
{
throw new InvalidOperationException($"Error dispatching {operation} to '{method}'.\n" +
$"There was an error binding parameter '{parameter}'. The operation expected an input value, " +
"but no input was provided by the caller.");
}
}

i++;
throw new NotSupportedException($"No suitable method found for entity operation '{operation}'.");
}

result = method.Invoke(this, inputs);
returnType = method.ReturnType;
return true;

static void ThrowIfDuplicateBinding(
ParameterInfo? existing, ParameterInfo parameter, string bindingConcept, TaskEntityOperation operation)
{
if (existing is not null)
{
throw new InvalidOperationException($"Error dispatching {operation} to '{parameter.Member}'.\n" +
$"Unable to bind {bindingConcept} to '{parameter}' because it has " +
$"already been bound to parameter '{existing}'. Please remove the duplicate parameter in method " +
$"'{parameter.Member}'.\nEntity operation: {operation}.");
}
}
return TaskEntityHelpers.UnwrapAsync(this.Context, () => this.State, result, returnType);
}
}
16 changes: 10 additions & 6 deletions src/Abstractions/Entities/TaskEntityContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,15 @@ public abstract void StartOrchestration(
TaskName name, object? input = null, StartOrchestrationOptions? options = null);

/// <summary>
/// Deletes the state of this entity after the current operation completes.
/// Gets the current state for the entity this context is for.
/// </summary>
/// <remarks>
/// The state deletion only takes effect after the current operation completes. Any state changes made during the
/// current operation will be ignored in favor of the deletion.
/// </remarks>
public abstract void DeleteState();
/// <param name="type">The type to retrieve the state as.</param>
/// <returns>The entity state.</returns>
public abstract object? GetState(Type type);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For clarity, it may be nice to document how GetState handles default states, in particular for non-object types.
For example, if I do GetState(typeof(int)) on an entity whose state is null (i.e. it has not been set, or it has been set to zero), then we would expect GetState to return 0 ?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option would be simply to support only nullable types for entities, then we don't have this strange corner case.

I think I prefer this more restrictive solution.... there is something very strange about setting an integer entity to zero and then see it disappear from storage. It is clever, but this will invariably confuse a lot of people.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the object? return type was clear that if state is not present, you get null. I can clarify that in a comment. We already have this same behavior with DataConverter.

An alternative is bool TryGetState<T>(out T? state), but this then becomes ambiguous in the false case: was there no state to begin with, or did the state not map to T? Throwing from here violates the expectations of the Try* pattern. I think will just clarify the comment for GetState.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the object? return type was clear that if state is not present, you get null

The problem with that is that you get strange behaviors. For example, for TaskEntity<int>: after the user assigns this.State = 0 then this.State == null which looks broken (not logically sound). The reason is that assigning the default value (0 is the default value of int) is considered deletion and therefore removes the entity state completely.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per our offline discussion, setting TState to default of a struct (0 for an int in this example), will not delete state. Users will need to declare TaskEntity<int?> and then assign this.State = null. Otherwise, they can implement a Reset operation which performs this.State = this.InitializeState(); - this does not "delete" state, but resets the entity to be identical to as if state was null as far as the user is concerned.

We will also add a built in delete operation for them to rely on.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just to clarify: reset is not identical to delete as far as the user is concerned because a reset entity will still be returned by subsequent queries, while a deleted entity is not.

Our users definitely care about this distinction. we had endless discussions about this and many issues, e.g. Azure/azure-functions-durable-extension#931 (comment).
The lesson we learnt is (a) users struggle to discover how to delete entities unless we tell them very clearly how to do that, and (b) the runtime needs to reliably remove deleted entities from subsequent entity queries.


/// <summary>
/// Sets the entity state. Setting of <c>null</c> will clear entity state.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest 'delete' instead of 'clear' since it will delete records from storage, not just reset the entity to its initial state. This makes a difference, for example when querying entities.

/// </summary>
/// <param name="state">The state to set.</param>
public abstract void SetState(object? state);
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest to add an overload method for deleting state:

public void DeleteState() { this.SetState(null); }

It's redundant (and we can say that in the comment) but when I write or read code I would much rather use DeleteState() than SetState(null) since it makes it much more explicit what is happening (records being removed from storage and no longer returned by queries).

100 changes: 100 additions & 0 deletions src/Abstractions/Entities/TaskEntityHelpers.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using System.Reflection;

namespace Microsoft.DurableTask.Entities;

/// <summary>
/// Helpers for task entities.
/// </summary>
static class TaskEntityHelpers
{
/// <summary>
/// Unwraps a dispatched result for a <see cref="TaskEntityOperation"/> into a <see cref="ValueTask{Object}"/>

Check warning on line 14 in src/Abstractions/Entities/TaskEntityHelpers.cs

View workflow job for this annotation

GitHub Actions / build

Documentation text should end with a period

Check warning on line 14 in src/Abstractions/Entities/TaskEntityHelpers.cs

View workflow job for this annotation

GitHub Actions / build

Documentation text should end with a period
jviau marked this conversation as resolved.
Show resolved Hide resolved
/// </summary>
/// <param name="context">The entity context.</param>
/// <param name="state">Delegate to resolve new state for the entity.</param>
/// <param name="result">The result of the operation.</param>
/// <param name="resultType">The declared type of the result (may be different that actual type).</param>
/// <returns>A value task which holds the result of the operation and sets state before it completes.</returns>
public static ValueTask<object?> UnwrapAsync(
TaskEntityContext context, Func<object?> state, object? result, Type resultType)
{
// NOTE: Func<object?> is used for state so that we can lazily resolve it AFTER the operation has ran.
Check.NotNull(context);
Check.NotNull(resultType);

if (typeof(Task).IsAssignableFrom(resultType))
{
// Task or Task<T>
// We assume a declared Task return type is never null.
return new(UnwrapTask(context, state, (Task)result!, resultType));
}

if (resultType == typeof(ValueTask))
{
// ValueTask
// We assume a declared ValueTask return type is never null.
return UnwrapValueTask(context, state, (ValueTask)result!);
}

if (resultType.IsGenericType && resultType.GetGenericTypeDefinition() == typeof(ValueTask<>))
{
// ValueTask<T>
// No inheritance, have to do purely via reflection.
return UnwrapValueTaskOfT(context, state, result!, resultType);
}

context.SetState(state());
return new(result);
}

static async Task<object?> UnwrapTask(TaskEntityContext context, Func<object?> state, Task task, Type declared)
{
await task;
context.SetState(state());
if (declared.IsGenericType && declared.GetGenericTypeDefinition() == typeof(Task<>))
{
return declared.GetProperty("Result", BindingFlags.Public | BindingFlags.Instance).GetValue(task);
}

return null;
}

static ValueTask<object?> UnwrapValueTask(TaskEntityContext context, Func<object?> state, ValueTask t)
{
async Task<object?> Await(ValueTask t)
{
await t;
context.SetState(state());
return null;
}

if (t.IsCompletedSuccessfully)
{
context.SetState(state());
return default;
}

return new(Await(t));
}

static ValueTask<object?> UnwrapValueTaskOfT(
TaskEntityContext context, Func<object?> state, object result, Type type)
{
// Result and type here must be some form of ValueTask<T>.
// TODO: can this amount of reflection be avoided?
if ((bool)type.GetProperty("IsCompletedSuccessfully").GetValue(result))
{
context.SetState(state());
return new(type.GetProperty("Result").GetValue(result));
}
else
{
Task t = (Task)type.GetMethod("AsTask", BindingFlags.Instance | BindingFlags.Public).Invoke(result, null);
Type taskType = typeof(Task<>).MakeGenericType(type.GetGenericArguments()[0]);
return new(UnwrapTask(context, state, t, taskType));
}
}
}
Loading
Loading