-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extension for DataFrame + Jupyter notebooks that adds properties that return concrete columns #25
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,224 @@ | ||
// Copyright (c) .NET Foundation and contributors. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
using System; | ||
using System.CommandLine; | ||
using System.CommandLine.Invocation; | ||
using System.IO; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
using Microsoft.DotNet.Interactive; | ||
using Microsoft.DotNet.Interactive.Commands; | ||
using Microsoft.DotNet.Interactive.CSharp; | ||
using Microsoft.DotNet.Interactive.Events; | ||
using Microsoft.DotNet.Interactive.Formatting; | ||
|
||
namespace Microsoft.Data.Analysis.Interactive | ||
{ | ||
public class DataFrameKernelExtension : IKernelExtension | ||
{ | ||
public Task OnLoadAsync(IKernel kernel) | ||
bool _generateCsvMethod = false; | ||
|
||
/// <summary>
/// Produces a source-style name for <paramref name="type"/>, expanding generic
/// type arguments recursively.
/// </summary>
/// <remarks>
/// The result uses C# generic syntax (angle brackets, comma-separated arguments),
/// so it is only suitable for emitting C# code.
/// </remarks>
/// <param name="type">The runtime type to name.</param>
/// <returns>
/// <c>"DataFrameColumn"</c> for any array type (arrays are not handled yet);
/// for a generic type, its name without the arity suffix followed by the friendly
/// names of its type arguments in angle brackets; otherwise <see cref="System.Reflection.MemberInfo.Name"/>.
/// </returns>
public string GetFriendlyName(Type type)
{
    // Arrays are not handled yet.
    if (type.IsArray)
    {
        return "DataFrameColumn";
    }

    string rawName = type.Name;
    if (!type.IsGenericType)
    {
        return rawName;
    }

    // Generic type names look like "List`1"; cut everything from the back tick on.
    int arityMarker = rawName.IndexOf('`');
    var nameBuilder = new StringBuilder(arityMarker > 0 ? rawName.Remove(arityMarker) : rawName);

    nameBuilder.Append("<");
    bool isFirstArgument = true;
    foreach (Type argument in type.GetGenericArguments())
    {
        if (!isFirstArgument)
        {
            nameBuilder.Append(",");
        }

        nameBuilder.Append(GetFriendlyName(argument));
        isFirstArgument = false;
    }
    nameBuilder.Append(">");

    return nameBuilder.ToString();
}
|
||
/// <summary>
/// Generates C# source for a class named <paramref name="resultTypeName"/> that derives
/// from <c>DataFrame</c> and exposes one typed property per column of
/// <paramref name="dataFrame"/>.
/// </summary>
/// <param name="dataFrame">The frame whose columns drive the generated properties.</param>
/// <param name="resultTypeName">The name of the generated class.</param>
/// <param name="prettyFormatter">
/// Receives an abbreviated version of the generated class (auto-property style
/// declarations instead of full getters) intended for display.
/// </param>
/// <returns>The full source of the generated class.</returns>
public StringBuilder GetTypedDataFrameWithProperties(DataFrame dataFrame, string resultTypeName, out StringBuilder prettyFormatter)
{
    prettyFormatter = new StringBuilder();
    StringBuilder stringBuilder = new StringBuilder();
    string constructor = @$"
public class {resultTypeName} : DataFrame
{{
    public {resultTypeName}(DataFrame dataFrame)
    {{
        foreach (var column in dataFrame.Columns)
        {{
            Columns.Add(column);
        }}
    }}

";
    stringBuilder.Append(constructor);
    prettyFormatter.Append(constructor);

    foreach (var column in dataFrame.Columns)
    {
        // The property name must be an identifier, so spaces are stripped from it.
        string propertyName = column.Name.Replace(" ", string.Empty);

        // The lookup, however, has to use the column's real name: looking up the
        // stripped name would miss every column whose name contains a space.
        // The name is emitted as a verbatim literal, so only quotes need escaping.
        string columnNameLiteral = column.Name.Replace("\"", "\"\"");

        string typeName = GetFriendlyName(column.GetType());
        stringBuilder.Append($@"
    public {typeName} {propertyName}
    {{
        get
        {{
            int columnIndex = Columns.IndexOf(@""{columnNameLiteral}"");
            return Columns[columnIndex] as {typeName};
        }}
    }}");
        stringBuilder.AppendLine();
        prettyFormatter.Append($@"
    public {typeName} {propertyName} {{ get; }}");
        prettyFormatter.AppendLine();
    }

    // The generated LoadCsv is declared "static new", i.e. it hides DataFrame.LoadCsv,
    // so it has to be emitted before the class is closed.
    if (_generateCsvMethod)
    {
        AddLoadCsvToTypedDataFrame(stringBuilder, resultTypeName, prettyFormatter);
    }

    stringBuilder.AppendLine();
    stringBuilder.Append(@"}");
    prettyFormatter.AppendLine();
    prettyFormatter.Append(@"}");
    prettyFormatter.AppendLine();
    return stringBuilder;
}
|
||
/// <summary>
/// Appends the source of a static <c>LoadCsv</c> method returning
/// <paramref name="resultTypeName"/> to the generated code.
/// </summary>
/// <param name="stringBuilder">Receives the method signature followed by its body.</param>
/// <param name="resultTypeName">The generated type that the emitted method returns and constructs.</param>
/// <param name="prettyFormatter">Receives only the method signature, followed by a line break.</param>
public void AddLoadCsvToTypedDataFrame(StringBuilder stringBuilder, string resultTypeName, StringBuilder prettyFormatter) | ||
{ | ||
// Signature only; shared between the full source and the display text.
string loadCsv = $@" | ||
public static new {resultTypeName} LoadCsv(string filename, | ||
char separator = ',', bool header = true, | ||
string[] columnNames = null, Type[] dataTypes = null, | ||
int numRows = -1, int guessRows = 10, | ||
bool addIndexColumn = false)"; | ||
// Full method: the emitted body forwards every argument to DataFrame.LoadCsv and
// wraps the resulting frame in a new instance of the generated type.
stringBuilder.Append($@"{loadCsv} | ||
{{ | ||
DataFrame df = DataFrame.LoadCsv(filename: filename, separator: separator, header: header, columnNames: columnNames, dataTypes: dataTypes, numRows: numRows, | ||
guessRows: guessRows, addIndexColumn: addIndexColumn); | ||
{resultTypeName} ret = new {resultTypeName}(df); | ||
return ret; | ||
}}" | ||
|
||
); | ||
// The display text gets the signature without the body.
prettyFormatter.Append(loadCsv); | ||
prettyFormatter.AppendLine(); | ||
} | ||
|
||
/// <summary>
/// Loads <paramref name="csv"/> into a <c>DataFrame</c>, generates a typed DataFrame class
/// named after the file, displays the abbreviated source, and submits the full source
/// (plus code that loads the file into a variable of the generated type) to the kernel.
/// </summary>
/// <param name="csv">The CSV file to read.</param>
/// <param name="context">The invocation context used for display and code submission.</param>
public async Task HandleCsvAsync(FileInfo csv, KernelInvocationContext context)
{
    // Infer the type and generated name from fileName
    string fileName = csv.Name.Split('.')[0]; //Something like housing.A.B.csv would return housing
    string typeName = fileName.Replace(" ", "");
    StringBuilder strBuilder;
    StringBuilder prettyFormatter;

    _generateCsvMethod = true;
    try
    {
        // OpenRead asks for read access only; Open(FileMode.Open) requests read/write
        // access and therefore fails on read-only files.
        using (var stream = csv.OpenRead())
        {
            DataFrame df = DataFrame.LoadCsv(stream);
            strBuilder = GenerateTypedDataFrame(df, typeName, context, out prettyFormatter);
        }
    }
    finally
    {
        // Reset even when loading or generation throws, so a failed call does not
        // leave LoadCsv generation switched on for later invocations.
        _generateCsvMethod = false;
    }

    // Create a new DataFrame var called dataFrameName
    string dataFrameName = typeName + "DataFrame";

    // The path is emitted inside a verbatim literal, where a quote is escaped by doubling it.
    string escapedPath = csv.FullName.Replace("\"", "\"\"");
    strBuilder.AppendLine();
    string buildNamedDataFrame = $@"
DataFrame _df = DataFrame.LoadCsv(filename: @""{escapedPath}"");
{typeName} {dataFrameName} = new {typeName}(_df);
";
    strBuilder.Append(buildNamedDataFrame);
    prettyFormatter.AppendLine();
    prettyFormatter.Append(buildNamedDataFrame);
    await context.DisplayAsync(prettyFormatter.ToString());
    context.Publish(new DisplayedValueProduced($"Created {typeName} {dataFrameName}: ", context.Command));

    await SubmitCodeToKernel(strBuilder, context);
}
|
||
/// <summary>
/// Generates the typed DataFrame source for <paramref name="df"/> by forwarding to
/// <c>GetTypedDataFrameWithProperties</c>.
/// </summary>
/// <param name="df">The frame to generate a typed class for.</param>
/// <param name="typeName">The name of the generated class.</param>
/// <param name="context">Not used by this method.</param>
/// <param name="prettyFormatter">Receives the abbreviated source intended for display.</param>
/// <returns>The full generated source.</returns>
public StringBuilder GenerateTypedDataFrame(DataFrame df, string typeName, KernelInvocationContext context, out StringBuilder prettyFormatter)
    => GetTypedDataFrameWithProperties(df, typeName, out prettyFormatter);
|
||
/// <summary>
/// Wraps <paramref name="code"/> in a <c>SubmitCode</c> command and sends it to the
/// kernel that is handling <paramref name="context"/>.
/// </summary>
/// <param name="code">The source text to submit.</param>
/// <param name="context">The invocation context whose handling kernel receives the command.</param>
private async Task SubmitCodeToKernel(StringBuilder code, KernelInvocationContext context)
{
    string source = code.ToString();
    var submission = new SubmitCode(source);
    var targetKernel = context.HandlingKernel;
    await targetKernel.SendAsync(submission);
}
|
||
/// <summary>
/// Generates, displays, and submits a typed DataFrame class for each matching
/// <c>DataFrame</c> variable in the C# kernel's script state.
/// </summary>
/// <param name="dataFrameName">
/// The variable name to match; when <c>null</c>, every variable holding a
/// <c>DataFrame</c> matches.
/// </param>
/// <param name="typeName">The name of the generated class.</param>
/// <param name="context">The invocation context; nothing happens unless its handling kernel is a <c>CSharpKernel</c>.</param>
public async Task HandleDataFrameAsync(string dataFrameName, string typeName, KernelInvocationContext context)
{
    if (!(context.HandlingKernel is CSharpKernel cSharp))
    {
        return;
    }

    foreach (var variable in cSharp.ScriptState.Variables)
    {
        bool nameMatches = dataFrameName == null || variable.Name == dataFrameName;
        if (!nameMatches || !(variable.Value is DataFrame df))
        {
            continue;
        }

        var generatedSource = GenerateTypedDataFrame(df, typeName, context, out StringBuilder displaySource);
        await context.DisplayAsync(displaySource.ToString());
        await SubmitCodeToKernel(generatedSource, context);
    }
}
|
||
/// <summary>
/// Registers the <c>#!generatedataframe</c> directive on <paramref name="kernel"/>.
/// </summary>
/// <remarks>
/// With <c>--csv</c> the directive generates a typed DataFrame from a CSV file;
/// otherwise it generates one from DataFrame variables in the kernel, optionally
/// restricted by <c>--dataframe-name</c>.
/// </remarks>
/// <param name="kernel">The kernel being extended; must be a <c>KernelBase</c>.</param>
/// <returns>A completed task.</returns>
/// <exception cref="ArgumentException"><paramref name="kernel"/> is not a <c>KernelBase</c>.</exception>
public Task OnLoadAsync(IKernel kernel)
{
    if (!(kernel is KernelBase kernelBase))
    {
        throw new ArgumentException($"Expected a {nameof(KernelBase)} to add the directive to.", nameof(kernel));
    }

    var directive = new Command("#!generatedataframe")
    {
        Handler = CommandHandler.Create(async (FileInfo csv, string dataFrameName, string typeName, KernelInvocationContext context) =>
        {
            try
            {
                // Both handlers must be awaited: otherwise their exceptions escape this
                // try/catch and the directive completes before the code is submitted.
                if (csv != null)
                {
                    await HandleCsvAsync(csv, context);
                }
                else
                {
                    await HandleDataFrameAsync(dataFrameName, typeName, context);
                }
            }
            catch (Exception)
            {
                await context.DisplayAsync($"Encountered an exception. Could not create type { (csv != null ? csv.Name : typeName)}");
            }
        })
    };

    directive.AddOption(new Option<FileInfo>(
        "--csv", "Read in a csv file into a DataFrame with strong properties. Also emits the generated DataFrame type").ExistingOnly());

    directive.AddOption(new Option<string>(
        "--type-name",
        getDefaultValue: () => "InteractiveDataFrame",
        "The name of the generated DataFrame type. Defaults to InteractiveDataFrame"));

    directive.AddOption(new Option<string>(
        "--dataframe-name",
        "The DataFrame variable to generate type information for"));
    kernelBase.AddDirective(directive);

    return Task.CompletedTask;
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be better to flow this `bool` down through the method calls as a parameter, instead of setting a field here? What happens if multiple threads call this object at the same time?