From 4a7264e2a7942674bcda360c89e979d89167c80d Mon Sep 17 00:00:00 2001 From: KDCLLC Date: Mon, 26 Dec 2016 16:22:07 -0500 Subject: [PATCH] merge sample project to the main code base add NaiveBayes to the sample switch to use project reference and 4 unit tests are failing update getting started to support new vs2017 project files --- Src/numl.Tests/numl.Tests.csproj | 1 + Src/numl.Tests/project.json | 21 + Src/numl.sln | 30 +- Src/numl/Supervised/Generator.cs | 360 +++++++++--------- Src/numl/numl.csproj | 12 +- Src/numl/numl.xproj | 21 - .../Data/SampleData.cs | 97 +++++ Src/numlsample.GettingStarted/Data/Tennis.cs | 34 ++ .../ExampleCode/QuickStart.cs | 54 +++ .../ExampleCode/SimpleNumlWorkflow.cs | 354 +++++++++++++++++ Src/numlsample.GettingStarted/Program.cs | 18 + .../Properties/AssemblyInfo.cs | 18 + Src/numlsample.GettingStarted/Randm.cs | 251 ++++++++++++ .../numlsample.GettingStarted.csproj | 21 + 14 files changed, 1085 insertions(+), 207 deletions(-) create mode 100644 Src/numl.Tests/project.json delete mode 100644 Src/numl/numl.xproj create mode 100644 Src/numlsample.GettingStarted/Data/SampleData.cs create mode 100644 Src/numlsample.GettingStarted/Data/Tennis.cs create mode 100644 Src/numlsample.GettingStarted/ExampleCode/QuickStart.cs create mode 100644 Src/numlsample.GettingStarted/ExampleCode/SimpleNumlWorkflow.cs create mode 100644 Src/numlsample.GettingStarted/Program.cs create mode 100644 Src/numlsample.GettingStarted/Properties/AssemblyInfo.cs create mode 100644 Src/numlsample.GettingStarted/Randm.cs create mode 100644 Src/numlsample.GettingStarted/numlsample.GettingStarted.csproj diff --git a/Src/numl.Tests/numl.Tests.csproj b/Src/numl.Tests/numl.Tests.csproj index b156782..3a86fe0 100644 --- a/Src/numl.Tests/numl.Tests.csproj +++ b/Src/numl.Tests/numl.Tests.csproj @@ -7,6 +7,7 @@ + diff --git a/Src/numl.Tests/project.json b/Src/numl.Tests/project.json new file mode 100644 index 0000000..1f4b106 --- /dev/null +++ b/Src/numl.Tests/project.json @@ -0,0 +1,21 @@ +{ + "version": "1.0.0-*", + "dependencies": { + "numl": "0.9.13-beta", + "xunit": "2.2.0-*", + "FluentAssertions": "4.14.0", + "dotnet-test-xunit": "2.2.0-*" + }, + "testRunner": "xunit", + "frameworks": { + "netcoreapp1.1": { + "dependencies": { + "Microsoft.NETCore.App": { + "version": "1.1.0-*", + "type": "platform" + } + } + } + } + +} \ No newline at end of file diff --git a/Src/numl.sln b/Src/numl.sln index d0151e7..d6d13e3 100644 --- a/Src/numl.sln +++ b/Src/numl.sln @@ -1,13 +1,19 @@ - Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 -VisualStudioVersion = 15.0.26228.4 +VisualStudioVersion = 15.0.26228.9 MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{B002D86D-AF04-48EE-9F22-F7DC8FA747A4}" + ProjectSection(SolutionItems) = preProject + ..\.gitattributes = ..\.gitattributes + ..\.gitignore = ..\.gitignore + ..\Build\build.cake = ..\Build\build.cake + EndProjectSection +EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "numl", "numl\numl.csproj", "{554363C6-5979-4C9A-90E6-E70AF2D5CC09}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "numl.Tests", "numl.Tests\numl.Tests.csproj", "{801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "numl.Tests", "numl.Tests\numl.Tests.csproj", "{801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "numlR", "numlR\numlR.csproj", "{1C65B9E4-0843-47F7-A0F6-4CF7EF30164E}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "numlsample.GettingStarted", "numlsample.GettingStarted\numlsample.GettingStarted.csproj", "{70589B72-0E1B-4D92-B92D-B0EE30B2E73C}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -43,6 +49,16 @@ Global {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x64.Build.0 = Release|Any CPU {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x86.ActiveCfg = Release|Any CPU {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x86.Build.0 = Release|Any CPU + {70589B72-0E1B-4D92-B92D-B0EE30B2E73C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {70589B72-0E1B-4D92-B92D-B0EE30B2E73C}.Debug|x64.ActiveCfg = Debug|Any CPU + {70589B72-0E1B-4D92-B92D-B0EE30B2E73C}.Debug|x86.ActiveCfg = Debug|Any CPU + {70589B72-0E1B-4D92-B92D-B0EE30B2E73C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {70589B72-0E1B-4D92-B92D-B0EE30B2E73C}.Release|x64.ActiveCfg = Release|Any CPU + {70589B72-0E1B-4D92-B92D-B0EE30B2E73C}.Release|x86.ActiveCfg = Release|Any CPU + {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x64.ActiveCfg = Release|Any CPU + {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x64.Build.0 = Release|Any CPU + {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x86.ActiveCfg = Release|Any CPU + {801D4C20-F3BB-48B5-8530-2F8E9D2AC1D1}.Release|x86.Build.0 = Release|Any CPU {1C65B9E4-0843-47F7-A0F6-4CF7EF30164E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {1C65B9E4-0843-47F7-A0F6-4CF7EF30164E}.Debug|Any CPU.Build.0 = Debug|Any CPU {1C65B9E4-0843-47F7-A0F6-4CF7EF30164E}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -62,4 +78,10 @@ Global GlobalSection(TestCaseManagementSettings) = postSolution CategoryFile = numl.vsmdi EndGlobalSection + GlobalSection(TestCaseManagementSettings) = postSolution + CategoryFile = numl.vsmdi + EndGlobalSection + GlobalSection(TestCaseManagementSettings) = postSolution + CategoryFile = numl.vsmdi + EndGlobalSection EndGlobal diff --git a/Src/numl/Supervised/Generator.cs b/Src/numl/Supervised/Generator.cs index 1bac53b..6559d0d 100644 --- a/Src/numl/Supervised/Generator.cs +++ b/Src/numl/Supervised/Generator.cs @@ -1,35 +1,35 @@ -// file: Supervised\Generator.cs -// -// summary: Implements the generator class -using System; -using numl.Model; -using System.Linq; - -using numl.Math.LinearAlgebra; -using System.Collections.Generic; -using numl.Utils; - -namespace numl.Supervised -{ - /// A generator. - public abstract class Generator : IGenerator - { - private Descriptor _Descriptor; - - /// Event queue for all listeners interested in ModelChanged events. - public event EventHandler ModelChanged; - /// Raises the model event. - /// Source of the event. - /// Event information to send to registered event handlers. - protected virtual void OnModelChanged(object sender, ModelEventArgs e) - { - EventHandler handler = ModelChanged; - if (handler != null) - handler(sender, e); - } - - /// Gets or sets the descriptor. - /// The descriptor. +// file: Supervised\Generator.cs +// +// summary: Implements the generator class +using System; +using numl.Model; +using System.Linq; + +using numl.Math.LinearAlgebra; +using System.Collections.Generic; +using numl.Utils; + +namespace numl.Supervised +{ + /// A generator. + public abstract class Generator : IGenerator + { + private Descriptor _Descriptor; + + /// Event queue for all listeners interested in ModelChanged events. + public event EventHandler ModelChanged; + /// Raises the model event. + /// Source of the event. + /// Event information to send to registered event handlers. + protected virtual void OnModelChanged(object sender, ModelEventArgs e) + { + EventHandler handler = ModelChanged; + if (handler != null) + handler(sender, e); + } + + /// Gets or sets the descriptor. + /// The descriptor. public Descriptor Descriptor { get { return this._Descriptor; } @@ -47,73 +47,73 @@ public Descriptor Descriptor /// /// If True, examples will keep their original ordering from the set. /// - public bool PreserveOrder { get; set; } - - /// - /// Gets or sets whether to perform feature normalisation using the specified Feature Normalizer. - /// - public bool NormalizeFeatures { get; set; } - /// - /// Gets or sets the feature normalizer to use for each item. - /// - public numl.Math.Normalization.INormalizer FeatureNormalizer { get; set; } - - /// - /// Gets or sets the Feature properties from the original training set. - /// - public numl.Math.Summary FeatureProperties { get; set; } - + public bool PreserveOrder { get; set; } + + /// + /// Gets or sets whether to perform feature normalization using the specified Feature Normalizer. + /// + public bool NormalizeFeatures { get; set; } + /// + /// Gets or sets the feature normalizer to use for each item. + /// + public numl.Math.Normalization.INormalizer FeatureNormalizer { get; set; } + + /// + /// Gets or sets the Feature properties from the original training set. + /// + public numl.Math.Summary FeatureProperties { get; set; } + /// /// Gets or sets whether the prediction label is discrete / categorical. - /// - public bool IsDiscrete { get; set; } - - /// - /// Initializes a new Generator instance. - /// - public Generator() - { - this.NormalizeFeatures = false; - this.FeatureNormalizer = new numl.Math.Normalization.MinMaxNormalizer(); - } - - /// - /// Override to perform custom pre-processing steps on the raw Matrix data. - /// - /// Matrix of examples. - /// - public virtual void Preprocess(Matrix X) - { - this.FeatureProperties = new numl.Math.Summary() - { - Average = X.Mean(VectorType.Row), - StandardDeviation = X.StdDev(VectorType.Row), - Minimum = X.Min(VectorType.Row), - Maximum = X.Max(VectorType.Row), - Median = X.Median(VectorType.Row) - }; - - if (this.NormalizeFeatures) - { - if (this.FeatureNormalizer != null) - { - for (int i = 0; i < X.Rows; i++) - { - Vector vectors = this.FeatureNormalizer.Normalize(X[i, VectorType.Row], this.FeatureProperties); - for (int j = 0; j < X.Cols; j++) - { - X[i, j] = vectors[j]; - } - } - } - } - } - + /// + public bool IsDiscrete { get; set; } + + /// + /// Initializes a new Generator instance. + /// + public Generator() + { + this.NormalizeFeatures = false; + this.FeatureNormalizer = new numl.Math.Normalization.MinMaxNormalizer(); + } + + /// + /// Override to perform custom pre-processing steps on the raw Matrix data. + /// + /// Matrix of examples. + /// + public virtual void Preprocess(Matrix X) + { + this.FeatureProperties = new numl.Math.Summary() + { + Average = X.Mean(VectorType.Row), + StandardDeviation = X.StdDev(VectorType.Row), + Minimum = X.Min(VectorType.Row), + Maximum = X.Max(VectorType.Row), + Median = X.Median(VectorType.Row) + }; + + if (this.NormalizeFeatures) + { + if (this.FeatureNormalizer != null) + { + for (int i = 0; i < X.Rows; i++) + { + Vector vectors = this.FeatureNormalizer.Normalize(X[i, VectorType.Row], this.FeatureProperties); + for (int j = 0; j < X.Cols; j++) + { + X[i, j] = vectors[j]; + } + } + } + } + } + /// /// Converts a label Vector to a 1-of-k encoded Matrix for discrete values, otherwise returns a n x 1 continuous matrix. /// /// Vector of class labels. - /// Matrix. + /// Matrix. public virtual Matrix ToEncoded(Vector y) { // check IsDiscrete in case a descriptor is not provided. @@ -123,90 +123,90 @@ public virtual Matrix ToEncoded(Vector y) } else return y.ToMatrix(VectorType.Col); - } - - /// Generate model based on a set of examples. - /// Thrown when the requested operation is invalid. - /// Example set. - /// Model. - public IModel Generate(IEnumerable examples) - { - if (examples.Count() == 0) throw new InvalidOperationException("Empty example set."); - - if (Descriptor == null) - throw new InvalidOperationException("Descriptor is null"); - - return Generate(Descriptor, examples); - } - - /// Generate model based on a set of examples. - /// Thrown when the requested operation is invalid. - /// The description. - /// Example set. - /// Model. - public IModel Generate(Descriptor description, IEnumerable examples) - { - if (examples.Count() == 0) throw new InvalidOperationException("Empty example set."); - - Descriptor = description; - if (Descriptor.Features == null || Descriptor.Features.Length == 0) - throw new InvalidOperationException("Invalid descriptor: Empty feature set!"); - if (Descriptor.Label == null) - throw new InvalidOperationException("Invalid descriptor: Empty label!"); - - var dataset = (this.PreserveOrder ? examples : examples.Shuffle()); - - var doubles = Descriptor.Convert(dataset); - var (X, Y) = doubles.ToExamples(); - - return Generate(X, Y); - } - - /// - /// Generate model from descriptor and examples - /// - /// Object type - /// Desriptor - /// Examples - /// Model - public IModel Generate(Descriptor descriptor, IEnumerable examples) where T : class - { - return Generate(descriptor, examples as IEnumerable); - } - - /// Generate model based on a set of examples. - /// The Matrix to process. - /// The Vector to process. - /// Model. - public abstract IModel Generate(Matrix x, Vector y); - - - } - - /// Additional information for model events. - public class ModelEventArgs : EventArgs - { - /// Constructor. - /// The model. - /// (Optional) the message. - public ModelEventArgs(IModel model, string message = "") - { - Message = message; - Model = model; - } - /// Gets or sets the model. - /// The model. - public IModel Model { get; private set; } - /// Gets or sets the message. - /// The message. - public string Message { get; private set; } - /// Makes. - /// The model. - /// (Optional) the message. - /// The ModelEventArgs. - internal static ModelEventArgs Make(IModel model, string message = "") - { - return new ModelEventArgs(model, message); - } - } -} + } + + /// Generate model based on a set of examples. + /// Thrown when the requested operation is invalid. + /// Example set. + /// Model. + public IModel Generate(IEnumerable examples) + { + if (examples.Count() == 0) throw new InvalidOperationException("Empty example set."); + + if (Descriptor == null) + throw new InvalidOperationException("Descriptor is null"); + + return Generate(Descriptor, examples); + } + + /// Generate model based on a set of examples. + /// Thrown when the requested operation is invalid. + /// The description. + /// Example set. + /// Model. + public IModel Generate(Descriptor description, IEnumerable examples) + { + if (examples.Count() == 0) throw new InvalidOperationException("Empty example set."); + + Descriptor = description; + if (Descriptor.Features == null || Descriptor.Features.Length == 0) + throw new InvalidOperationException("Invalid descriptor: Empty feature set!"); + if (Descriptor.Label == null) + throw new InvalidOperationException("Invalid descriptor: Empty label!"); + + var dataset = (this.PreserveOrder ? examples : examples.Shuffle()); + + var doubles = Descriptor.Convert(dataset); + var tuple = doubles.ToExamples(); + + return Generate(tuple.Item1, tuple.Item2); + } + + /// + /// Generate model from descriptor and examples + /// + /// Object type + /// Desriptor + /// Examples + /// Model + public IModel Generate(Descriptor descriptor, IEnumerable examples) where T : class + { + return Generate(descriptor, examples as IEnumerable); + } + + /// Generate model based on a set of examples. + /// The Matrix to process. + /// The Vector to process. + /// Model. + public abstract IModel Generate(Matrix x, Vector y); + + + } + + /// Additional information for model events. + public class ModelEventArgs : EventArgs + { + /// Constructor. + /// The model. + /// (Optional) the message. + public ModelEventArgs(IModel model, string message = "") + { + Message = message; + Model = model; + } + /// Gets or sets the model. + /// The model. + public IModel Model { get; private set; } + /// Gets or sets the message. + /// The message. + public string Message { get; private set; } + /// Makes. + /// The model. + /// (Optional) the message. + /// The ModelEventArgs. + internal static ModelEventArgs Make(IModel model, string message = "") + { + return new ModelEventArgs(model, message); + } + } +} diff --git a/Src/numl/numl.csproj b/Src/numl/numl.csproj index be4fab2..7abcc3f 100644 --- a/Src/numl/numl.csproj +++ b/Src/numl/numl.csproj @@ -1,5 +1,4 @@ - - + netstandard1.3 0.9.15 @@ -8,6 +7,7 @@ Seth Juarez;Chris Kalle;Basia Fusinka numl numl + numl machine learning;framework http://numl.net/images/ico.png http://numl.net @@ -15,8 +15,16 @@ true numl is a machine learning library intended to ease the use of using standard modeling techniques for both prediction and clustering ©2017, Seth Juarez + 1.6.0 + false + false + false + false + false + + diff --git a/Src/numl/numl.xproj b/Src/numl/numl.xproj deleted file mode 100644 index 3944bd1..0000000 --- a/Src/numl/numl.xproj +++ /dev/null @@ -1,21 +0,0 @@ - - - - 14.0 - $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) - - - - - 554363c6-5979-4c9a-90e6-e70af2d5cc09 - numl - .\obj - .\bin\ - v4.5.2 - - - - 2.0 - - - diff --git a/Src/numlsample.GettingStarted/Data/SampleData.cs b/Src/numlsample.GettingStarted/Data/SampleData.cs new file mode 100644 index 0000000..5d621ee --- /dev/null +++ b/Src/numlsample.GettingStarted/Data/SampleData.cs @@ -0,0 +1,97 @@ +using System.Collections.Generic; + +namespace numlsample.GettingStarted.Data +{ + public static class SampleData + { + public static Tennis[] GetTennisData(bool predetermined = true) + { + if (predetermined) + { + return new Tennis[] { + new Tennis { Play = true, Outlook=Outlook.Sunny, Temperature = Temperature.Low, Windy=true}, + new Tennis { Play = false, Outlook=Outlook.Sunny, Temperature = Temperature.High, Windy=true}, + new Tennis { Play = false, Outlook=Outlook.Sunny, Temperature = Temperature.High, Windy=false}, + new Tennis { Play = true, Outlook=Outlook.Overcast, Temperature = Temperature.Low, Windy=true}, + new Tennis { Play = true, Outlook=Outlook.Overcast, Temperature = Temperature.High, Windy= false}, + new Tennis { Play = true, Outlook=Outlook.Overcast, Temperature = Temperature.Low, Windy=false}, + new Tennis { Play = false, Outlook=Outlook.Rainy, Temperature = Temperature.Low, Windy=true}, + new Tennis { Play = true, Outlook=Outlook.Rainy, Temperature = Temperature.Low, Windy=false} + }; + } + else + { + var result = new List(); + var outlookOptions = new List() + { + Outlook.Sunny, + Outlook.Overcast, + Outlook.Rainy + }; + + for (int i = 0; i < 1000; i++) + { + // Completely random (should generate around 50% accuracy, closer to 50% with more iterations) + //Tennis tennis = GetTennis_CompletelyRandom(outlookOptions); + + // Calm wind means play (generates 100.0% accuracy) + //Tennis tennis = GetTennis_CalmWindAlwaysPlay(outlookOptions); + + // Some kind of meaning behind whether to play or not, + // so this should be a higher accuracy than completely random, + // but not close to 100% + Tennis tennis = GetTennis_UsuallyPlayFairConditions(outlookOptions); + + result.Add(tennis); + } + + return result.ToArray(); + } + } + + private static Tennis GetTennis_CompletelyRandom(List outlookOptions) + { + var tennis = new Tennis() + { + Play = Randm.Helper.PickBool(0.5), + Outlook = Randm.Helper.PickOne(outlookOptions), + Temperature = Randm.Helper.PickBool(0.5) ? Temperature.Low : Temperature.High, + Windy = Randm.Helper.PickBool(0.5), + }; + + return tennis; + } + + private static Tennis GetTennis_CalmWindAlwaysPlay(List outlookOptions) + { + var tennis = new Tennis() + { + Outlook = Randm.Helper.PickOne(outlookOptions), + Temperature = Randm.Helper.PickBool(0.5) ? Temperature.Low : Temperature.High, + Windy = Randm.Helper.PickBool(0.5), + }; + tennis.Play = !tennis.Windy; + + return tennis; + } + + private static Tennis GetTennis_UsuallyPlayFairConditions(List outlookOptions) + { + var tennis = new Tennis() + { + Outlook = Randm.Helper.PickOne(outlookOptions), + Temperature = Randm.Helper.PickBool(0.7) ? Temperature.Low : Temperature.High, + Windy = Randm.Helper.PickBool(0.7), + }; + + if (tennis.Windy && tennis.Outlook == Outlook.Rainy) + tennis.Play = Randm.Helper.PickBool(0.1); + else if (tennis.Outlook == Outlook.Overcast) + tennis.Play = Randm.Helper.PickBool(0.8); + else + tennis.Play = Randm.Helper.PickBool(0.9); + + return tennis; + } + } +} diff --git a/Src/numlsample.GettingStarted/Data/Tennis.cs b/Src/numlsample.GettingStarted/Data/Tennis.cs new file mode 100644 index 0000000..4112a28 --- /dev/null +++ b/Src/numlsample.GettingStarted/Data/Tennis.cs @@ -0,0 +1,34 @@ +using numl.Model; + +namespace numlsample.GettingStarted.Data +{ + public enum Outlook + { + Sunny, + Overcast, + Rainy + } + + public enum Temperature + { + Low, + High + } + + public class Tennis + { + [Feature] + public Outlook Outlook { get; set; } + [Feature] + public Temperature Temperature { get; set; } + [Feature] + public bool Windy { get; set; } + [Label] + public bool Play { get; set; } + + public override string ToString() + { + return $"Tennis - Outlook: {Outlook}, Temp: {Temperature}, Windy: {Windy}"; + } + } +} diff --git a/Src/numlsample.GettingStarted/ExampleCode/QuickStart.cs b/Src/numlsample.GettingStarted/ExampleCode/QuickStart.cs new file mode 100644 index 0000000..a867995 --- /dev/null +++ b/Src/numlsample.GettingStarted/ExampleCode/QuickStart.cs @@ -0,0 +1,54 @@ +using System; +using numl; +using numl.Model; +using numl.Supervised.DecisionTree; +using numlsample.GettingStarted.Data; + +namespace numlsample.GettingStarted.ExampleCode +{ + /// + /// This is the very simple, git-er-dun quickstart. + /// + public static class QuickStart + { + public static void Go() + { + // Start with our data + Tennis[] data = SampleData.GetTennisData(); + + // Create the corresponding descriptor + var descriptor = Descriptor.Create(); + + // Choose our generator + var generator = new DecisionTreeGenerator(descriptor); + generator.SetHint(false); + + Console.WriteLine($"Using the {generator.GetType().Name}\n"); + + // Create the model by learning from the data using the generator + LearningModel learningModel = Learner.Learn(data, 0.80, 1000, generator); + + Console.WriteLine(learningModel); + + // Now we could predict using the learning info's Model. + var toPredict = new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true, + // Play = ? - This is what we will predict + }; + + var prediction = learningModel.Model.Predict(toPredict); + + // And we're spent... + Console.WriteLine($"ToPredict: {toPredict}"); + Console.WriteLine($"Prediction: Play = {prediction.Play}\n"); + + Console.WriteLine("Press any key...\n"); + Console.ReadKey(); + } + + + } +} diff --git a/Src/numlsample.GettingStarted/ExampleCode/SimpleNumlWorkflow.cs b/Src/numlsample.GettingStarted/ExampleCode/SimpleNumlWorkflow.cs new file mode 100644 index 0000000..f1bedcc --- /dev/null +++ b/Src/numlsample.GettingStarted/ExampleCode/SimpleNumlWorkflow.cs @@ -0,0 +1,354 @@ +using System; +using System.Collections.Generic; +using numl; +using numl.Model; +using numl.Supervised; +using numl.Supervised.DecisionTree; +using numlsample.GettingStarted.Data; + +namespace numlsample.GettingStarted.ExampleCode +{ + /// + /// + /// This describes a very basic workflow of using numl with various + /// ML algorithms (generators, models). It is essentially as follows: + /// + /// + /// First, you start with your data and Descriptor. This is how you are + /// declaring the features that you are analyzing and the outcome that you + /// want to predict. + /// + /// + /// Next, you choose an IGenerator, which is basically what ML algorithm you + /// want to use. + /// + /// + /// The Learner will then train a IModel + /// + /// + /// + public static class SimpleNumlWorkflow + { + public static void Go() + { + // These all use the same simple workflow, + // with only minor tweaks of swapped generators. + + // First try giving them a run, then tweak your default arguments. + + DoSimple_DecisionTree(simpleData: false); + DoSimple_LinearRegression(simpleData: false); + DoSimple_NaiveBayes(simpleData: false); + DoSimple_LogisticRegression(simpleData: false); + DoSimple_Perceptron(simpleData: false); + DoSimple_NeuralNetwork(simpleData: false); + } + + /// + /// Implementation method that executes a simple numl workflow, + /// easily swapping generators/models, etc. This is primarily to + /// keep things DRY, even though this is just example code. + /// + /// The type of your data + /// func to retrieve data, e.g. () => SampleData.GetTennisData() + /// func to describe your type of data, e.g. () => Descriptor.Create() + /// func to create/initialize your generator. See example methods. + /// func to get/create the data you want to predict. See example methods. + /// logging func for displaying prediction output on console. + static void SimpleNumlWorkflowImpl( + Func> getData, + Func getDescriptor, + Func getGenerator, + Func getToPredict, + Func getPredictionDesc, + double trainingPercentage, + int repeat + ) + where TData : class + { + // Start with descriptor + var descriptor = getDescriptor(); + + // Choose our generator + var generator = getGenerator(descriptor); + Console.WriteLine($"Starting {generator.GetType().Name}\n"); + + // Load our data + var data = getData(); + + // Create the model by learning from the data using the generator + var learningModel = Learner.Learn(data, trainingPercentage, repeat, generator); + Console.WriteLine(learningModel); + + // Now we could predict using the learning info's Model. + var toPredict = getToPredict(); + + var prediction = learningModel.Model.Predict(toPredict); + //var tennisPrediciton = learningModel.Model.Predict(areWeGonnaPlayTennis); + + Console.WriteLine($"To Predict: {toPredict})"); + Console.WriteLine($"Prediction: {getPredictionDesc(prediction)}\n"); + + Console.WriteLine("Press any key...\n"); + Console.ReadKey(); + } + + /// + /// Do the SimpleNumlWorkflow using a DecisionTree generator/model. + /// + /// If true, uses a preset small sample data set; else generates a larger, pseudo-random one. + /// + /// + static void DoSimple_DecisionTree(bool simpleData = true, int depth = 5, + int width = 2) + { + SimpleNumlWorkflowImpl( + getData: () => SampleData.GetTennisData(simpleData), + + getDescriptor: () => Descriptor.Create(), + + getGenerator: (descriptor) => + { + var generator = new DecisionTreeGenerator(descriptor) + { + Depth = depth, + Width = width + }; + generator.SetHint(false); + return generator; + }, + + getToPredict: () => + { + return new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true + }; + }, + + getPredictionDesc: (t) => "Play: " + t.Play, + + trainingPercentage: 0.8d, + + repeat: 1000 + ); + } + + /// + /// Do the SimpleNumlWorkflow using a LinearRegression generator/model. + /// + /// If true, uses a preset small sample data set; else generates a larger, pseudo-random one. + /// + /// + /// + static void DoSimple_LinearRegression(bool simpleData = true, int maxIterations = 500, + double learningRate = 0.01d, double lambda = 1) + { + SimpleNumlWorkflowImpl( + getData: () => SampleData.GetTennisData(simpleData), + + getDescriptor: () => Descriptor.Create(), + + // This is where we choose the actual algorithm + getGenerator: (descriptor) => + { + var generator = new numl.Supervised.Regression.LinearRegressionGenerator() + { + Descriptor = descriptor, + MaxIterations = maxIterations, + LearningRate = learningRate, + Lambda = lambda + }; + return generator; + }, + + getToPredict: () => + { + return new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true + }; + }, + + getPredictionDesc: (t) => "Play: " + t.Play.ToString(), + + trainingPercentage: 0.8d, + + repeat: 10 + ); + } + + static void DoSimple_NaiveBayes(bool simpleData = true, int width = 2) + { + SimpleNumlWorkflowImpl( + getData: () => SampleData.GetTennisData(simpleData), + + getDescriptor: () => Descriptor.Create(), + + // This is where we choose the actual algorithm + getGenerator: (descriptor) => + { + var generator = new numl.Supervised.NaiveBayes.NaiveBayesGenerator(width) + { + Descriptor = descriptor, + }; + + return generator; + }, + + getToPredict: () => + { + return new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true + }; + }, + + getPredictionDesc: (t) => "Play: " + t.Play.ToString(), + + trainingPercentage: 0.8d, + + repeat: 25 + ); + } + + /// + /// Do the SimpleNumlWorkflow using a LogisticRegression generator/model. + /// + /// If true, uses a preset small sample data set; else generates a larger, pseudo-random one. + /// + /// + /// + /// + static void DoSimple_LogisticRegression(bool simpleData = true, int polynomialFeatures = 5, + int maxIterations = 500, double learningRate = 0.03d, double lambda = 1) + { + SimpleNumlWorkflowImpl( + getData: () => SampleData.GetTennisData(simpleData), + + getDescriptor: () => Descriptor.Create(), + + // This is where we choose the actual algorithm + getGenerator: (descriptor) => + { + var generator = new numl.Supervised.Regression.LogisticRegressionGenerator() + { + Descriptor = descriptor, + PolynomialFeatures = polynomialFeatures, + MaxIterations = maxIterations, + LearningRate = learningRate, + Lambda = lambda + }; + + return generator; + }, + + getToPredict: () => + { + return new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true + }; + }, + + getPredictionDesc: (t) => "Play: " + t.Play.ToString(), + + trainingPercentage: 0.8d, + + repeat: 25 + ); + } + + /// + /// Do the SimpleNumlWorkflow using a Perceptron generator/model. + /// + /// If true, uses a preset small sample data set; else generates a larger, pseudo-random one. + /// + static void DoSimple_Perceptron(bool simpleData = true, bool normalize = true) + { + SimpleNumlWorkflowImpl( + getData: () => SampleData.GetTennisData(simpleData), + + getDescriptor: () => Descriptor.Create(), + + // This is where we choose the actual algorithm + getGenerator: (descriptor) => + { + var generator = new numl.Supervised.Perceptron.PerceptronGenerator(normalize) + { + Descriptor = descriptor + }; + return generator; + }, + + getToPredict: () => + { + return new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true + }; + }, + + getPredictionDesc: (t) => "Play: " + t.Play.ToString(), + + trainingPercentage: 0.8d, + + repeat: 1000 + ); + } + + /// + /// Do the SimpleNumlWorkflow using a Neural Network generator/model. + /// + /// If true, uses a preset small sample data set; else generates a larger, pseudo-random one. + /// + /// + static void DoSimple_NeuralNetwork(bool simpleData = true, int maxIterations = 10000, + double learningRate = 0.9d) + { + SimpleNumlWorkflowImpl( + getData: () => SampleData.GetTennisData(simpleData), + + getDescriptor: () => Descriptor.Create(), + + // This is where we choose the actual algorithm + getGenerator: (descriptor) => + { + var generator = new numl.Supervised.NeuralNetwork.NeuralNetworkGenerator() + { + Descriptor = descriptor, + MaxIterations = maxIterations, + LearningRate = learningRate + }; + return generator; + }, + + getToPredict: () => + { + return new Tennis() + { + Outlook = Outlook.Rainy, + Temperature = Temperature.Low, + Windy = true + }; + }, + + getPredictionDesc: (t) => "Play: " + t.Play.ToString(), + + trainingPercentage: 0.8d, + + repeat: 50 + ); + } + } +} diff --git a/Src/numlsample.GettingStarted/Program.cs b/Src/numlsample.GettingStarted/Program.cs new file mode 100644 index 0000000..644e4f3 --- /dev/null +++ b/Src/numlsample.GettingStarted/Program.cs @@ -0,0 +1,18 @@ +using numlsample.GettingStarted.ExampleCode; + +namespace numlsample.GettingStarted +{ + public class Program + { + public static void Main(string[] args) + { + // Comment/uncomment these to get the basics + + // Start here. + //QuickStart.Go(); + + // These examples use the same simple workflow with numl + SimpleNumlWorkflow.Go(); + } + } +} diff --git a/Src/numlsample.GettingStarted/Properties/AssemblyInfo.cs b/Src/numlsample.GettingStarted/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..89c0814 --- /dev/null +++ b/Src/numlsample.GettingStarted/Properties/AssemblyInfo.cs @@ -0,0 +1,18 @@ +using System.Reflection; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("numlsample.GettingStarted")] +[assembly: AssemblyTrademark("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("70589b72-0e1b-4d92-b92d-b0ee30b2e73c")] diff --git a/Src/numlsample.GettingStarted/Randm.cs b/Src/numlsample.GettingStarted/Randm.cs new file mode 100644 index 0000000..86706c1 --- /dev/null +++ b/Src/numlsample.GettingStarted/Randm.cs @@ -0,0 +1,251 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace numlsample.GettingStarted +{ + public class Randm + { + public Randm() + { + NewRandomizer(); + } + + private Random Randomizer { get; set; } + + #region Singleton Pattern Members + private static volatile Randm _Helper; + private static object _Lock = new object(); + public static Randm Helper + { + get + { + if (_Helper == null) + { + lock (_Lock) + { + if (_Helper == null) + _Helper = new Randm(); + } + } + + return _Helper; + } + } + #endregion + + public void NewRandomizer() + { + //var generator = new RNGCryptoServiceProvider(); + ////using (RNGCryptoServiceProvider generator = new RNGCryptoServiceProvider()) + //{ + // byte[] randomBytes = new byte[4]; + // generator.GetBytes(randomBytes); + // int randomInt = BitConverter.ToInt32(randomBytes, 0); + // Randomizer = new Random(randomInt); + //} + + Randomizer = new Random(DateTime.Now.Millisecond + DateTime.Now.Minute + DateTime.Now.Second * (DateTime.Now.Second + 1)); + } + + public T PickOne(T aObject, T bObject, double aWeight = 0.5d, double bWeight = 0.5d) + { + T dummy = default(T); + return PickOne(aObject, bObject, out dummy, aWeight, bWeight); + } + + public T PickOne(T aObject, T bObject, out T loser, double aWeight = 0.5d, double bWeight = 0.5d) + { + if (aObject == null) + throw new ArgumentNullException("aObject"); + if (bObject == null) + throw new ArgumentNullException("bObject"); + if (aWeight < 0) + throw new ArgumentException("aWeight"); + if (bWeight < 0) + throw new ArgumentException("bWeight"); + + double probabilityA = aWeight / (aWeight + bWeight); + var randomDouble = Randomizer.NextDouble(); + if (randomDouble < probabilityA) + { + loser = bObject; + return aObject; + } + else + { + loser = aObject; + return bObject; + } + } + public object PickOne(object aObject, object bObject, out object loser, double aWeight = 0.5d, + double bWeight = 0.5d) + { + return PickOne(aObject, bObject, out loser, aWeight, bWeight); + } + + public T PickOne(params WeightedOption[] weightedOptions) + { + if (weightedOptions == null) + throw new ArgumentNullException(nameof(weightedOptions)); + + var result = default(T); + + var nonZeroWeightedOptions = weightedOptions.Where(wo => wo.Weight > 0); + + //We're going to add up all the weights. Each of the options' weight will set the boundary + //of the number line that means that option is picked. + var weightSum = nonZeroWeightedOptions.Sum(wo => wo.Weight); + var pickedRandomNumber = Randomizer.NextDouble() * weightSum; + var optionsList = nonZeroWeightedOptions.ToList(); + var optionFound = false; + + for (int i = 0; i < optionsList.Count; i++) + { + var option = optionsList[i]; + var sumPrevOptionWeights = + i == 0 ? + 0 : + optionsList.Take(i).Sum(wo => wo.Weight); + var min = sumPrevOptionWeights; + var max = sumPrevOptionWeights + option.Weight; + //is the picked random number in our range of min/max? If so, then our option was chosen. + if (pickedRandomNumber >= min && + pickedRandomNumber <= max) + { + optionFound = true; + result = option.Obj; + break; + } + } + + if (!optionFound) + throw new InvalidOperationException("Option was not found in weighted random picking."); + + return result; + } + + public object PickOne(params WeightedOption[] weightedOptions) + { + var castedOptions = weightedOptions.Select(wo => WeightedOption._(wo.Weight, wo.Obj)); + var result = PickOne(castedOptions.ToArray()); + return result; + } + + public int NextInt(int minInclusive, int maxExclusive) + { + if (minInclusive >= maxExclusive) + throw new InvalidOperationException($"{nameof(minInclusive)} must be less than {nameof(maxExclusive)}"); + + return Randomizer.Next(minInclusive, maxExclusive); + } + + public double NextDouble(double minInclusive, double maxExclusive) + { + if (minInclusive >= maxExclusive) + throw new InvalidOperationException($"{nameof(minInclusive)} must be less than {nameof(maxExclusive)}"); + + //thanks! http://www.experts-exchange.com/Programming/Languages/C_Sharp/Q_28243467.html + var d = Randomizer.NextDouble(); + + var result = ((d * (maxExclusive - minInclusive)) + minInclusive); + + return result; + } + + public T PickOne(IEnumerable enumerable) + { + if (enumerable == null) + throw new ArgumentNullException("enumerable"); + + var count = enumerable.Count(); + if (count == 0) + return default(T); + + var i = NextInt(0, count); + return enumerable.ElementAt(i); + } + + /// + /// Picks random letters given a number of letters to pick. (uppercase and lowercase) + /// + /// + /// + public string PickLetters(int numOfLetters) + { + if (numOfLetters <= 0) + throw new ArgumentException($"{nameof(numOfLetters)} must be greater than 0."); + + var letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + string randomString = ""; + + for (int i = 0; i < numOfLetters; i++) + randomString += PickRandomCharacter(letters); + + + return randomString; + } + + public string PickRandomInt_RingAround() + { + if (PickBool(0.5)) + { + var result = @"2035"; + return result; + } + else + { + var result = @"2035"; + return result; + } + } + + public char PickRandomCharacter(string source) + { + if (source == null) + throw new ArgumentNullException("source"); + + var randomIndex = Randomizer.Next(0, source.Length); + var randomChar = source[randomIndex]; + return randomChar; + } + + public bool PickBool(double probabilityTrue) + { + var next = numl.Math.Probability.Sampling.GetUniform(); + //var next = Randomizer.NextDouble(); + + var result = (next <= probabilityTrue); + + return result; + } + } + + public class WeightedOption + { + public static WeightedOption _(double weight, T obj) + { + var result = new WeightedOption() + { + Weight = weight, + Obj = obj + }; + return result; + } + public double Weight { get; set; } + public T Obj { get; set; } + } + + public class WeightedOption : WeightedOption + { + public new static WeightedOption _(double weight, object obj) + { + var result = new WeightedOption() + { + Weight = weight, + Obj = obj + }; + return result; + } + } +} diff --git a/Src/numlsample.GettingStarted/numlsample.GettingStarted.csproj b/Src/numlsample.GettingStarted/numlsample.GettingStarted.csproj new file mode 100644 index 0000000..b890e1a --- /dev/null +++ b/Src/numlsample.GettingStarted/numlsample.GettingStarted.csproj @@ -0,0 +1,21 @@ + + + + netcoreapp1.0 + numlsample.GettingStarted + Exe + numlsample.GettingStarted + 1.0.4 + $(PackageTargetFallback);dnxcore50 + false + false + false + + + + + + + + +