From 7f97812915c194a3c42857e576dc0bf2a2fde06b Mon Sep 17 00:00:00 2001 From: Nuno Fachada Date: Sat, 30 May 2020 16:59:09 +0100 Subject: [PATCH] Update citation reference --- README.md | 23 +++++++++++++++++++---- generateData.m | 24 ++++++++++++++---------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 7b1a5e5..e3f48be 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ [![Latest release](https://img.shields.io/github/release/fakenmc/generateData.svg)](https://github.com/fakenmc/generateData/releases) [![MIT Licence](https://img.shields.io/badge/license-MIT-yellowgreen.svg)](https://opensource.org/licenses/MIT/) +[![View Generate Data for Clustering on File Exchange](https://www.mathworks.com/matlabcentral/images/matlab-file-exchange.svg)](https://www.mathworks.com/matlabcentral/fileexchange/37435-generate-data-for-clustering) # generateData @@ -100,14 +101,28 @@ rand("state", 123); randn("state", 123); ``` +## Previous behaviors and reproducibility of results + +Before [v2.0.0](https://github.com/fakenmc/generateData/tree/v2.0.0), lines +supporting clusters were parameterized with slopes instead of angles. We found +this caused difficulties when choosing line orientation, thus the change to +angles, which are much easier to work with. +Version [v1.3.0](https://github.com/fakenmc/generateData/tree/v1.3.0) still +uses slopes, for those who prefer this behavior. + +For reproducing results in studies published before May 2020, use version +[v1.2.0](https://github.com/fakenmc/generateData/tree/v1.2.0) instead. +Subsequent versions were optimized in a way that changed the order in which +the required random values are generated, thus producing slightly different +results. 
+ ## Reference If you use this function in your work, please cite the following reference: -- Fachada, N., Figueiredo, M.A.T., Lopes, V.V., Martins, R.C., Rosa, -A.C., [Spectrometric differentiation of yeast strains using minimum volume -increase and minimum direction change clustering criteria](http://www.sciencedirect.com/science/article/pii/S0167865514000889), -Pattern Recognition Letters, vol. 45, pp. 55-61 (2014), doi: http://dx.doi.org/10.1016/j.patrec.2014.03.008 +- Fachada, N., & Rosa, A. C. (2020). +[generateData—A 2D data generator](https://doi.org/10.1016/j.simpa.2020.100017). +Software Impacts, 4:100017. doi: [10.1016/j.simpa.2020.100017](https://doi.org/10.1016/j.simpa.2020.100017) ## License diff --git a/generateData.m b/generateData.m index 94b9477..e17fd0a 100644 --- a/generateData.m +++ b/generateData.m @@ -11,11 +11,11 @@ totalPoints, ... varargin ... ) -% GENERATEDATA Generates 2D data for clustering. Data is created along +% GENERATEDATA Generates 2D data for clustering. Data is created along % straight lines, which can be more or less parallel % depending on the angleStd parameter. % -% [data clustPoints idx centers angles lengths] = +% [data clustPoints idx centers angles lengths] = % GENERATEDATA(angleMean, angleStd, numClusts, xClustAvgSep, ... % yClustAvgSep, lengthMean, lengthStd, lateralStd, ... % totalPoints, ...) @@ -31,7 +31,7 @@ % Line lengths are drawn from the folded normal % distribution. % lengthStd - Standard deviation of line lengths. -% lateralStd - Cluster "fatness", i.e., the standard deviation of the +% lateralStd - Cluster "fatness", i.e., the standard deviation of the % distance from each point to its projection on the % line. The way this distance is obtained is controlled by % the optional 'pointOffset' parameter. @@ -64,9 +64,9 @@ % of each point. % centers - Matrix (numClusts x 2) containing centers from where % clusters were generated. 
-% angles - Vector (numClusts x 1) containing the effective angles +% angles - Vector (numClusts x 1) containing the effective angles % of the lines used to generate clusters. -% lengths - Vector (numClusts x 1) containing the effective lengths +% lengths - Vector (numClusts x 1) containing the effective lengths % of the lines used to generate clusters. % % ---------------------------------------------------------- @@ -74,8 +74,8 @@ % % [data cp idx] = GENERATEDATA(pi / 2, pi / 8, 5, 15, 15, 5, 1, 2, 200); % -% This creates 5 clusters with a total of 200 points, with a mean angle -% of pi/2 (std=pi/8), separated in average by 15 units in both x and y +% This creates 5 clusters with a total of 200 points, with a mean angle +% of pi/2 (std=pi/8), separated on average by 15 units in both x and y % directions, with mean length of 5 units (std=1) and a "fatness" or % spread of 2 units. % @@ -84,8 +84,12 @@ % scatter(data(:, 1), data(:, 2), 8, idx); % Copyright (c) 2012-2020 Nuno Fachada -% Distributed under the MIT License (See accompanying file LICENSE or copy +% Distributed under the MIT License (See accompanying file LICENSE or copy % at http://opensource.org/licenses/MIT) +% +% Reference: +% Fachada, N., & Rosa, A. C. (2020). generateData—A 2D data generator. +% Software Impacts, 4:100017. doi: 10.1016/j.simpa.2020.100017 % Known distributions for sampling points along lines pointDists = {'unif', 'norm'}; @@ -225,7 +229,7 @@ % each point perpAngles = angles(i) + sign(points_dist) * pi / 2; perpVecs = [cos(perpAngles) sin(perpAngles)]; - + % Set vector magnitudes perpVecs = abs(points_dist) .* perpVecs; @@ -253,4 +257,4 @@ % Update idx idx(cumSumPoints(i) + 1 : cumSumPoints(i + 1)) = i; -end; \ No newline at end of file +end;