From ec55732ec0a816e347b1e6951b0d78999b016338 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 30 May 2024 12:00:28 +0200 Subject: [PATCH 01/15] first commit --- src/datasets/vision/stacked_mnist.jl | 56 ++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 src/datasets/vision/stacked_mnist.jl diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl new file mode 100644 index 00000000..ab50aa26 --- /dev/null +++ b/src/datasets/vision/stacked_mnist.jl @@ -0,0 +1,56 @@ +using Images +using DataDeps +using Flux +using Random + +# Define the StackedMNIST type +struct StackedMNIST + features::Array{<:Any, 3} + targets::Vector{Int} + index::Vector{Tuple{Int, Int, Int}} +end + +function StackedMNIST( + Tx::Type, + split::Symbol = :train; dir = nothing) + mnist = MNIST(Tx, split; dir = dir) + features = mnist.features + targets = vec(mnist.targets) + + num_images = 2 * size(features, 3) + index1 = vcat(1:size(features, 3), 1:size(features, 3)) + index2 = vcat(1:size(features, 3), 1:size(features, 3)) + index3 = vcat(1:size(features, 3), 1:size(features, 3)) + + Random.shuffle!(index1) + Random.shuffle!(index2) + Random.shuffle!(index3) + + index = [(index1[i], index2[i], index3[i]) for i in 1:num_images] + + StackedMNIST(features, targets, index) +end + +# Define the length function +Base.length(sm::StackedMNIST) = sm.num_images + +# Define the getindex function +function Base.getindex(sm::StackedMNIST, idx::Int) + img = zeros(N0f8, 28, 28, 3) + target = 0 + + for i in 1:3 + img_ = sm.features[:, :, sm.index[idx][i]] + target_ = sm.targets[sm.index[idx][i]] + img[:, :, i] .= N0f8.(img_) + target += target_ * 10^(2 - (i - 1)) + end + + # Manually construct the RGB image + red_channel = img[:, :, 1] + green_channel = img[:, :, 2] + blue_channel = img[:, :, 3] + rgb_img = RGB.(red_channel, green_channel, blue_channel) + + return rgb_img, target +end From 1a381bb2c77060f7b3eaa18acfb2cc3b09e3a6db Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 30 May 2024 12:02:29 +0200 Subject: [PATCH 02/15] first commit --- src/datasets/vision/stacked_mnist.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index ab50aa26..f561fbfb 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -1,8 +1,3 @@ -using Images -using DataDeps -using Flux -using Random - # Define the StackedMNIST type struct StackedMNIST features::Array{<:Any, 3} From cdf7216b86819c56f763128b25005e4327c93378 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 30 May 2024 14:36:34 +0200 Subject: [PATCH 03/15] first commit --- src/datasets/vision/stacked_mnist.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index f561fbfb..4265883a 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -1,10 +1,17 @@ # Define the StackedMNIST type -struct StackedMNIST +struct StackedMNIST <: SupervisedDataset features::Array{<:Any, 3} targets::Vector{Int} index::Vector{Tuple{Int, Int, Int}} end +# Convenience constructors for StackedMNIST +function StackedMNIST(; split = :train, Tx = UInt8, dir = nothing) + StackedMNIST(Tx = Tx, split = split, dir = dir) +end +StackedMNIST(split::Symbol; kws...) = StackedMNIST(; split, kws...) +StackedMNIST(Tx::Type; kws...) = StackedMNIST(; Tx, kws...) + function StackedMNIST( Tx::Type, split::Symbol = :train; dir = nothing) From 203558f912469c827ddb6edc222dccf3c42dc6b8 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 30 May 2024 14:45:24 +0200 Subject: [PATCH 04/15] first commit --- src/datasets/vision/stacked_mnist.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index 4265883a..b186ec12 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -1,14 +1,11 @@ # Define the StackedMNIST type -struct StackedMNIST <: SupervisedDataset +struct StackedMNIST features::Array{<:Any, 3} targets::Vector{Int} index::Vector{Tuple{Int, Int, Int}} end # Convenience constructors for StackedMNIST -function StackedMNIST(; split = :train, Tx = UInt8, dir = nothing) - StackedMNIST(Tx = Tx, split = split, dir = dir) -end StackedMNIST(split::Symbol; kws...) = StackedMNIST(; split, kws...) StackedMNIST(Tx::Type; kws...) = StackedMNIST(; Tx, kws...) From 678a5592dd128205eba2f8fd604def6022b7bb48 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Tue, 13 Aug 2024 18:08:23 +0200 Subject: [PATCH 05/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- docs/src/datasets/vision.md | 1 + src/MLDatasets.jl | 5 ++ src/datasets/vision/stacked_mnist.jl | 101 +++++++++++++++++++++++---- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/docs/src/datasets/vision.md b/docs/src/datasets/vision.md index f6ab6f28..b776e871 100644 --- a/docs/src/datasets/vision.md +++ b/docs/src/datasets/vision.md @@ -25,6 +25,7 @@ CIFAR100 EMNIST FashionMNIST MNIST +Stacked_MNIST Omniglot SVHN2 ``` diff --git a/src/MLDatasets.jl b/src/MLDatasets.jl index 412b79c6..28f693d9 100644 --- a/src/MLDatasets.jl +++ b/src/MLDatasets.jl @@ -13,6 +13,8 @@ using FileIO import CSV using LazyModules: @lazy using Statistics +using Random +using Colors include("require.jl") # export @require @@ -90,6 +92,8 @@ export FashionMNIST include("datasets/vision/mnist_reader/MNISTReader.jl") include("datasets/vision/mnist.jl") export MNIST +include("datasets/vision/stacked_mnist.jl") +export StackedMNIST include("datasets/vision/omniglot.jl") export Omniglot include("datasets/vision/svhn2.jl") @@ -175,6 +179,7 @@ function __init__() __init__emnist() __init__fashionmnist() __init__mnist() + __init__stackedmist() __init__omniglot() __init__svhn2() diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index b186ec12..a2eb0671 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -1,33 +1,110 @@ -# Define the StackedMNIST type -struct StackedMNIST +function __init__stackedmist() + DEPNAME = "StackedMNIST" + TRAINIMAGES = "train-images-idx3-ubyte.gz" + TRAINLABELS = "train-labels-idx1-ubyte.gz" + TESTIMAGES = "t10k-images-idx3-ubyte.gz" + TESTLABELS = "t10k-labels-idx1-ubyte.gz" + register(DataDep(DEPNAME, + """Dataset: The Stacked MNIST dataset is derived from the standard MNIST dataset with an increased number of discrete modes. 240,000 RGB images in the size of 28×28 are synthesized by stacking three random digit images from MNIST along the color channel, resulting in 1,000 explicit modes in a uniform distribution corresponding to the number of possible triples of digits. + Authors: Metz et al. + Website: https://paperswithcode.com/dataset/stacked-mnist + + [Metz L et al., 2016] + Metz L, Poole B, Pfau D, Sohl-Dickstein J. Unrolled generative adversarial networks. arXiv preprint arXiv:1611.02163. 2016 Nov 7. + """, + "", + [TRAINIMAGES, TRAINLABELS, TESTIMAGES, TESTLABELS] + )) +end + +""" + StackedMNIST(; Tx=Float32, split=:train, dir=nothing) + StackedMNIST([Tx, split]) + +The StackedMNIST dataset is a variant of the classic MNIST dataset where each observation is a combination of three randomly shuffled MNIST digits, stacked as RGB channels. + +# Arguments + +- `Tx`: The data type for the features. Defaults to `Float32`. If `Tx <: Integer`, the features will range between 0 and 255; otherwise, they will be scaled between 0 and 1. +- `split`: The data partition to load, either `:train` or `:test`. Defaults to `:train`. +- `dir`: The directory where the dataset is stored. If `nothing`, the default location is used. + +# Fields + +- `features`: A 3D array of MNIST images with dimensions `(28, 28, num_images)`, where `num_images` is twice the number of images in the selected split due to the shuffling and stacking process. +- `targets`: A vector of integers, where each integer represents the combined label for the stacked RGB image. +- `index`: A vector of tuples, each containing three integers. These tuples indicate which original MNIST images were combined to create each RGB image. + +# Methods + +- [`convert2image`](@ref) converts features to RGB images. +- `Base.length(sm::StackedMNIST)`: Returns the number of images in the dataset. +- `Base.getindex(sm::StackedMNIST, idx::Int)`: Returns the RGB image and its corresponding target label at the specified index. + +# Examples + +The images in the StackedMNIST dataset are loaded as a multi-dimensional array of type `Tx`. The dataset's `features` field is a 3D array in WHN format (width, height, num_images). Labels are stored as a vector of integers in `StackedMNIST().targets`. The images are constructed by stacking three randomly chosen MNIST digits as RGB channels. Resulting in 1,000 explicit modes in a uniform distribution corresponding to the number of possible triples of digits. + +```julia-repl +julia> using MLDatasets: StackedMNIST + +julia> dataset = StackedMNIST(:train) +StackedMNIST: +features => 28×28×60000 Array{Float32, 3} +targets => 60000-element Vector{Int64} +index => 60000-element Vector{Tuple{Int, Int, Int}} + +julia> dataset[1:5].targets +5-element Vector{Int64}: +721 +238 +153 +409 +745 + +julia> img, label = dataset[1] +RGB Image with dimensions 28×28, label: 721 + +julia> dataset = StackedMNIST(UInt8, :test) +StackedMNIST: + features => 28×28×10000 Array{UInt8, 3} + split => :test + targets => 10000-element Vector{Int64} + index => 10000-element Vector{Tuple{Int, Int, Int}} +``` +""" +struct StackedMNIST <: SupervisedDataset features::Array{<:Any, 3} - targets::Vector{Int} + split::Symbol index::Vector{Tuple{Int, Int, Int}} end # Convenience constructors for StackedMNIST +StackedMNIST(; split = :train, Tx = Float32, dir = nothing) = StackedMNIST(Tx, split; dir) StackedMNIST(split::Symbol; kws...) = StackedMNIST(; split, kws...) StackedMNIST(Tx::Type; kws...) = StackedMNIST(; Tx, kws...) function StackedMNIST( Tx::Type, - split::Symbol = :train; dir = nothing) + split::Symbol = :train, + ; dir = nothing) mnist = MNIST(Tx, split; dir = dir) features = mnist.features - targets = vec(mnist.targets) + split = mnist.split + #targets = vec(mnist.targets) num_images = 2 * size(features, 3) index1 = vcat(1:size(features, 3), 1:size(features, 3)) index2 = vcat(1:size(features, 3), 1:size(features, 3)) index3 = vcat(1:size(features, 3), 1:size(features, 3)) - Random.shuffle!(index1) - Random.shuffle!(index2) - Random.shuffle!(index3) + shuffle!(index1) + shuffle!(index2) + shuffle!(index3) index = [(index1[i], index2[i], index3[i]) for i in 1:num_images] - StackedMNIST(features, targets, index) + StackedMNIST(features, split, index) end # Define the length function @@ -40,16 +117,16 @@ function Base.getindex(sm::StackedMNIST, idx::Int) for i in 1:3 img_ = sm.features[:, :, sm.index[idx][i]] - target_ = sm.targets[sm.index[idx][i]] + #target_ = sm.targets[sm.index[idx][i]] img[:, :, i] .= N0f8.(img_) - target += target_ * 10^(2 - (i - 1)) + #target += target_ * 10^(2 - (i - 1)) end # Manually construct the RGB image red_channel = img[:, :, 1] green_channel = img[:, :, 2] blue_channel = img[:, :, 3] - rgb_img = RGB.(red_channel, green_channel, blue_channel) + rgb_img = Colors.RGB.(red_channel, green_channel, blue_channel) return rgb_img, target end From eca720cb808506a6f7054dd8b1c8351c6a9decfa Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 14:02:32 +0200 Subject: [PATCH 06/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- src/datasets/vision/stacked_mnist.jl | 73 ++++++++++++++++------------ test/runtests.jl | 13 +++-- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index a2eb0671..6dafc91f 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -74,9 +74,10 @@ StackedMNIST: ``` """ struct StackedMNIST <: SupervisedDataset - features::Array{<:Any, 3} + features::Any split::Symbol - index::Vector{Tuple{Int, Int, Int}} + targets::Vector{Tuple{Int, Int, Int}} + size::Int end # Convenience constructors for StackedMNIST @@ -87,46 +88,58 @@ StackedMNIST(Tx::Type; kws...) = StackedMNIST(; Tx, kws...) function StackedMNIST( Tx::Type, split::Symbol = :train, - ; dir = nothing) + ; size = 60000, dir = nothing) mnist = MNIST(Tx, split; dir = dir) - features = mnist.features split = mnist.split - #targets = vec(mnist.targets) - num_images = 2 * size(features, 3) - index1 = vcat(1:size(features, 3), 1:size(features, 3)) - index2 = vcat(1:size(features, 3), 1:size(features, 3)) - index3 = vcat(1:size(features, 3), 1:size(features, 3)) + mnist_targets = vec(mnist.targets) + targets = Vector{Tuple{Int, Int, Int}}(undef, size) + features = Array{Tx, 4}(undef, 28, 28, 3, size) + # Randomly select 3 numbers from the list 60,000 times and store them as tuples - shuffle!(index1) - shuffle!(index2) - shuffle!(index3) + function random_three_unique(vec) + indices = randperm(length(vec))[1:3] + return (vec[indices[1]], vec[indices[2]], vec[indices[3]]) + end - index = [(index1[i], index2[i], index3[i]) for i in 1:num_images] + for i in 1:size + label1, label2, label3 = random_three_unique(mnist_targets) + index1 = findall(x -> x == label1, mnist_targets) + random_index1 = rand(index1) + red_channel = mnist.features[:, :, random_index1] + + index2 = findall(x -> x == label2, mnist_targets) + random_index2 = rand(index2) + green_channel = mnist.features[:, :, random_index2] + + index3 = findall(x -> x == label3, mnist_targets) + random_index3 = rand(index3) + blue_channel = mnist.features[:, :, random_index3] + + targets[i] = label1, label2, label3 + # Combine the channels into an RGB image and store in the features array + features[:, :, 1, i] = red_channel + features[:, :, 2, i] = green_channel + features[:, :, 3, i] = blue_channel + end - StackedMNIST(features, split, index) + StackedMNIST(features, split, targets, size) end # Define the length function -Base.length(sm::StackedMNIST) = sm.num_images +Base.length(sm::StackedMNIST) = sm.size # Define the getindex function function Base.getindex(sm::StackedMNIST, idx::Int) - img = zeros(N0f8, 28, 28, 3) - target = 0 - - for i in 1:3 - img_ = sm.features[:, :, sm.index[idx][i]] - #target_ = sm.targets[sm.index[idx][i]] - img[:, :, i] .= N0f8.(img_) - #target += target_ * 10^(2 - (i - 1)) - end + return sm.features[idx], d.targets[idx] +end - # Manually construct the RGB image - red_channel = img[:, :, 1] - green_channel = img[:, :, 2] - blue_channel = img[:, :, 3] - rgb_img = Colors.RGB.(red_channel, green_channel, blue_channel) +# Function to extract and show an RGB image +function show_rgb_image(features, index) + red_channel = features[:, :, 1, index] # Extract and convert red channel + green_channel = features[:, :, 2, index] # Extract and convert green channel + blue_channel = features[:, :, 3, index] # Extract and convert blue channel - return rgb_img, target + img_rgb = Colors.RGB.(red_channel, green_channel, blue_channel) # Combine into RGB image + return img_rgb # Plot as an RGB image end diff --git a/test/runtests.jl b/test/runtests.jl index f1111310..fe81115d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,6 +19,7 @@ dataset_tests = [ "datasets/text.jl", "datasets/vision/fashion_mnist.jl", "datasets/vision/mnist.jl", + "datasets/vision/stacked_mnist.jl" ] no_ci_dataset_tests = [ @@ -29,7 +30,7 @@ no_ci_dataset_tests = [ "datasets/vision/emnist.jl", "datasets/vision/omniglot.jl", "datasets/vision/svhn2.jl", - "datasets/meshes.jl", + "datasets/meshes.jl" ] @assert isempty(intersect(dataset_tests, no_ci_dataset_tests)) @@ -39,7 +40,7 @@ container_tests = [ # "containers/tabledataset.jl", # "containers/hdf5dataset.jl", # "containers/jld2dataset.jl", - "containers/cacheddataset.jl", + "containers/cacheddataset.jl" ] @testset "Datasets" begin @@ -57,8 +58,10 @@ container_tests = [ end end -@testset "Containers" begin for t in container_tests - include(t) -end end +@testset "Containers" begin + for t in container_tests + include(t) + end +end nothing From ea66be1b23767db4496864ed6146be8e4f272de9 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 18:18:03 +0200 Subject: [PATCH 07/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- src/datasets/vision/stacked_mnist.jl | 51 +++++++++++++++++----------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index 6dafc91f..6b77550d 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -31,46 +31,44 @@ The StackedMNIST dataset is a variant of the classic MNIST dataset where each ob # Fields -- `features`: A 3D array of MNIST images with dimensions `(28, 28, num_images)`, where `num_images` is twice the number of images in the selected split due to the shuffling and stacking process. -- `targets`: A vector of integers, where each integer represents the combined label for the stacked RGB image. -- `index`: A vector of tuples, each containing three integers. These tuples indicate which original MNIST images were combined to create each RGB image. +- `features`: A 4D array of MNIST images with dimensions `(28, 28, 3, num_images)`, where `num_images` is the number of images in the selected split. +- `targets`: A vector of tuples, each containing three integers representing the combined labels for the stacked RGB image. +- `size`: The total number of images in the dataset. # Methods -- [`convert2image`](@ref) converts features to RGB images. +- `convert2image`: Converts feature arrays to RGB images. - `Base.length(sm::StackedMNIST)`: Returns the number of images in the dataset. - `Base.getindex(sm::StackedMNIST, idx::Int)`: Returns the RGB image and its corresponding target label at the specified index. # Examples -The images in the StackedMNIST dataset are loaded as a multi-dimensional array of type `Tx`. The dataset's `features` field is a 3D array in WHN format (width, height, num_images). Labels are stored as a vector of integers in `StackedMNIST().targets`. The images are constructed by stacking three randomly chosen MNIST digits as RGB channels. Resulting in 1,000 explicit modes in a uniform distribution corresponding to the number of possible triples of digits. +The images in the StackedMNIST dataset are loaded as a multi-dimensional array of type `Tx`. The dataset's `features` field is a 4D array in WHCN format (width, height, channels, num_images). Labels are stored as a vector of tuples in `StackedMNIST().targets`. The images are constructed by stacking three randomly chosen MNIST digits as RGB channels, resulting in 1,000 explicit modes corresponding to the number of possible triples of digits. ```julia-repl julia> using MLDatasets: StackedMNIST julia> dataset = StackedMNIST(:train) StackedMNIST: -features => 28×28×60000 Array{Float32, 3} -targets => 60000-element Vector{Int64} -index => 60000-element Vector{Tuple{Int, Int, Int}} +features => 28×28×3×60000 Array{Float32, 4} +targets => 60000-element Vector{Tuple{Int, Int, Int}} julia> dataset[1:5].targets -5-element Vector{Int64}: -721 -238 -153 -409 -745 +5-element Vector{Tuple{Int, Int, Int}}: +(7, 2, 1) +(2, 3, 8) +(1, 5, 3) +(4, 0, 9) +(7, 4, 5) julia> img, label = dataset[1] -RGB Image with dimensions 28×28, label: 721 +RGB Image with dimensions 28×28, label: (7, 2, 1) julia> dataset = StackedMNIST(UInt8, :test) StackedMNIST: - features => 28×28×10000 Array{UInt8, 3} + features => 28×28×3×10000 Array{UInt8, 4} split => :test - targets => 10000-element Vector{Int64} - index => 10000-element Vector{Tuple{Int, Int, Int}} + targets => 10000-element Vector{Tuple{Int, Int, Int}} ``` """ struct StackedMNIST <: SupervisedDataset @@ -84,6 +82,9 @@ end StackedMNIST(; split = :train, Tx = Float32, dir = nothing) = StackedMNIST(Tx, split; dir) StackedMNIST(split::Symbol; kws...) = StackedMNIST(; split, kws...) StackedMNIST(Tx::Type; kws...) = StackedMNIST(; Tx, kws...) +function StackedMNIST(size::Integer; split = :train, Tx = Float32, dir = nothing) + StackedMNIST(Tx, split; size = size, dir = dir) +end function StackedMNIST( Tx::Type, @@ -131,7 +132,7 @@ Base.length(sm::StackedMNIST) = sm.size # Define the getindex function function Base.getindex(sm::StackedMNIST, idx::Int) - return sm.features[idx], d.targets[idx] + return sm.features[idx], sm.targets[idx] end # Function to extract and show an RGB image @@ -143,3 +144,15 @@ function show_rgb_image(features, index) img_rgb = Colors.RGB.(red_channel, green_channel, blue_channel) # Combine into RGB image return img_rgb # Plot as an RGB image end + +function convert2image(::Type{<:StackedMNIST}, x::AbstractArray{<:Integer}) + # Reinterpret the input array as N0f8 and convert it to StackedMNIST-compatible format + return convert2image(StackedMNIST, reinterpret(N0f8, convert(Array{UInt8}, x))) +end + +function convert2image(::Type{<:StackedMNIST}, x::AbstractArray{T, N}) where {T, N} + @assert N == 3 || N == 4 + x = permutedims(x, (2, 1, 3:N...)) + img_rgb = Colors.RGB{T}.(x[:, :, 1, :], x[:, :, 2, :], x[:, :, 3, :]) + return reshape(img_rgb, size(img_rgb, 1), size(img_rgb, 2), size(img_rgb, 3)) +end From ce0c13956c4b3c93b1f69dddd8e4355d59d527b7 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 18:22:27 +0200 Subject: [PATCH 08/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 2e69ae02..e5e77998 100644 --- a/Project.toml +++ b/Project.toml @@ -22,6 +22,7 @@ MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605" Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" From 26bc517d906ea0bbc5eceaab8616e68d0da0fdd5 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 18:26:47 +0200 Subject: [PATCH 09/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index e5e77998..a5ad13f2 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.7.14" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" Chemfiles = "46823bd8-5fb3-5f92-9aa0-96921f3dd015" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" From e25b882f66d1ff3b7fb73ddaf3231298db652d05 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 18:46:18 +0200 Subject: [PATCH 10/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index fe81115d..aaff0628 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -45,6 +45,7 @@ container_tests = [ @testset "Datasets" begin @testset "$(split(t,"/")[end])" for t in dataset_tests + @info "Including $t" include(t) end From fe700f52c2cceb70d9771201ae370eab3349f67d Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 19:00:28 +0200 Subject: [PATCH 11/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- test/runtests.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index aaff0628..828a06bc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,8 +18,7 @@ dataset_tests = [ "datasets/misc.jl", "datasets/text.jl", "datasets/vision/fashion_mnist.jl", - "datasets/vision/mnist.jl", - "datasets/vision/stacked_mnist.jl" + "datasets/vision/mnist.jl" #"datasets/vision/stacked_mnist.jl" ] no_ci_dataset_tests = [ From fc5449fdc83616b654a7dc6d60d9e13d878cb713 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 19:08:10 +0200 Subject: [PATCH 12/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- test/runtests.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 828a06bc..aaff0628 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,7 +18,8 @@ dataset_tests = [ "datasets/misc.jl", "datasets/text.jl", "datasets/vision/fashion_mnist.jl", - "datasets/vision/mnist.jl" #"datasets/vision/stacked_mnist.jl" + "datasets/vision/mnist.jl", + "datasets/vision/stacked_mnist.jl" ] no_ci_dataset_tests = [ From abf4e70af0bc5d1feb7a48a0bbdd9483c9c65816 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Thu, 15 Aug 2024 19:15:56 +0200 Subject: [PATCH 13/15] including dataset in src/MLDatasets.jl and docstring and an entry in the documentation in docs/src/datasets --- docs/src/datasets/vision.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/datasets/vision.md b/docs/src/datasets/vision.md index b776e871..f3ba58f2 100644 --- a/docs/src/datasets/vision.md +++ b/docs/src/datasets/vision.md @@ -25,7 +25,7 @@ CIFAR100 EMNIST FashionMNIST MNIST -Stacked_MNIST +StackedMNIST Omniglot SVHN2 ``` From 6280f185047b33d125d2223ea496093907e63f42 Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 12:24:35 +0200 Subject: [PATCH 14/15] adding tests --- src/datasets/vision/stacked_mnist.jl | 4 +++- test/runtests.jl | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index 6b77550d..1962f6ca 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -79,7 +79,9 @@ struct StackedMNIST <: SupervisedDataset end # Convenience constructors for StackedMNIST -StackedMNIST(; split = :train, Tx = Float32, dir = nothing) = StackedMNIST(Tx, split; dir) +function StackedMNIST(; split = :train, Tx = Float32, size = 60000, dir = nothing) + StackedMNIST(Tx, split; size, dir) +end StackedMNIST(split::Symbol; kws...) = StackedMNIST(; split, kws...) StackedMNIST(Tx::Type; kws...) = StackedMNIST(; Tx, kws...) function StackedMNIST(size::Integer; split = :train, Tx = Float32, dir = nothing) diff --git a/test/runtests.jl b/test/runtests.jl index aaff0628..260cdb1b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,7 +21,7 @@ dataset_tests = [ "datasets/vision/mnist.jl", "datasets/vision/stacked_mnist.jl" ] - +#= no_ci_dataset_tests = [ "datasets/graphs_no_ci.jl", "datasets/text_no_ci.jl", @@ -32,7 +32,8 @@ no_ci_dataset_tests = [ "datasets/vision/svhn2.jl", "datasets/meshes.jl" ] - +=# +#= @assert isempty(intersect(dataset_tests, no_ci_dataset_tests)) container_tests = [ @@ -42,6 +43,7 @@ container_tests = [ # "containers/jld2dataset.jl", "containers/cacheddataset.jl" ] +=# @testset "Datasets" begin @testset "$(split(t,"/")[end])" for t in dataset_tests From 6765b0193dccbd97462e13c7d7b51af4b894641e Mon Sep 17 00:00:00 2001 From: josemanuel22 Date: Fri, 16 Aug 2024 13:17:06 +0200 Subject: [PATCH 15/15] adding tests --- src/datasets/vision/stacked_mnist.jl | 2 +- test/runtests.jl | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/datasets/vision/stacked_mnist.jl b/src/datasets/vision/stacked_mnist.jl index 1962f6ca..5d260aba 100644 --- a/src/datasets/vision/stacked_mnist.jl +++ b/src/datasets/vision/stacked_mnist.jl @@ -134,7 +134,7 @@ Base.length(sm::StackedMNIST) = sm.size # Define the getindex function function Base.getindex(sm::StackedMNIST, idx::Int) - return sm.features[idx], sm.targets[idx] + return (features = sm.features[:, :, :, idx], targets = sm.targets[idx]) end # Function to extract and show an RGB image diff --git a/test/runtests.jl b/test/runtests.jl index 260cdb1b..aaff0628 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,7 +21,7 @@ dataset_tests = [ "datasets/vision/mnist.jl", "datasets/vision/stacked_mnist.jl" ] -#= + no_ci_dataset_tests = [ "datasets/graphs_no_ci.jl", "datasets/text_no_ci.jl", @@ -32,8 +32,7 @@ no_ci_dataset_tests = [ "datasets/vision/svhn2.jl", "datasets/meshes.jl" ] -=# -#= + @assert isempty(intersect(dataset_tests, no_ci_dataset_tests)) container_tests = [ @@ -43,7 +42,6 @@ container_tests = [ # "containers/jld2dataset.jl", "containers/cacheddataset.jl" ] -=# @testset "Datasets" begin @testset "$(split(t,"/")[end])" for t in dataset_tests