Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ImageNet #146

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7852e48
Add Images and ImageMagick deps using LazyModules
adrhill Jun 23, 2022
20be9a6
Add Image preprocessing script
adrhill Jun 23, 2022
59afe92
Add ImageNet dataset
adrhill Jun 23, 2022
1f4dfaf
Rename ImageNetReader file to match struct name
adrhill Jun 23, 2022
dfdeaa5
Formatting fixes
adrhill Jun 23, 2022
d2ded7e
Remove lowpass on image before resizing
adrhill Jun 23, 2022
4809296
Use `FileDataset` and replace ImageMagick with JpegTurbo
adrhill Jun 23, 2022
cac14d2
Add missing reference URL to comment
adrhill Jun 23, 2022
1e850ba
Remove use of `imresize`
adrhill Jun 23, 2022
2aa0170
Replace Images dependency by ImageCore
adrhill Jun 23, 2022
06ad214
Use StackViews.jl for batching
adrhill Jun 23, 2022
3097302
Load ImageCore and StackViews non-lazily
adrhill Jun 24, 2022
02e966d
Bake `Tx` into FileDataset's `loadfn`
adrhill Jun 24, 2022
9fb811c
Fix indexing bug in `center_crop_view`
adrhill Jun 24, 2022
4e0e8d4
Move installation guide into separate markdown file
adrhill Jul 8, 2022
0daca90
Include feedback from code review
adrhill Jul 8, 2022
09feb3d
Support custom preprocessing functions
adrhill Feb 2, 2023
df14fea
Sort classes by WordNet ID
adrhill Feb 2, 2023
c92ae00
Update docstring
adrhill Feb 2, 2023
8637ebe
Merge branch 'master' into ah/imagenet
adrhill Feb 2, 2023
944bd83
Update docstrings
adrhill Feb 2, 2023
fe38d43
Remove StackViews dependency
adrhill Feb 7, 2023
6af86c6
Remove normalization constants
adrhill Feb 7, 2023
95b13d9
Add more metadata
adrhill Feb 7, 2023
09d5be4
Add `img_size` argument
adrhill Feb 8, 2023
ae1929d
Format to SciML code style, matching #205
adrhill Feb 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@ FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
LazyModules = "8cdb02fc-e678-4876-92c5-9defec4f444e"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StackViews = "cae243ae-269e-4f55-b966-ac2d0dc13c15"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
Expand All @@ -33,15 +36,18 @@ FixedPointNumbers = "0.8"
GZip = "0.5"
Glob = "1.3"
HDF5 = "0.16.2"
ImageCore = "0.9"
ImageShow = "0.3"
JLD2 = "0.4.21"
JSON3 = "1"
JpegTurbo = "0.1"
LazyModules = "0.3"
MAT = "0.10"
MLUtils = "0.2.0"
NPZ = "0.4.1"
Pickle = "0.3"
Requires = "1"
StackViews = "0.1"
Tables = "1.6"
julia = "1.6"

Expand Down
1 change: 1 addition & 0 deletions docs/src/datasets/vision.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ convert2image
```@docs
CIFAR10
CIFAR100
ImageNet
adrhill marked this conversation as resolved.
Show resolved Hide resolved
EMNIST
FashionMNIST
MNIST
Expand Down
9 changes: 9 additions & 0 deletions src/MLDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ import CSV
@lazy import HDF5="f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
# @lazy import JLD2

# Required for ImageNet
@lazy import JpegTurbo="b835a17e-a41a-41e7-81f0-2f016b05efe0" # Open jpg-files
@lazy import ImageCore="a09fc81d-aa75-5fe9-8630-4744c3626534" # Preprocessing
@lazy import StackViews="cae243ae-269e-4f55-b966-ac2d0dc13c15" # Batching of images
adrhill marked this conversation as resolved.
Show resolved Hide resolved

export getobs, numobs # From MLUtils.jl

include("abstract_datasets.jl")
Expand Down Expand Up @@ -86,6 +91,9 @@ include("datasets/vision/cifar100.jl")
export CIFAR100
include("datasets/vision/svhn2.jl")
export SVHN2
include("datasets/vision/imagenet_reader/ImageNetReader.jl")
include("datasets/vision/imagenet.jl")
export ImageNet

## Text

Expand Down Expand Up @@ -147,6 +155,7 @@ function __init__()
__init__fashionmnist()
__init__mnist()
__init__svhn2()
__init__imagenet()
adrhill marked this conversation as resolved.
Show resolved Hide resolved
end

end #module
204 changes: 204 additions & 0 deletions src/datasets/vision/imagenet.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# Homepage of the ImageNet project; interpolated into the manual-download instructions
# and into the `ImageNet` docstring.
const IMAGENET_WEBSITE = "https://image-net.org/"

# Register ImageNet as a `ManualDataDep`: the dataset cannot be downloaded automatically,
# so the registered message explains how to obtain the files and which folder layout
# the loader expects.
function __init__imagenet()
    DEPNAME = "ImageNet"
    # TODO: currently markdown formatting is not applied
    message = """
        The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017) can be downloaded at
        $IMAGENET_WEBSITE after signing up and accepting the terms of access.
        It is therefore required that you download this dataset manually.

        ## Existing installation
        The dataset structure is assumed to look as follows:
        ```
        ImageNet
        ├── train
        ├── val
        │   ├── n01440764
        │   │   ├── ILSVRC2012_val_00000293.JPEG
        │   │   ├── ILSVRC2012_val_00002138.JPEG
        │   │   └── ...
        │   ├── n01443537
        │   └── ...
        ├── test
        └── devkit
            ├── data
            │   ├── meta.mat
            │   └── ...
            └── ...
        ```
        If your existing copy of the ImageNet dataset uses another file structure,
        we recommend to create symbolic links, e.g. using `ln` on Unix-like operating
        systems (use absolute paths as link targets):
        ```bash
        cd ~/.julia/datadeps
        mkdir -p ImageNet/devkit
        ln -s /my/path/to/imagenet/val ImageNet/val
        ln -s /my/path/to/imagenet/devkit/data ImageNet/devkit/data
        ```

        ## New installation
        Download the following files from the ImageNet website ($IMAGENET_WEBSITE):
        * `ILSVRC2012_devkit_t12.tar.gz`
        * `ILSVRC2012_img_train.tar`, only required for `:train` split
        * `ILSVRC2012_img_val.tar`, only required for `:val` split

        After downloading the data, move and extract the training and validation images to
        labeled subfolders running the following shell script:
        ```bash
        # Extract the training data:
        mkdir -p ImageNet/train && tar -xvf ILSVRC2012_img_train.tar -C ImageNet/train
        # Unpack all 1000 compressed tar-files, one for each category:
        cd ImageNet/train
        find . -name "*.tar" | while read NAME ; do mkdir -p "\${NAME%.tar}"; tar -xvf "\${NAME}" -C "\${NAME%.tar}"; rm -f "\${NAME}"; done

        # Extract the validation data:
        cd ../..
        mkdir -p ImageNet/val && tar -xvf ILSVRC2012_img_val.tar -C ImageNet/val

        # Run script from soumith to create all class directories and move images into corresponding directories:
        cd ImageNet/val
        wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash

        # Extract metadata from the devkit (dropping the archive's top-level folder so that
        # `meta.mat` ends up in ImageNet/devkit/data):
        cd ../..
        mkdir -p ImageNet/devkit && tar -xvf ILSVRC2012_devkit_t12.tar.gz -C ImageNet/devkit --strip-components=1
        ```
        """
    # shell script based on PyTorch example "ImageNet training in PyTorch":
    # https://github.com/pytorch/examples/blob/d5478765d38210addf474dd73faf0d103052027a/imagenet/extract_ILSVRC.sh
    return register(ManualDataDep(DEPNAME, message))
end

"""
    ImageNet(; Tx=Float32, split=:train, dir=nothing)
    ImageNet([Tx, split])

The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017).
This is the most highly-used subset of ImageNet. It spans 1000 object classes and contains
1,281,167 training images, 50,000 validation images and 100,000 test images.

Only the image file paths are kept in memory; images are read from disk when the dataset
is indexed and are returned preprocessed in 224x224x3 format using RGB color space.

- Authors: Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh,
  Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein,
  Alexander C. Berg, Li Fei-Fei
- Website: $IMAGENET_WEBSITE
- Reference: Russakovsky et al., ImageNet Large Scale Visual Recognition Challenge
  (https://arxiv.org/abs/1409.0575)

# Arguments

$ARGUMENTS_SUPERVISED_ARRAY
- `split`: selects the data partition. Can take the values `:train`, `:val` or `:test`.

# Fields

$FIELDS_SUPERVISED_ARRAY
- `split`.

# Methods

$METHODS_SUPERVISED_ARRAY
- [`convert2image`](@ref) converts features to `RGB` images.

# Examples

```julia-repl
julia> using MLDatasets: ImageNet

julia> dataset = ImageNet(:val)
dataset ImageNet:
  metadata    =>    Dict{String, Any} with 4 entries
  split       =>    :val
  files       =>    50000-element Vector{String}
  targets     =>    50000-element Vector{Int64}
  Tx          =>    Float32

julia> dataset[1:5].targets
5-element Vector{Int64}:
 1
 1
 1
 1
 1

julia> X, y = dataset[1:5];

julia> size(X)
(224, 224, 3, 5)

julia> dataset.metadata
Dict{String, Any} with 4 entries:
  "class_WNIDs" => ["n02119789", "n02100735", "n02110185", "n02096294", "n02102040", "n02066245", "n02509815", "n02124075", "n02417914", "n02123394" … "n02815834", "n09229709", "n07697313", "n03888605", "n03355925", "n03…
  "class_description" => ["small grey fox of southwestern United States; may be a subspecies of Vulpes velox", "an English breed having a plumed tail and a soft silky coat that is chiefly white", "breed of sled dog developed in …
  "class_names" => Vector{SubString{String}}[["kit fox", "Vulpes macrotis"], ["English setter"], ["Siberian husky"], ["Australian terrier"], ["English springer", "English springer spaniel"], ["grey whale", "gray whale", "d…
  "wnid_to_label" => Dict("n07693725"=>768, "n03775546"=>829, "n01689811"=>469, "n02100877"=>192, "n02441942"=>48, "n04371774"=>569, "n07717410"=>741, "n03347037"=>919, "n04355338"=>526, "n02097474"=>158…)
```
"""
struct ImageNet <: SupervisedDataset
    metadata::Dict{String,Any}
    split::Symbol
    files::Vector{String}
    targets::Vector{Int}
    Tx::Type
end

# Convenience constructors. Every keyword that is not consumed here (`dir`, `train_dir`,
# `val_dir`, `test_dir`, `devkit_dir`) is forwarded to `ImageNet(Tx, split; ...)`, so the
# folder names can be customised from any of these entry points.
ImageNet(; split=:train, Tx=Float32, kws...) = ImageNet(Tx, split; kws...)
ImageNet(split::Symbol; kws...) = ImageNet(; split, kws...)
ImageNet(Tx::Type; kws...) = ImageNet(; Tx, kws...)

# Main constructor: reads the devkit metadata, collects the image file paths of the
# requested split and derives one integer label per file from its class folder name.
#
# - `Tx`: element type handed to the image reader when the dataset is indexed.
# - `split`: `:train`, `:val` or `:test`; anything else throws an `ArgumentError`.
# - `dir`: dataset root; `nothing` resolves the "ImageNet" data dependency.
# - `train_dir`, `val_dir`, `test_dir`, `devkit_dir`: folder names relative to the root.
function ImageNet(
    Tx::Type,
    split::Symbol;
    dir=nothing,
    train_dir="train",
    val_dir="val",
    test_dir="test",
    devkit_dir="devkit",
)
    if split ∉ (:train, :val, :test)
        throw(ArgumentError("`split` must be :train, :val or :test, got $(repr(split))"))
    end

    DEPNAME = "ImageNet"
    METADATA_FILENAME = joinpath(devkit_dir, "data", "meta.mat")

    TRAINSET_SIZE = 1_281_167
    VALSET_SIZE = 50_000
    TESTSET_SIZE = 100_000

    # Load metadata
    file_path = datafile(DEPNAME, METADATA_FILENAME, dir)
    metadata = ImageNetReader.read_metadata(file_path)

    # Use the same root for the images as for the metadata. Previously a user-supplied
    # `dir` was ignored here and the images were always looked up in the data dependency.
    # NOTE(review): assumes `datafile` treats `dir` as the dataset root -- confirm.
    root_dir = if dir === nothing
        @datadep_str DEPNAME
    else
        dir
    end

    split_dir, expected_size = if split == :train
        (train_dir, TRAINSET_SIZE)
    elseif split == :val
        (val_dir, VALSET_SIZE)
    else
        (test_dir, TESTSET_SIZE)
    end
    files = ImageNetReader.readdata(joinpath(root_dir, split_dir))
    @assert length(files) == expected_size "expected $expected_size images for split $(repr(split)), found $(length(files))"

    # Labels come from the name of each file's parent folder (its WordNet ID).
    # NOTE(review): this requires per-class subfolders; confirm the `:test` split has them.
    wnid_to_label = metadata["wnid_to_label"]
    targets = [wnid_to_label[wnid] for wnid in ImageNetReader.load_wnids(files)]
    return ImageNet(metadata, split, files, targets, Tx)
end

# Integer-valued features: convert to `UInt8`, reinterpret as `N0f8` and defer to the
# generic method below.
function convert2image(::Type{<:ImageNet}, x::AbstractArray{<:Integer})
    bytes = convert(Array{UInt8}, x)
    return convert2image(ImageNet, reinterpret(N0f8, bytes))
end

# Generic features: undo the ImageNet preprocessing to obtain an image.
function convert2image(::Type{<:ImageNet}, x)
    return ImageNetReader.inverse_preprocess(x)
end

# Number of observations, i.e. the number of image files in the split.
# (The struct has no `image_files` field; the paths are stored in `files`.)
Base.length(d::ImageNet) = length(d.files)
# Load every image of the split together with all targets.
function Base.getindex(d::ImageNet, ::Colon)
    # Throw warning here that ImageNet probably will not fit in memory?
    features = ImageNetReader.readimage(d.Tx, d.files)
    return (; features, targets=d.targets)
end
# Load the image(s) selected by `i` from disk and pair them with the matching target(s).
function Base.getindex(d::ImageNet, i)
    features = ImageNetReader.readimage(d.Tx, d.files[i])
    return (; features, targets=d.targets[i])
end
48 changes: 48 additions & 0 deletions src/datasets/vision/imagenet_reader/ImageNetReader.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
module ImageNetReader
import ..FileDataset
import ..read_mat
import ..@lazy

@lazy import JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
CarloLucibello marked this conversation as resolved.
Show resolved Hide resolved
@lazy import ImageCore="a09fc81d-aa75-5fe9-8630-4744c3626534"
@lazy import StackViews="cae243ae-269e-4f55-b966-ac2d0dc13c15"
adrhill marked this conversation as resolved.
Show resolved Hide resolved

# Number of classes; used to validate the metadata IDs and to number the labels `1:NCLASSES`.
const NCLASSES = 1000
# (height, width) requested from the JPEG decoder and used as the default center-crop size.
const IMGSIZE = (224, 224)
adrhill marked this conversation as resolved.
Show resolved Hide resolved

include("preprocess.jl")

# Read the devkit's `meta.mat` and return a dictionary describing the classes:
# WordNet IDs, class names, descriptions and a WordNet-ID => integer-label lookup.
function read_metadata(file::AbstractString)
    synsets = read_mat(file)["synsets"]
    # Keep only the synsets without children.
    leaf = iszero.(synsets["num_children"])
    @assert synsets["ILSVRC2012_ID"][leaf] == 1:NCLASSES

    wnids = Vector{String}(synsets["WNID"][leaf]) # WordNet IDs
    return Dict{String,Any}(
        "class_WNIDs" => wnids,
        "class_names" => split.(synsets["words"][leaf], ", "),
        "class_description" => Vector{String}(synsets["gloss"][leaf]),
        "wnid_to_label" => Dict(wnids .=> 1:NCLASSES),
    )
end

# ImageNet is too large to hold in memory, so only the paths of the `*.JPEG` files
# found under `dir` are returned.
function readdata(dir::AbstractString)
    dataset = FileDataset(identity, dir, "*.JPEG")
    return dataset.paths
end

# Get the WordNet ID of each file, i.e. the name of the folder that contains it.
# `dirname`/`basename` are used instead of splitting on "/" so that paths using the
# platform's native separator are handled as well.
function load_wnids(files::AbstractVector{<:AbstractString})
    return [basename(dirname(f)) for f in files]
end

# Load image from ImageNetFile path and preprocess it to normalized 224x224x3 Array{Tx,3}
function readimage(Tx::Type{<:Real}, file::AbstractString)
im = JpegTurbo.jpeg_decode(ImageCore.RGB{Tx}, file; preferred_size=IMGSIZE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if all ImageNet images meet the requirement, but note that the actual decoded result size `size(im)` might not be `preferred_size`.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually running into warnings with images smaller than preferred_size:

┌ Warning: Failed to infer appropriate scale ratio, use `scale_ratio=2` instead.
│   actual_size = (127, 100)
│   preferred_size = (224, 224)
└ @ JpegTurbo ~/.julia/packages/JpegTurbo/b5MSG/src/decode.jl:165

do you have experience with this @lorenzoh ?

Copy link
Member

@johnnychen94 johnnychen94 Jun 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for this is that JpegTurbo.jl (or libjpeg-turbo) only supports a very limited range of `scale_ratio` values: they are $M/8$ where $M \in \{1, 2, \dots, 16\}$. Thus the maximal possible `scale_ratio` is 2. This is exactly why `size(img) == preferred_size` may not hold in practice.

The supported scale_ratio permits a faster decoding algorithm (by scaling the coefficients instead of the actual images), this is why we can observe the performance boost here.


Perhaps the safest solution (I think) is to add an `imresize` after it:

    img = @suppress_err JpegTurbo.jpeg_decode(file; preferred_size=(224, 224))
    if size(img) != (224, 224)
        img = imresize(img, (224, 224))
    end

The @suppress_err macro is a handy tool from https://github.com/JuliaIO/Suppressor.jl to disable this warning message.

I don't plan to make this imresize happen automatically in JpegTurbo.jl because it would otherwise break people's expectation on "keyword preferred_size can make decoding faster"

return preprocess(Tx, im)
end

# Concatenate arrays along the fourth (batch) dimension.
function cat_batchdim(xs...)
    return cat(xs...; dims=4)
end

# Load several images and stack them into a single batched array view.
function readimage(Tx::Type, files::AbstractVector{<:AbstractString})
    images = map(file -> readimage(Tx, file), files)
    return StackViews.StackView(images)
end

end # module
32 changes: 32 additions & 0 deletions src/datasets/vision/imagenet_reader/preprocess.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Image preprocessing for ImageNet models.
# Code adapted from Metalhead 0.5.3's utils.jl

# Correction for even sizes: an even-length window around the midpoint would otherwise
# be one element too long. Returns 1 for even `i` and 0 for odd `i`.
adjust(i::Integer) = ifelse(iszero(i % 2), 1, 0)

# Take rectangle of pixels of shape `outsize` at the center of image `im`.
# Returns a view (no copy). Throws a `BoundsError` if `im` is smaller than `outsize`.
function center_crop_view(im::AbstractMatrix, outsize=IMGSIZE)
    h2, w2 = div.(outsize, 2) # half height, half width of view
    h_adjust, w_adjust = adjust.(outsize)
    # `div(end, 2) - h2` is a zero-based offset, so both range ends are shifted by one.
    # Without the shift an image of exactly `outsize` would be indexed at 0.
    return @view im[
        (div(end, 2) - h2 + 1):(div(end, 2) + h2 - h_adjust + 1),
        (div(end, 2) - w2 + 1):(div(end, 2) + w2 - w_adjust + 1),
    ]
end

# Coefficients taken from PyTorch's ImageNet normalization code.
# One value per color channel; they are broadcast against the channel dimension when
# normalizing in `preprocess` and when undoing the normalization in `inverse_preprocess`.
const PYTORCH_MEAN = [0.485f0, 0.456f0, 0.406f0]
const PYTORCH_STD = [0.229f0, 0.224f0, 0.225f0]

# Center-crop an RGB image, normalize each color channel with the PyTorch mean/std and
# return the result as an array with element type `Tx` in WHC layout.
function preprocess(Tx::Type, im::AbstractMatrix{<:ImageCore.AbstractRGB})
    cropped = center_crop_view(im)
    channels = ImageCore.channelview(cropped)
    normalized = (channels .- PYTORCH_MEAN) ./ PYTORCH_STD
    # Convert from CHW (Image.jl's channel ordering) to WHC:
    return Tx.(PermutedDimsArray(normalized, (3, 2, 1)))
end

# Undo `preprocess` for a single image (3 dimensions) or a batch (4 dimensions):
# swap the first and third dimensions back, revert the normalization and view the
# result as `RGB` pixels.
function inverse_preprocess(x::AbstractArray{T,N}) where {T,N}
    @assert N == 3 || N == 4
    channels_first = PermutedDimsArray(x, (3, 2, 1, 4:N...))
    denormalized = channels_first .* PYTORCH_STD .+ PYTORCH_MEAN
    return ImageCore.colorview(ImageCore.RGB, denormalized)
end