Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a generic row iterator #179

Merged
merged 4 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ include("assertions.jl")
include("tabletraits.jl")
include("distributions.jl")
include("tableselection.jl")
include("tablerows.jl")
include("transforms.jl")

export
Expand Down
126 changes: 126 additions & 0 deletions src/tablerows.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
tablerows(table)

Return an iterator over the rows of `table`, chosen according to
whether the table supports row access. Each row is itself iterable,
implements the `Tables.AbstractRow` interface, and supports the
following forms of column access:

```julia
row.colname
row."colname"
row[colindex]
row[:colname]
row["colname"]
```
"""
function tablerows(table)
    # reject anything that does not satisfy the Tables.jl table interface
    Tables.istable(table) || throw(ArgumentError("the argument is not a table"))

    # row-access tables are wrapped row by row, all other tables column by column
    return Tables.rowaccess(table) ? RTableRows(table) : CTableRows(table)
end

#------------------
# COMMON INTERFACE
#------------------

"""
    TableRow

Abstract supertype of the row wrappers defined in this file.

Concrete subtypes are expected to implement `Tables.columnnames(row)`,
`Tables.getcolumn(row, i::Int)` and `Tables.getcolumn(row, nm::Symbol)`.
Property access, indexing and iteration are derived from those methods below.
"""
abstract type TableRow end

# column access
# every access form is forwarded to `Tables.getcolumn`; strings are converted to symbols
Base.getproperty(row::TableRow, nm::Symbol) = Tables.getcolumn(row, nm)
Base.getproperty(row::TableRow, nm::AbstractString) = Tables.getcolumn(row, Symbol(nm))
Base.getindex(row::TableRow, i::Int) = Tables.getcolumn(row, i)
Base.getindex(row::TableRow, nm::Symbol) = Tables.getcolumn(row, nm)
Base.getindex(row::TableRow, nm::AbstractString) = Tables.getcolumn(row, Symbol(nm))

# `getproperty` is overloaded above, so `propertynames` has to report the column names
# rather than the struct fields; otherwise `hasproperty(row, :col)` and tab completion
# disagree with what `row.col` actually accepts.
# NOTE(review): Tables.jl's generic `columnnames` fallback is believed to be based on
# `propertynames`; subtypes must keep defining their own `Tables.columnnames` method so
# the two functions never call each other in a loop.
Base.propertynames(row::TableRow, private::Bool=false) = Tables.columnnames(row)

# iterator interface
# a row iterates over its values in column order; the state is the next column index
Base.length(row::TableRow) = length(Tables.columnnames(row))
function Base.iterate(row::TableRow, state=1)
    state > length(row) && return nothing
    return (Tables.getcolumn(row, state), state + 1)
end

#--------------
# COLUMN TABLE
#--------------

"""
    CTableRows(table)

Row iterator built on top of the columns of `table`.
Stores the result of `Tables.columns(table)` together with the
number of rows, which is computed once at construction time.
"""
struct CTableRows{T}
    cols::T
    nrows::Int

    function CTableRows(table)
        columns = Tables.columns(table)
        return new{typeof(columns)}(columns, _nrows(columns))
    end
end

# iterator interface
# the number of rows was computed when the iterator was constructed
Base.length(rows::CTableRows) = rows.nrows

# every element is a `CTableRow` wrapping the same columns object of type `T`, so the
# element type is known statically; declaring it lets `collect` and similar functions
# build concretely typed containers instead of `Vector{Any}`
Base.eltype(::Type{CTableRows{T}}) where {T} = CTableRow{T}

# the state is the 1-based index of the next row to produce
function Base.iterate(rows::CTableRows, state::Int=1)
    state > length(rows) && return nothing
    return (CTableRow(rows.cols, state), state + 1)
end

"""
    CTableRow(cols, ind)

Row of a column table: holds the columns object `cols`
and the index `ind` of the row inside each column.
"""
struct CTableRow{T} <: TableRow
    cols::T
    ind::Int
end

# getters
# `getfield` is required here because `getproperty` on a `TableRow`
# is forwarded to column access
function getcols(row::CTableRow)
    return getfield(row, :cols)
end

function getind(row::CTableRow)
    return getfield(row, :ind)
end

# AbstractRow interface
# a value is obtained by fetching the whole column and indexing it at the row position
function Tables.columnnames(row::CTableRow)
    return Tables.columnnames(getcols(row))
end

function Tables.getcolumn(row::CTableRow, i::Int)
    column = Tables.getcolumn(getcols(row), i)
    return column[getind(row)]
end

function Tables.getcolumn(row::CTableRow, nm::Symbol)
    column = Tables.getcolumn(getcols(row), nm)
    return column[getind(row)]
end

#-----------
# ROW TABLE
#-----------

"""
    RTableRows(table)

Row iterator that wraps the iterator returned by `Tables.rows(table)`.
"""
struct RTableRows{T}
    rows::T

    function RTableRows(table)
        inner = Tables.rows(table)
        return new{typeof(inner)}(inner)
    end
end

# iterator interface
# length and iteration state are delegated to the wrapped row iterator;
# only the produced rows are re-wrapped as `RTableRow`
Base.length(rows::RTableRows) = length(rows.rows)
function Base.iterate(rows::RTableRows, args...)
    result = iterate(rows.rows, args...)
    isnothing(result) && return nothing
    inner, nextstate = result
    return (RTableRow(inner), nextstate)
end

"""
    RTableRow(row)

Wrapper around a single `row` produced by a row iterator.
"""
struct RTableRow{T} <: TableRow
    row::T
end

# getters
# `getfield` is required here because `getproperty` on a `TableRow`
# is forwarded to column access
function getrow(row::RTableRow)
    return getfield(row, :row)
end

# AbstractRow interface
# every query is forwarded to the wrapped row
function Tables.columnnames(row::RTableRow)
    return Tables.columnnames(getrow(row))
end

function Tables.getcolumn(row::RTableRow, i::Int)
    return Tables.getcolumn(getrow(row), i)
end

function Tables.getcolumn(row::RTableRow, nm::Symbol)
    return Tables.getcolumn(getrow(row), nm)
end

#-------
# UTILS
#-------

# Number of rows of a columns object, measured as the length of its first column.
# A columns object without any column is reported as having zero rows.
function _nrows(cols)
    colnames = Tables.columnnames(cols)
    if isempty(colnames)
        return 0
    end
    firstcol = Tables.getcolumn(cols, first(colnames))
    return length(firstcol)
end
6 changes: 5 additions & 1 deletion src/transforms/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ Filters the table returning only the rows where the `func` returns true.
```julia
Filter(row -> sum(row) > 10)
Filter(row -> row.a == true && row.b < 30)
Filter(row -> row."a" == true && row."b" < 30)
Filter(row -> row[1] == true && row[2] < 30)
Filter(row -> row[:a] == true && row[:b] < 30)
Filter(row -> row["a"] == true && row["b"] < 30)
```

## Notes
Expand All @@ -26,7 +30,7 @@ isrevertible(::Type{<:Filter}) = true

function preprocess(transform::Filter, table)
# lazy row iterator
rows = Tables.rows(table)
rows = tablerows(table)

# selected indices
sinds, nrows = Int[], 0
Expand Down
124 changes: 62 additions & 62 deletions test/colspec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,104 +3,104 @@
tupnames = (:a, :b, :c, :d, :e, :f)

# vector of symbols
colspec = TableTransforms.colspec([:a, :c, :e])
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec([:a, :c, :e])
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# tuple of symbols
colspec = TableTransforms.colspec((:a, :c, :e))
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec((:a, :c, :e))
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# vector of strings
colspec = TableTransforms.colspec(["a", "c", "e"])
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec(["a", "c", "e"])
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# tuple of strings
colspec = TableTransforms.colspec(("a", "c", "e"))
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec(("a", "c", "e"))
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# vector of integers
colspec = TableTransforms.colspec([1, 3, 5])
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec([1, 3, 5])
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# tuple of integers
colspec = TableTransforms.colspec((1, 3, 5))
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec((1, 3, 5))
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# regex
colspec = TableTransforms.colspec(r"[ace]")
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec(r"[ace]")
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :c, :e]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :c, :e]

# colon
colspec = TableTransforms.colspec(:)
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec(:)
snames = TT.choose(colspec, vecnames)
@test snames == [:a, :b, :c, :d, :e, :f]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == [:a, :b, :c, :d, :e, :f]

# nothing
colspec = TableTransforms.colspec(nothing)
snames = TableTransforms.choose(colspec, vecnames)
colspec = TT.colspec(nothing)
snames = TT.choose(colspec, vecnames)
@test snames == Symbol[]
snames = TableTransforms.choose(colspec, tupnames)
snames = TT.choose(colspec, tupnames)
@test snames == Symbol[]

# throws
colspec = TableTransforms.colspec(r"x")
@test_throws AssertionError TableTransforms.choose(colspec, vecnames)
@test_throws AssertionError TableTransforms.choose(colspec, tupnames)
@test_throws AssertionError TableTransforms.colspec(Symbol[])
@test_throws AssertionError TableTransforms.colspec(String[])
@test_throws AssertionError TableTransforms.colspec(Int[])
@test_throws ArgumentError TableTransforms.colspec(())
@test_throws ArgumentError TableTransforms.colspec(missing)
colspec = TT.colspec(r"x")
@test_throws AssertionError TT.choose(colspec, vecnames)
@test_throws AssertionError TT.choose(colspec, tupnames)
@test_throws AssertionError TT.colspec(Symbol[])
@test_throws AssertionError TT.colspec(String[])
@test_throws AssertionError TT.colspec(Int[])
@test_throws ArgumentError TT.colspec(())
@test_throws ArgumentError TT.colspec(missing)

# type stability
colspec = TableTransforms.colspec([:a, :b])
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec((:a, :b))
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec(["a", "b"])
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec(("a", "b"))
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec([1, 2])
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec((1, 2))
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec(r"[ab]")
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec(:)
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TableTransforms.colspec(nothing)
@inferred TableTransforms.choose(colspec, vecnames)
@inferred TableTransforms.choose(colspec, tupnames)
colspec = TT.colspec([:a, :b])
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec((:a, :b))
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec(["a", "b"])
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec(("a", "b"))
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec([1, 2])
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec((1, 2))
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec(r"[ab]")
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec(:)
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
colspec = TT.colspec(nothing)
@inferred TT.choose(colspec, vecnames)
@inferred TT.choose(colspec, tupnames)
end
2 changes: 1 addition & 1 deletion test/distributions.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Sanity checks for the empirical distribution built from 1000 normally
# distributed samples.
@testset "Distributions" begin
values = randn(1000)
# NOTE(review): this hunk is reported as "1 addition & 1 deletion", so the two
# assignments to `d` below appear to be the removed and the added version of the
# same line; the `TT` alias is not defined in the visible part of the diff.
d = TableTransforms.EmpiricalDistribution(values)
d = TT.EmpiricalDistribution(values)
# the cdf evaluated at a random point must lie in [0, 1]
@test 0.0 ≤ cdf(d, rand()) ≤ 1.0
# the median must lie between the sample extrema
@test minimum(values) ≤ quantile(d, 0.5) ≤ maximum(values)
end
12 changes: 10 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,16 @@ Polynomial(args::T...) where {T<:Real} = Polynomial(collect(args))
include("metatable.jl")

# list of tests
# NOTE(review): this hunk is reported as "10 additions & 2 deletions", so the
# single-line assignment below appears to be the removed version of the list and
# the multi-line assignment after it the added version.
testfiles =
["distributions.jl", "colspec.jl", "assertions.jl", "transforms.jl", "metadata.jl", "tableselection.jl", "shows.jl"]
# one entry per line; compared with the list above, "tablerows.jl" is the only new entry
testfiles = [
"distributions.jl",
"colspec.jl",
"assertions.jl",
"transforms.jl",
"metadata.jl",
"tableselection.jl",
"tablerows.jl",
"shows.jl"
]

@testset "TableTransforms.jl" begin
for testfile in testfiles
Expand Down
Loading
Loading