reorganize repo to more standard format (#5)

* reorganize repo * fix 32-bit tests * omit CR from CRLF at EOL
JuliaStrings · Mar 25, 2024 · 9b4b385 · 9b4b385
1 parent a19c430
commit 9b4b385
Show file tree

Hide file tree

Showing 12 changed files with 207 additions and 180 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -0,0 +1,74 @@
+name: CI
+# Run on master, tags, or any pull request
+on:
+ schedule:
+ - cron: '0 2 * * *' # Daily at 2 AM UTC (8 PM CST)
+ push:
+ branches: [master]
+ tags: ["*"]
+ pull_request:
+concurrency:
+ # Skip intermediate builds: always.
+ # Cancel intermediate builds: only if it is a pull request build.
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+jobs:
+ test:
+ name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ version:
+ - "1.6" # Earliest supported release
+ - "1" # Latest release
+ os:
+ - ubuntu-latest
+ - macOS-latest
+ - windows-latest
+ arch:
+ - x64
+ - x86
+ exclude:
+ # Test 32-bit only on Linux
+ - os: macOS-latest
+ arch: x86
+ - os: windows-latest
+ arch: x86
+ include:
+ # Add specific version used to run the reference tests.
+ # Must be kept in sync with version check in `test/runtests.jl`,
+ # and with the branch protection rules on the repository which
+ # require this specific job to pass on all PRs
+ # (see Settings > Branches > Branch protection rules).
+ - os: ubuntu-latest
+ version: 1.10.0
+ arch: x64
+ steps:
+ - uses: actions/checkout@v4
+ - uses: julia-actions/setup-julia@v1
+ with:
+ version: ${{ matrix.version }}
+ arch: ${{ matrix.arch }}
+ - uses: actions/cache@v4
+ env:
+ cache-name: cache-artifacts
+ with:
+ path: ~/.julia/artifacts
+ key: ${{ runner.os }}-${{ matrix.arch }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+ restore-keys: |
+ ${{ runner.os }}-${{ matrix.arch }}-test-${{ env.cache-name }}-
+ ${{ runner.os }}-${{ matrix.arch }}-test-
+ ${{ runner.os }}-${{ matrix.arch }}-
+ ${{ runner.os }}-
+ - uses: julia-actions/julia-buildpkg@latest
+ - run: |
+ git config --global user.name Tester
+ git config --global user.email [email protected]
+ - uses: julia-actions/julia-runtest@latest
+ - uses: julia-actions/julia-processcoverage@v1
+ - uses: codecov/codecov-action@v4
+ with:
+ files: lcov.info
+ token: ${{ secrets.CODECOV_TOKEN }}
+ fail_ci_if_error: false
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
@@ -0,0 +1,18 @@
+name: CompatHelper
+on:
+ schedule:
+ - cron: '0 0 * * *' # Every day at midnight UTC
+ workflow_dispatch:
+jobs:
+ CompatHelper:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: write
+ steps:
+ - name: Pkg.add("CompatHelper")
+ run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
+ - name: CompatHelper.main()
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
+ run: julia -e 'using CompatHelper; CompatHelper.main()'
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
@@ -0,0 +1,15 @@
+name: TagBot
+on:
+ issue_comment:
+ types:
+ - created
+ workflow_dispatch:
+jobs:
+ TagBot:
+ if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: JuliaRegistries/TagBot@v1
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ ssh: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/Manifest.toml b/Manifest.toml
diff --git a/Project.toml b/Project.toml
@@ -4,5 +4,14 @@ authors = ["codegodz <[email protected]>"]
 version = "0.1.0"
 
 [deps]
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StringViews = "354b36f9-a18e-4713-926e-db85100087ba"
+
+[compat]
+julia = "1.6"
+StringViews = "1.3"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/README.md b/README.md
@@ -22,13 +22,13 @@ To install use:
 
 ### Features
 Currently we only have some basic features like reading a line and splitting it.
-For examples on how to generate test data and run the codes below see [`src/test.jl`](https://github.com/JuliaStrings/ViewReader.jl/blob/master/src/test.jl)
+For examples on how to generate test data and run the code below, see [`test/runtests.jl`](https://github.com/JuliaStrings/ViewReader.jl/blob/master/test/runtests.jl)
 
 #### 1. eachlineV
-**`eachlineV(file_path::String; buffer_size::Int64=10_000)`**
+**`eachlineV(file_path::String; buffer_size::Int=10_000)`**
 
 
-This function can be used just like the base[ `eachline` ](https://docs.julialang.org/en/v1/base/io-network/#Base.eachline " `eachline` ") in Julia. The argument `buffer_size` determines the size of the underlaying UInt8 vector. The `buffer_size` should be bigger than the longest line in a file. If this is uknown just use a big number like 1M. This function will throw a warning if no new line is found when the eof is not reached yet - giving a clue to increase the `buffer_size`. 
+This function can be used just like the base[ `eachline` ](https://docs.julialang.org/en/v1/base/io-network/#Base.eachline " `eachline` ") in Julia. The argument `buffer_size` determines the size of the underlying UInt8 vector. The `buffer_size` should be bigger than the longest line in a file. If this is unknown, just use a big number like 1M. This function will emit a warning if no newline is found before the end of the file is reached - giving a clue to increase the `buffer_size`.
 
 **Example**
 
@@ -48,16 +48,16 @@ Similar to the base [`split`](https://docs.julialang.org/en/v1/base/strings/#Bas
 
 **Example**
 
-For example to check how often we see the string "TARGET" at column 3 in a given file 
+For example to check how often we see the string "TARGET" at column 3 in a given file
 ```Julia
 
 c = 0
 for line in eachlineV("../data/test.txt")
- data = splitV(line, '\t') 
+ data = splitV(line, '\t')
  if data[1] == "TARGET"
- c +=1 
+ c +=1
  end
-end 
+end
 println(c)
 ```
 
@@ -82,13 +82,13 @@ c = 0
 for line in eachlineV("../data/numbs.txt")
  for item in splitV(line, '\t')
  c += parseV(UInt32, item)
- end 
+ end
 end
 println(c)
 ```
 
 ### Benchmark
-We added a simple benchmark in [`src/test.jl`](https://github.com/JuliaStrings/ViewReader.jl/blob/master/src/test.jl), for my computer with:
+We added a simple benchmark in [`test/runtests.jl`](https://github.com/JuliaStrings/ViewReader.jl/blob/master/test/runtests.jl), for my computer with:
 - `gen_string_data(10_000)`
 - `gen_numb_data(10_000)`
 - and a buffer_size of `10_000`
@@ -113,7 +113,5 @@ so the best is just to try some buffer sizes and see where it works optimally
 
 To make this a bit more visual, we compared the base reader to the view reader.
 On the:
-- **x-axis** is the nubmer of lines in a file and 
+- **x-axis** is the number of lines in a file and
 - **y-axis** the time in seconds to iterate over them
-
-![BenchmarkImage](https://www.linkpicture.com/q/reader_benchmark.png)
diff --git a/src/FileReader.jl b/src/FileReader.jl
@@ -1,20 +1,19 @@
 
 using StringViews
-using StaticArrays
 
 ###########################################################################
-# Code to read from a file 
+# Code to read from a file
 ###########################################################################
 
 struct BufferedReader{IOT <: IO}
  io::IOT
- buffer::Int64
- tot_alloc::Int64
+ buffer::Int
+ tot_alloc::Int
  arr::Vector{UInt8}
 end
 
 # Function to flip elements in an array to a specified offset(buffer size here)
-function flip!(arr::Vector{UInt8}, buffer::Int64)
+function flip!(arr::Vector{UInt8}, buffer::Int)
  @inbounds @simd for i in 1:buffer
  arr[i] = arr[i+buffer]
  end
@@ -24,50 +23,50 @@ function read_next_chunk!(reader::BufferedReader)
  # Move last read chunk to front of the array
  # (except in first iter)
  flip!(reader.arr, reader.buffer)
- 
+
  # Store new chunk in second part of the array
- bytes_read::Int = readbytes!(reader.io, view(reader.arr, reader.buffer+1:reader.tot_alloc), reader.buffer) 
+ bytes_read::Int = readbytes!(reader.io, view(reader.arr, reader.buffer+1:reader.tot_alloc), reader.buffer)
 
  # If we read less than the buffer size we have to reset the array
  # values after "bytes_read" as this is old data (previous read)
  if bytes_read < reader.buffer
  @inbounds for i in reader.buffer+bytes_read+1:reader.tot_alloc
  reader.arr[i] = 0x00
- end 
- end 
+ end
+ end
 end
 
-function find_newline(reader::BufferedReader, state::Int64)
+function find_newline(reader::BufferedReader, state::Int)
  cur_stop = copy(state) + 1
- 
- @inbounds for i in (state + 1):reader.tot_alloc 
- if reader.arr[i] == 0x0a 
- return cur_stop:i-1, i 
- end 
- end 
- 
+
+ @inbounds for i in (state + 1):reader.tot_alloc
+ if reader.arr[i] == 0x0a
+ return cur_stop:(i > 1 && reader.arr[i-1] == 0x0d ? i-2 : i-1), i
+ end
+ end
+
  return 0:0, cur_stop
 end
 
-function eachlineV(io::IO; buffer_size::Int64=10_000)
+function eachlineV(io::IO; buffer_size::Int=10_000)
  # Allocate buffer array
  tot_alloc = buffer_size * 2
- buffer_arr = zeros(UInt8, tot_alloc) 
- 
- # We will set up a buffered reader through which we 
+ buffer_arr = zeros(UInt8, tot_alloc)
+
+ # We will set up a buffered reader through which we
  # stream the file bytes, >4x as fast as a regular reader
  reader = BufferedReader(io, buffer_size, buffer_size*2, buffer_arr)
 
- # Also populate the reader with the first chunk already 
+ # Also populate the reader with the first chunk already
  read_next_chunk!(reader)
  return reader
 end
 
-function eachlineV(file_path::String; buffer_size::Int64=10_000)
+function eachlineV(file_path::String; buffer_size::Int=10_000)
  io = open(file_path, "r")
  return eachlineV(io, buffer_size=buffer_size)
 end
- 
+
 
 # Override in case we want to reuse buffers and handles
 function eachlineV(io::IO, buffer_arr::Vector{UInt8})
@@ -79,28 +78,25 @@ function eachlineV(io::IO, buffer_arr::Vector{UInt8})
 end
 
 @inline function Base.iterate(reader::BufferedReader)
- # This is the first iter so only the last half of the array is filled now 
+ # This is the first iter so only the last half of the array is filled now
  # hence start reading from buffer + 1
  r, state = find_newline(reader, reader.buffer)
  return StringView(view(reader.arr, r)), state
 end
 
-@inline function Base.iterate(reader::BufferedReader, state::Int64)
+@inline function Base.iterate(reader::BufferedReader, state::Int)
  r, state = find_newline(reader, state)
  if r.start == 0
  if !eof(reader.io)
  read_next_chunk!(reader)
  r, state = find_newline(reader, state - reader.buffer - 1)
  else
  close(reader.io)
- return nothing 
- end 
+ return nothing
+ end
  end
- # I twould be odd to not reach EOF but still not find 
+ # It would be odd to not reach EOF but still not find
  # a full line, throw warning
  r.stop == 0 && @warn ("Buffer probably too small")
  return StringView(view(reader.arr, r)), state
 end
-
-
-