Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

23 consolidate input and output handling #26

Merged
merged 13 commits into from
Dec 23, 2024
64 changes: 64 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,40 @@ PATH
remote: .
specs:
csv-util (0.6a.0)
activesupport (~> 5.0)

GEM
remote: https://rubygems.org/
specs:
activesupport (5.2.8.1)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 0.7, < 2)
minitest (~> 5.1)
tzinfo (~> 1.1)
ast (2.4.2)
coderay (1.1.3)
concurrent-ruby (1.3.3)
diff-lcs (1.5.0)
i18n (1.14.5)
concurrent-ruby (~> 1.0)
json (2.7.2)
language_server-protocol (3.17.0.3)
method_source (1.0.0)
minitest (5.24.0)
parallel (1.25.1)
parser (3.3.3.0)
ast (~> 2.4.1)
racc
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
racc (1.8.0)
rack (3.1.4)
rainbow (3.1.1)
rake (13.0.6)
regexp_parser (2.9.2)
rexml (3.3.0)
strscan
rspec (3.12.0)
rspec-core (~> 3.12.0)
rspec-expectations (~> 3.12.0)
Expand All @@ -26,6 +49,46 @@ GEM
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-support (3.12.0)
rubocop (1.64.1)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
parallel (~> 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0)
rexml (>= 3.2.5, < 4.0)
rubocop-ast (>= 1.31.1, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.31.3)
parser (>= 3.3.1.0)
rubocop-capybara (2.21.0)
rubocop (~> 1.41)
rubocop-performance (1.21.1)
rubocop (>= 1.48.1, < 2.0)
rubocop-ast (>= 1.31.1, < 2.0)
rubocop-rails (2.25.0)
activesupport (>= 4.2.0)
rack (>= 1.1)
rubocop (>= 1.33.0, < 2.0)
rubocop-ast (>= 1.31.1, < 2.0)
rubocop-rake (0.6.0)
rubocop (~> 1.0)
rubocop-rspec (3.0.1)
rubocop (~> 1.61)
ruby-progressbar (1.13.0)
strscan (3.1.0)
thread_safe (0.3.6)
tzinfo (1.2.11)
thread_safe (~> 0.1)
unicode-display_width (2.5.0)
upennlib-rubocop (1.2.0)
rubocop (~> 1.24)
rubocop-capybara
rubocop-performance
rubocop-rails
rubocop-rake
rubocop-rspec

PLATFORMS
arm64-darwin-21
Expand All @@ -36,6 +99,7 @@ DEPENDENCIES
pry (~> 0.14.1)
rake (~> 13.0)
rspec (~> 3.0)
upennlib-rubocop (~> 1.0)

BUNDLED WITH
2.4.3
4 changes: 3 additions & 1 deletion csv-util.gemspec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true

require_relative 'lib/csvutil/version'
require_relative 'lib/csv_util/version'

Gem::Specification.new do |spec|
spec.name = 'csv-util'
Expand Down Expand Up @@ -30,11 +30,13 @@ Gem::Specification.new do |spec|
spec.bindir = 'exe'
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
spec.require_paths = ['lib']
spec.add_runtime_dependency 'activesupport', '~> 5.0'

# Uncomment to register a new dependency of your gem
spec.add_development_dependency 'pry', '~> 0.14.1'
spec.add_development_dependency "rake", "~> 13.0"
spec.add_development_dependency "rspec", "~> 3.0"
spec.add_development_dependency 'upennlib-rubocop', '~> 1.0'

# For more information and examples about making a new gem, check out our
# guide at: https://bundler.io/guides/creating_gem.html
Expand Down
33 changes: 12 additions & 21 deletions exe/csv-cut
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ end
###

options = {}
options[:separator] = ','
options[:out_col_sep] = ','

OptionParser.new do |opts|
opts.banner = "Usage: #{File.basename(__FILE__)} -c col1,col2,col3 CSV_FILE "
Expand All @@ -46,8 +46,12 @@ OptionParser.new do |opts|
exit
end

opts.on '-s', '--separator SEP', 'Separator for mulitple columns' do |sep|
options[:separator] = sep
opts.on '-s', '--output-sep SEPARATOR', 'Output column separator' do |sep|
options[:out_col_sep] = sep
end

opts.on '-x', '--input-sep SEPARATOR', 'CSVReader column separator' do |sep|
options[:in_col_sep] = sep
end

opts.on '-l' , '--list-headers', 'List the headers in CSV and quit' do
Expand All @@ -58,8 +62,8 @@ OptionParser.new do |opts|
options[:columns] = columns.split ','
end

opts.on '-d', '--headers', 'Print column headers' do
options[:headers] = true
opts.on '-d', '--headers', 'Output the cut column headers' do
options[:output_headers] = true
end

opts.on '-h', '--help', 'Prints this help' do
Expand All @@ -74,25 +78,12 @@ end.parse!
# See if there are any arguments left; otherwise, read from stdin
if $stdin.tty? || ARGV.any?
csv_file = ARGV.shift
abort "Please provide a CSV file" unless csv_file
abort 'Please provide a CSV file' unless csv_file
abort "Can't find CSV file" unless File.exist? csv_file
input = File.open csv_file
else
input = $stdin
end


###
# Process the data
###
first_row = true
CSV col_sep: options[:separator] do |csv|
CSV.parse input, headers: true do |row|
if first_row
process_first_row row: row, options: options, csv: csv
first_row = false
end

csv << options[:columns].map { |column| row[column] }
end
end
csv_cut = CSVUtil::Cut.new options[:columns], options: options
csv_cut.process input
76 changes: 19 additions & 57 deletions exe/csv-filter
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ CMD = File.basename __FILE__
# Methods
########################################################################

##
# Print the version of the command to stdout
def print_version
puts "#{CMD} #{CSVUtil::VERSION}"
exit
Expand All @@ -24,40 +26,6 @@ def valid_match? options
options[:pattern].nil? ^ options[:text].nil?
end

##
# Test +value+ for exact equality with +options[:text]+ (converted to a String).
#
# When +options[:insensitive]+ is set, both sides are downcased before the
# comparison. When +options[:reject]+ is set, the result is inverted, i.e.
# true is returned for values that do NOT equal the text.
def matches_text? value, options
  actual = value
  expected = options[:text].to_s

  if options[:insensitive]
    actual = actual.downcase
    expected = expected.downcase
  end

  same = actual == expected
  options[:reject] ? !same : same
end

##
# Test +value+ against the regular expression source in +options[:pattern]+.
#
# The pattern is compiled with Regexp::IGNORECASE when +options[:insensitive]+
# is set. When +options[:reject]+ is set the result is inverted, i.e. true is
# returned for values that do NOT match.
#
# Always returns +true+ or +false+ (never a match offset or +nil+), so both the
# normal and the reject path hand the caller the same kind of value.
def matches_pattern? value, options
  # Regexp options are bit values (0,1,2,4); 0 => No options
  # https://ruby-doc.org/3.2.2/Regexp.html#class-Regexp-label-Options
  regexp_opts = options[:insensitive] ? Regexp::IGNORECASE : 0
  regex = Regexp.new options[:pattern], regexp_opts

  # Regexp#match? yields a strict boolean and does not build a MatchData
  matched = regex.match?(value)

  options[:reject] ? !matched : matched
end

##
# Decide whether +row+ satisfies the filter described by +options+.
#
# The cell found under +options[:column]+ is converted to a String and then
# handed to +matches_text?+ when +options[:text]+ is set, or to
# +matches_pattern?+ otherwise.
def match? row, options
  cell = row[options[:column]].to_s

  if options[:text]
    matches_text? cell, options
  else
    matches_pattern? cell, options
  end
end

##
# Emit the header line: the keys of +row+ (converted to a Hash) are appended
# to +csv+ as a single entry. +options+ is accepted but not used here.
def process_first_row row:, options:, csv:
  header_names = row.to_h.keys
  csv << header_names
end

########################################################################
# Options
########################################################################
Expand All @@ -78,38 +46,42 @@ EOF
options[:version] = true
end

opts.on "-c", "--column COLUMN_NAME", "Column to filter on" do |col|
opts.on '-c', '--column COLUMN_NAME', 'Column to filter on' do |col|
options[:column] = col
end

opts.on("-r", "--regex PATTERN",
"Ruby regular expression; e.g., 'trade.*cards'") do |regex|
opts.on('-r', '--regex PATTERN',
'Ruby regular expression pattern; e.g., \'trad.*cards\'') do |regex|
options[:pattern] = regex
end

opts.on("-j", "--reject", "Reject all matching rows") do
options[:reject] = true
opts.on('-j', '--reject', 'Reject all matching rows') do
options[:reject_matching] = true
end

opts.on "-i", "--case-insensitive", "Whether regex is case-insensitive" do
opts.on '-i', '--case-insensitive', 'Whether regex is case-insensitive' do
options[:insensitive] = true
end

opts.on "-t", "--text TEXT", "An exact string to match" do |text|
opts.on '-t', '--text TEXT', 'An exact string to match' do |text|
options[:text] = text
end

opts.on "-e", "--encoding ENCODING", "Input file encoding (if not ASCII or UTF-8)" do |encoding|
opts.on '-u', '--substring', 'Output rows containing text' do
options[:substring] = true
end

opts.on '-e', '--encoding ENCODING', 'Input file encoding (if not ASCII or UTF-8)' do |encoding|
options[:encoding] = encoding
end

c_help = %Q{List encodings and quit (#{Encoding.list.size} encodings)}
opts.on "--list-encodings", c_help do
opts.on '--list-encodings', c_help do
puts Encoding.list
exit
end

opts.on("-h", "--help", "Print this help") do
opts.on('-h', '--help', 'Print this help') do
puts opts
exit
end
Expand All @@ -124,26 +96,16 @@ print_version if options[:version]
# See if there are any arguments left; otherwise, read from stdin
if $stdin.tty? || ARGV.any?
csv_file = ARGV.shift
abort "Please provide a CSV file" unless csv_file
abort 'Please provide a CSV file' unless csv_file
abort "Can't find CSV file" unless File.exist? csv_file
input = File.open csv_file
else
input = $stdin
end

# Handle options
encoding = options[:encoding] || DEFAULT_ENCODING
abort 'Provide either a regex or text to match' unless valid_match? options
abort 'Specify a column to match' unless options[:column]


first_row = true
CSV headers: true do |csv|
CSV.parse input, headers: true, encoding: "#{encoding}:utf-8" do |row|
if first_row
process_first_row row: row, options: options, csv: csv
first_row = false
end
csv << row if match? row, options
end
end
filter = CSVUtil::Filter.new options[:column], **options
filter.filter input
Loading