Skip to content

Commit

Permalink
refactored
Browse files Browse the repository at this point in the history
  • Loading branch information
OlegPhenomenon committed May 10, 2024
1 parent 546898c commit 8e871d9
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 212 deletions.
94 changes: 23 additions & 71 deletions app/jobs/company_register_status_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,10 @@
class CompanyRegisterStatusJob < ApplicationJob
queue_as :default

FILENAME = 'ettevotja_rekvisiidid__lihtandmed.csv.zip'
UNZIP_FILENAME = 'ettevotja_rekvisiidid__lihtandmed.csv'
DESTINATION = 'lib/tasks/data/'

def perform(days_interval = 14, spam_time_delay = 0.2, batch_size = 100, download_open_data_file_url='https://avaandmed.ariregister.rik.ee/sites/default/files/avaandmed/ettevotja_rekvisiidid__lihtandmed.csv.zip')

download_open_data_file(download_open_data_file_url, DESTINATION + FILENAME)
unzip_file(FILENAME, DESTINATION)

codes_in_csv = collect_company_codes(DESTINATION + UNZIP_FILENAME)

def perform(days_interval = 14, spam_time_delay = 1, batch_size = 100)
sampling_registrant_contact(days_interval).find_in_batches(batch_size: batch_size) do |contacts|
contacts.each do |contact|
if codes_in_csv.include?(contact.ident)
proceed_company_status(contact, spam_time_delay)
else
schedule_force_delete(contact)
end
end
contacts.each { |contact| proceed_company_status(contact, spam_time_delay) }
end

remove_temp_file(DESTINATION + UNZIP_FILENAME)
end

private
Expand All @@ -36,65 +18,39 @@ def proceed_company_status(contact, spam_time_delay)
company_status = contact.return_company_status
contact.update!(company_register_status: company_status, checked_company_at: Time.zone.now)

puts company_status
case company_status
when Contact::REGISTERED
lift_force_delete(contact) if check_for_force_delete(contact)
when Contact::LIQUIDATED
ContactInformMailer.company_liquidation(contact: contact).deliver_now
when Contact::BANKRUPT || Contact::DELETED
schedule_force_delete(contact)
end
end

# Reads the semicolon-separated open-data CSV at +open_data_file_path+ and
# returns an Array holding the 'ariregistri_kood' column value of every data
# row, in file order (the header row is consumed as headers, not data).
def collect_company_codes(open_data_file_path)
  csv_options = { headers: true, col_sep: ';', quote_char: '"', liberal_parsing: true }

  # Without a block CSV.foreach yields an Enumerator over the rows.
  CSV.foreach(open_data_file_path, **csv_options).map { |row| row['ariregistri_kood'] }
end

def download_open_data_file(url, filename)
uri = URI(url)

Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
request = Net::HTTP::Get.new(uri)
response = http.request(request)

if response.code == '200'
File.open(filename, 'wb') do |file|
file.write(response.body)
end
else
puts "Failed to download file: #{response.code} #{response.message}"
end
when Contact::REGISTERED
lift_force_delete(contact) if check_for_force_delete(contact)
when Contact::LIQUIDATED
ContactInformMailer.company_liquidation(contact: contact).deliver_now
else
# Any other status lands here: the company was not found in the register, or it is deleted (Contact::DELETED) or bankrupt (Contact::BANKRUPT).
schedule_force_delete(contact)
end

puts "File saved as #{filename}"
status = company_status.blank? ? Contact::DELETED : company_status
puts status
update_validation_company_status(contact:contact , status: status)
end

def unzip_file(filename, destination)
::Zip::File.open(destination + filename) do |zip_file|
zip_file.each do |entry|
entry.extract(File.join(destination, entry.name)) { true }
end
end
def sampling_registrant_contact(days_interval)
Registrant.where(ident_type: 'org', ident_country_code: 'EE').where(
"(company_register_status IS NULL OR checked_company_at IS NULL) OR
(company_register_status = ? AND checked_company_at < ?) OR
company_register_status IN (?)",
Contact::REGISTERED, days_interval.days.ago, [Contact::LIQUIDATED, Contact::BANKRUPT, Contact::DELETED]
)

puts "Archive invoke to #{destination}"
end

def sampling_registrant_contact(days_interval)
Registrant.where(ident_type: 'org', ident_country_code: 'EE')
.where('(company_register_status IS NULL) OR
(company_register_status = ? AND (checked_company_at IS NULL OR checked_company_at <= ?)) OR
(company_register_status = ? AND (checked_company_at IS NULL OR checked_company_at <= ?))',
Contact::REGISTERED, days_interval.days.ago, Contact::LIQUIDATED, 1.day.ago)
# Stores +status+ as the contact's company_register_status and stamps
# checked_company_at with the current Time.zone.now.
# Returns whatever contact.update returns.
def update_validation_company_status(contact:, status:)
  checked_at = Time.zone.now
  new_attributes = { company_register_status: status, checked_company_at: checked_at }

  contact.update(new_attributes)
end

def schedule_force_delete(contact)
contact.domains.each do |domain|
next if domain.schedule_force_delete?

domain.schedule_force_delete(
type: :fast_track,
notify_by_email: true,
Expand All @@ -115,8 +71,4 @@ def lift_force_delete(contact)
domain.lift_force_delete
end
end

# Deletes the file at +destination+ when it exists; does nothing (and
# returns nil) when there is no such file.
def remove_temp_file(destination)
  return unless File.exist?(destination)

  FileUtils.rm(destination)
end
end
18 changes: 5 additions & 13 deletions app/models/contact/company_register.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
module Contact::CompanyRegister
extend ActiveSupport::Concern

COMPANY_STATUSES = {
'r' => 'registered',
'l' => 'liquidated',
'n' => 'bankrupt',
'k' => 'deleted',
}.freeze

REGISTERED = 'registered'.freeze
LIQUIDATED = 'liquidated'.freeze
BANKRUPT = 'bankrupt'.freeze
DELETED = 'deleted'.freeze
REGISTERED = 'R'.freeze
LIQUIDATED = 'L'.freeze
BANKRUPT = 'N'.freeze
DELETED = 'K'.freeze

def company_is_relevant?
company_register_status == REGISTERED && company_register_status == LIQUIDATED
Expand All @@ -20,8 +13,7 @@ def company_is_relevant?
def return_company_status
return if return_company_data.blank?

status = return_company_data.first[:status].downcase
COMPANY_STATUSES[status]
return_company_data.first[:status]
end

def return_company_data
Expand Down
37 changes: 0 additions & 37 deletions lib/tasks/check_for_company_status.rake

This file was deleted.

154 changes: 63 additions & 91 deletions lib/tasks/company_status.rake
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,45 @@ require 'rake_option_parser_boilerplate'


namespace :company_status do
# bundle exec rake company_status:check_for_exists -- --open_data_file_path=lib/tasks/data/ettevotja_rekvisiidid__lihtandmed.csv --missing_companies_output_path=lib/tasks/data/missing_companies_in_business_registry.csv --deleted_companies_output_path=lib/tasks/data/deleted_companies_from_business_registry.csv --download_path=https://avaandmed.ariregister.rik.ee/sites/default/files/avaandmed/ettevotja_rekvisiidid__lihtandmed.csv.zip
# bundle exec rake company_status:check_all -- --open_data_file_path=lib/tasks/data/ettevotja_rekvisiidid__lihtandmed.csv --missing_companies_output_path=lib/tasks/data/missing_companies_in_business_registry.csv --deleted_companies_output_path=lib/tasks/data/deleted_companies_from_business_registry.csv --download_path=https://avaandmed.ariregister.rik.ee/sites/default/files/avaandmed/ettevotja_rekvisiidid__lihtandmed.csv.zip
desc 'Get Estonian companies status from Business Registry.'

DELETED_FROM_REGISTRY_STATUS = 'K'
FILENAME = 'opendata_business_registry.csv.zip'
DESTINATION = 'lib/tasks/data/'
COMPANY_STATUS = 'ettevotja_staatus'
BUSINESS_REGISTRY_CODE = 'ariregistri_kood'

task :check_for_exists => :environment do
task :check_all => :environment do
options = initialize_rake_task

open_data_file_path = options[:open_data_file_path]
missing_companies_in_business_registry_path = options[:missing_companies_output_path]
deleted_companies_from_business_registry_path = options[:deleted_companies_output_path]
download_path = options[:download_path]
output_file_path = 'lib/tasks/data/temp_missing_companies_output.csv'
downloaded_filename = File.basename(URI(download_path).path)

puts "*** Run 1 step. Downloading fresh open data file. ***"

download_open_data_file(download_path, FILENAME)
unzip_file(FILENAME, DESTINATION)

# Remove old file
remove_old_file(output_file_path)

puts "*** Run 2 step. Collecting companies what are not in the open data file. ***"
collect_companies_whats_not_in_open_data_file(open_data_file_path, output_file_path)

puts "*** Run 3 step. Fetching detailed information from business registry. ***"
sort_missing_companies_to_different_files(output_file_path, missing_companies_in_business_registry_path, deleted_companies_from_business_registry_path)

puts '*** Run 4 step. Remove temporary files. ***'
remove_old_file(output_file_path)
FileUtils.rm(FILENAME) if File.exist?(FILENAME)
remove_old_file(DESTINATION + downloaded_filename)
download_open_data_file(download_path, downloaded_filename)
unzip_file(downloaded_filename, DESTINATION)

puts "*** Run 2 step. I am collecting data from open business registry sources. ***"
company_data = collect_company_data(open_data_file_path)

puts "*** Run 3 step. I process companies, update their information, and sort them into different files based on whether the companies are missing or removed from the business registry ***"
Registrant.where(ident_type: 'org', ident_country_code: 'EE').find_each do |contact|
if company_data.key?(contact.ident)
update_company_status(contact: contact, status: company_data[contact.ident][COMPANY_STATUS])
puts "Company: #{contact.name} with ident: #{contact.ident} and ID: #{contact.id} has status: #{company_data[contact.ident][COMPANY_STATUS]}"
else
update_company_status(contact: contact, status: 'K')
sort_companies_to_files(
contact: contact,
missing_companies_in_business_registry_path: missing_companies_in_business_registry_path,
deleted_companies_from_business_registry_path: deleted_companies_from_business_registry_path,
)
end
end

puts '*** Done ***'
end
Expand Down Expand Up @@ -90,6 +96,15 @@ namespace :company_status do
puts "Archive invoke to #{destination}"
end

def collect_company_data(open_data_file_path)
company_data = {}

CSV.foreach(open_data_file_path, headers: true, col_sep: ';', quote_char: '"', liberal_parsing: true) do |row|
company_data[row[BUSINESS_REGISTRY_CODE]] = row
end

company_data
end

def download_open_data_file(url, filename)
uri = URI(url)
Expand All @@ -110,86 +125,43 @@ namespace :company_status do
puts "File saved as #{filename}"
end

def collect_companies_whats_not_in_open_data_file(open_data_file_path, output_file_path)
codes_in_csv = collect_company_codes(open_data_file_path)
put_missing_companies_to_file(output_file_path, codes_in_csv)
end

def collect_company_codes(open_data_file_path)
codes_in_csv = []
CSV.foreach(open_data_file_path, headers: true, col_sep: ';', quote_char: '"', liberal_parsing: true) do |row|
codes_in_csv << row['ariregistri_kood']
end

codes_in_csv
def update_company_status(contact:, status:)
contact.update(company_register_status: status, checked_company_at: Time.zone.now)
end

def put_missing_companies_to_file(output_file_path, codes_in_csv)
CSV.open(output_file_path, 'wb', write_headers: true, headers: ["ID", "Code", "Name"]) do |csv|
Contact.where(ident_type: 'org', ident_country_code: 'EE').find_each do |contact|
# [16526891, 14836742, 12489420, 12226399, 12475122].each do |test_ident|
# Contact.where(ident: test_ident).limit(100).each do |contact|
unless codes_in_csv.include?(contact.ident)
csv << [contact.id, contact.ident, contact.name]
end
# end
end
end
def put_company_to_missing_file(contact:, path:)
write_to_csv_file(csv_file_path: path, headers: ["ID", "Ident", "Name"], attrs: [contact.id, contact.ident, contact.name])
end

def sort_missing_companies_to_different_files(output_file_path, missing_companies_in_business_registry_path, deleted_companies_from_business_registry_path)
contact_no_in_business_registry = []
contact_which_were_deleted = []

collect_missing_companies_ids(output_file_path).each do |id|
puts "Fetching data for ID: #{id}"

contact = Contact.find(id.to_i)

resp = contact.return_company_details

if resp.empty?
contact_no_in_business_registry << [contact.id, contact.ident, contact.name]
else
status = resp.first.status.upcase
kandeliik_type = resp.first.kandeliik.last.last.kandeliik
kandeliik_tekstina = resp.first.kandeliik.last.last.kandeliik_tekstina
kande_kpv = resp.first.kandeliik.last.last.kande_kpv

if status == DELETED_FROM_REGISTRY_STATUS
contact_which_were_deleted << [contact.id, contact.ident, contact.name, status, kandeliik_type, kandeliik_tekstina, kande_kpv]
end
def sort_companies_to_files(contact:, missing_companies_in_business_registry_path:, deleted_companies_from_business_registry_path:)
sleep 1
resp = contact.return_company_details

if resp.empty?
put_company_to_missing_file(contact: contact, path: missing_companies_in_business_registry_path)
puts "Company: #{contact.name} with ident: #{contact.ident} and ID: #{contact.id} is missing in registry, company id: #{contact.id}"
else
status = resp.first.status.upcase
kandeliik_type = resp.first.kandeliik.last.last.kandeliik
kandeliik_tekstina = resp.first.kandeliik.last.last.kandeliik_tekstina
kande_kpv = resp.first.kandeliik.last.last.kande_kpv

if status == DELETED_FROM_REGISTRY_STATUS
csv_file_path = deleted_companies_from_business_registry_path
headers = ["ID", "Ident", "Name", "Status", "Kandeliik Type", "Kandeliik Tekstina", "kande_kpv"]
attrs = [contact.id, contact.ident, contact.name, status, kandeliik_type, kandeliik_tekstina, kande_kpv]
write_to_csv_file(csv_file_path: csv_file_path, headers: headers, attrs: attrs)

puts "Company: #{contact.name} with ident: #{contact.ident} and ID: #{contact.id} has status #{status}, company id: #{contact.id}"
end

sleep 1
end

save_missing_companies(contact_no_in_business_registry, missing_companies_in_business_registry_path)
save_deleted_companies(contact_which_were_deleted, deleted_companies_from_business_registry_path)
end

def collect_missing_companies_ids(output_file_path)
ids = []
CSV.foreach(output_file_path, headers: true, quote_char: '"', liberal_parsing: true) do |row|
ids << row['ID']
end
def write_to_csv_file(csv_file_path:, headers:, attrs:)
write_headers = !File.exist?(csv_file_path)

ids
end

def save_missing_companies(contact_no_in_business_registry, missing_companies_in_business_registry_path)
CSV.open(missing_companies_in_business_registry_path, 'wb', write_headers: true, headers: ["ID", "Code", "Name"]) do |csv|
contact_no_in_business_registry.each do |entry|
csv << entry
end
end
end

def save_deleted_companies(contact_which_were_deleted, deleted_companies_from_business_registry_path)
CSV.open(deleted_companies_from_business_registry_path, 'wb', write_headers: true, headers: ["ID", "Ident", "Name", "Status", "Kandeliik Type", "Kandeliik Tekstina", "kande_kpv"]) do |csv|
contact_which_were_deleted.each do |entry|
csv << entry
end
CSV.open(csv_file_path, "ab", write_headers: write_headers, headers: headers) do |csv|
csv << attrs
end
end
end

0 comments on commit 8e871d9

Please sign in to comment.