-
Notifications
You must be signed in to change notification settings - Fork 0
/
guardian-glacier-transfer
566 lines (495 loc) · 18.7 KB
/
guardian-glacier-transfer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
#!/usr/bin/env ruby
require 'active_record'
require 'find'
require 'logger'
require 'stronghold'
require 'todo_runner'
require 'yaml'
require 'json'
require 'zip'
require 'rsync'
require 'shellwords'
require_relative('lib/secrets_manager')
require_relative('lib/chunk_sizer')
require_relative('lib/checksum')
##
# Reopens Rsync::Result (a monkey patch) so that the raw rsync output is
# readable and a useful error message can be built from it.
class Rsync::Result
  # Raw output of the rsync run, as stored in the +@raw+ instance variable.
  attr_reader :raw

  # Collects the lines of the raw output that begin with "rsync:" or
  # "rsync error:".
  #
  # @return [String, nil] the matching lines joined with the record
  #   separator, or +nil+ when the run succeeded
  def full_error
    return if success?

    error_lines = raw.split($/).select { |line| line =~ /^rsync( error)?:/ }
    error_lines.join($/)
  end
end
# Recursively zips the contents of a directory, optionally deleting each file
# and directory from disk once it has been written to the archive.
#
# Adapted from the directory-zipping example at
# https://github.com/rubyzip/rubyzip
#
# Usage:
#
#   generator = ZipFileGenerator.new('/path/in', '/path/out.zip', remove_files: true)
#   generator.write
class ZipFileGenerator
  # Turn on rubyzip's +write_zip64_support+ flag for archives written here.
  Zip.write_zip64_support = true

  attr_writer :remove_files

  # @param input_dir [String] directory whose contents are archived
  # @param output_file [String] path of the zip file to create
  # @param remove_files [Boolean] when true, delete each source entry after
  #   it has been written to the archive
  def initialize(input_dir, output_file, remove_files: false)
    @input_dir = input_dir
    @output_file = output_file
    @remove_files = remove_files
  end

  # @return [Boolean] whether source entries are deleted after archiving
  def remove_files?
    @remove_files
  end

  # Writes every entry below the input directory to the output file. When
  # +remove_files?+ is set, the (by then empty) input directory is removed too.
  def write
    entries = Dir.entries(@input_dir) - %w(. ..)
    ::Zip::File.open(@output_file, ::Zip::File::CREATE) do |zipfile|
      write_entries entries, '', zipfile
    end
    Dir.rmdir @input_dir if remove_files?
  end

  private

  # Archives +entries+ (names relative to +path+ inside the input directory),
  # descending into directories, and deletes each one when +remove_files?+.
  def write_entries(entries, path, zipfile)
    entries.each do |e|
      zipfile_path = path == '' ? e : File.join(path, e)
      disk_file_path = File.join(@input_dir, zipfile_path)
      puts "Deflating #{disk_file_path}"
      if File.directory? disk_file_path
        recursively_deflate_directory(disk_file_path, zipfile, zipfile_path)
        if remove_files?
          # Loosen permissions first so the removal is not blocked by them.
          FileUtils.chmod_R 0755, disk_file_path
          Dir.rmdir(disk_file_path)
        end
      else
        put_into_archive(disk_file_path, zipfile, zipfile_path)
        if remove_files?
          FileUtils.chmod 0755, disk_file_path
          File.delete(disk_file_path)
        end
      end
    end
  end

  # Adds a directory entry to the archive, then archives its children.
  def recursively_deflate_directory(disk_file_path, zipfile, zipfile_path)
    zipfile.mkdir zipfile_path
    subdir = Dir.entries(disk_file_path) - %w(. ..)
    write_entries subdir, zipfile_path, zipfile
  end

  # Streams one file into the archive entry +zipfile_path+.
  #
  # The copy happens inside this call, before the caller may delete the source
  # file. The block form of File.open closes the source handle, and
  # IO.copy_stream avoids loading the whole file into memory.
  def put_into_archive(disk_file_path, zipfile, zipfile_path)
    zipfile.get_output_stream(zipfile_path) do |out|
      File.open(disk_file_path, 'rb') { |src| IO.copy_stream(src, out) }
    end
  end
end
# ActiveRecord model for archives transferred to Glacier. Rows are created in
# update_fort_db with :description, :archive_id and :vault attributes.
# NOTE(review): the +has_many :table_relationship+ association looks like a
# placeholder -- no matching model is visible in this file; confirm it is needed.
class GlacierArchive < ActiveRecord::Base
has_many :table_relationship
end
# Script-wide logger writing to standard output. The level comes from the
# GUARDIAN_LOG_LEVEL environment variable when it is set, otherwise INFO.
LOGGER = Logger.new(STDOUT)
LOGGER.level = ENV.fetch('GUARDIAN_LOG_LEVEL', Logger::INFO)

# Checksum algorithm used for the compressed archive and for the manifest
# checksums recorded in the todo data.
ARCHIVE_CHECKSUM_ALGORITHM = 'sha256'

# Keys that every todo file must supply.
REQUIRED_TODO_VALUES = %i[
  todo_base
  source
  workspace
  compressed_destination
  glacier_description
  glacier_vault
  application
  method
]
TodoRunner.define do
# Creates the workspace directory named by +todo_data[:workspace]+.
#
# @param todo_data [Hash] todo values; only +:workspace+ is read
# @return [String] the workspace path
# @raise [RuntimeError] if something already exists at the workspace path
def prep_workspace(todo_data)
  workspace = todo_data[:workspace]
  raise "Workspace already exists for #{todo_data} (#{workspace})" if File.exist?(workspace)

  Dir.mkdir(workspace)
  workspace
end
# Fetches +source+ into +workspace+ using the strategy selected by
# "#{application}_#{method}".
#
# * 'bulwark_gitannex': clones the repository, runs its init script, then
#   gets and unlocks the git-annex content. Secrets loaded from
#   'secrets/bulwark_gitannex.secret' are set for the duration and always
#   unset afterwards, even when a step raises.
# * 'openn_rsync': rsyncs the source directory into a directory of the same
#   base name below the workspace.
#
# @param source [String] repository URL/path or rsync source
# @param workspace [String] existing directory to fetch into
# @param application [String] application name from the todo file
# @param method [String] fetch method from the todo file
# @return [String] path of the fetched copy
# @raise [RuntimeError] for an unknown application/method pair or a failed rsync
def fetch_source(source, workspace, application, method)
  case "#{application}_#{method}"
  when 'bulwark_gitannex'
    clone_dir = "#{workspace}/#{File.basename(source)}"
    bg_secrets = SecretsManager.load_secrets('secrets/bulwark_gitannex.secret')
    SecretsManager.set(bg_secrets)
    begin
      # Escape both values: they are interpolated into a shell command line.
      `git clone #{Shellwords.escape(source)} #{Shellwords.escape(clone_dir)}`
      Dir.chdir(clone_dir) do
        `./.repoadmin/bin/init.sh`
        `git annex get .`
        `git annex unlock .`
      end
    ensure
      SecretsManager.unset(bg_secrets)
    end
    clone_dir
  when 'openn_rsync'
    rsync_opts = "-rltDv --no-owner --no-group"
    # A trailing slash makes rsync copy the directory's contents.
    source_path = source =~ %r{/$} ? source : "#{source}/"
    dest = File.join(workspace, File.basename(source))
    Dir.mkdir(dest) unless File.exist?(dest)
    # NOTE(review): the destination is shell-escaped but the source is not;
    # confirm whether sources may contain characters that need escaping.
    dest_path = Shellwords.escape("#{dest}/")
    result = Rsync.run(source_path, dest_path, rsync_opts)
    raise " failure, method=openn_rsync: #{result.full_error}" unless result.success?
    dest
  else
    raise "Invalid application #{application} specified."
  end
end
# Verifies the fetched copy at +todo_data[:fetched_source]+.
#
# * 'bulwark_gitannex': runs a fast git-annex fsck inside the fetched
#   repository and raises if the output mentions 'fail'.
# * 'openn_rsync': validates every manifest found in the fetched directory
#   and appends each manifest's details to +todo_data[:manifest_data]+.
#
# @param todo_data [Hash] todo values; reads +:fetched_source+,
#   +:application+ and +:method+, and may add +:manifest_data+
# @return [Boolean] true if verification passed
# @raise [RuntimeError] on an fsck failure or an unknown application/method
def verify_fetch(todo_data)
  valid = true
  path = todo_data[:fetched_source]
  application = todo_data[:application]
  method = todo_data[:method]
  case "#{application}_#{method}"
  when 'bulwark_gitannex'
    fsck_output = nil
    # Block form restores the previous working directory even if the command
    # raises.
    FileUtils.cd(path, :verbose => true) do
      fsck_output = `git annex fsck --fast --from ceph01`
    end
    raise "Fetch failed for #{application} for #{path}" if fsck_output.include?('fail')
    # NOTE(review): success here only means "no 'fail' in the fsck output";
    # consider also requiring the trailing "ok" marker once its exact form is
    # confirmed.
  when 'openn_rsync'
    manifest_configs = find_openn_manifests(path)
    manifest_configs.each do |manifest_data|
      valid &&= verify_manifest(path, manifest_data[:manifest], manifest_data[:algorithm])
      (todo_data[:manifest_data] ||= []) << manifest_data
    end
    LOGGER.info("Manifest validation result for #{path}: #{valid}")
  else
    raise "Invalid application #{application} specified."
  end
  valid
end
# Zips the fetched source directory into the compressed destination, deleting
# the source files as they are archived.
#
# @param todo_data [Hash] reads +:fetched_source+ and +:compressed_destination+
# @return [true]
def zip_package(todo_data)
  generator = ZipFileGenerator.new(
    todo_data[:fetched_source],
    todo_data[:compressed_destination],
    remove_files: true
  )
  generator.write
  true
end
# Unzips the compressed archive into +todo_data[:verification_destination]+
# and verifies the extracted contents. Does nothing (and returns true) unless
# +todo_data[:verify_compressed_archive]+ is 'true'.
#
# * 'bulwark_gitannex': runs a fast git-annex fsck in the destination and
#   raises if the output mentions 'fail'.
# * 'openn_rsync': checks each manifest recorded in
#   +todo_data[:manifest_data]+ against its stored sha256, then validates the
#   extracted files against that manifest.
#
# NOTE(review): any other application/method pair falls through and is
# reported as valid; confirm that is intended.
#
# @param todo_data [Hash] todo values
# @return [Boolean] true if the archive contents verified (or verification
#   was not requested)
# @raise [RuntimeError] on an fsck failure or missing manifest data
def verify_zip(todo_data)
  return true unless verify_compressed_archive?(todo_data[:verify_compressed_archive])

  valid = true
  application = todo_data[:application]
  method = todo_data[:method]
  dest = todo_data[:verification_destination]
  archive = todo_data[:compressed_destination]
  case "#{application}_#{method}"
  when 'bulwark_gitannex'
    unzip_archive(archive, dest)
    Dir.chdir(dest) do
      fsck_output = `git annex fsck --fast --from ceph01`
      # Report the directory that was checked.
      raise "Fetch failed for #{application} for #{dest}" if fsck_output.include?('fail')
    end
  when 'openn_rsync'
    raise "Cannot verify zip content data in #{todo_data[:manifest_data]}" if (todo_data[:manifest_data] || []).empty?
    unzip_archive(archive, dest)
    Dir.chdir(dest) do
      todo_data[:manifest_data].each do |hash|
        # The manifest itself must match the checksum recorded at fetch time
        # before it can be trusted to validate the extracted files.
        manifest_valid = Checksum.validate_file(hash[:manifest_sha256], hash[:manifest], ARCHIVE_CHECKSUM_ALGORITHM)
        if manifest_valid
          valid &&= verify_manifest(dest, hash[:manifest], hash[:algorithm])
        else
          LOGGER.error("Decompressed manifest not valid for #{todo_data[:todo_base]} (#{hash[:manifest]})")
          valid = false
        end
      end
    end
  end
  valid
end
# Uploads the compressed archive to the Glacier vault named in +todo_data+.
#
# Secrets loaded from 'secrets/glacier.secret' are set for the duration of
# the transfer and always unset afterwards, including when validation or the
# upload raises.
#
# @param todo_data [Hash] reads +:compressed_destination+,
#   +:glacier_description+ and +:glacier_vault+
# @return [Object] whatever +Stronghold::Client#create_backup+ returns (the
#   caller iterates it as archive-id/description pairs)
# @raise [ArgumentError] if the transfer values are invalid
def glacier_transfer(todo_data)
  glacier_secrets = SecretsManager.load_secrets('secrets/glacier.secret')
  SecretsManager.set(glacier_secrets)
  begin
    validate_transfer_data todo_data
    file_path = todo_data[:compressed_destination]
    archive_description = todo_data[:glacier_description]
    vault_id = todo_data[:glacier_vault]
    chunk_size = ChunkSizer.calculate(File.stat(file_path).size)
    client = Stronghold::Client.new(:multipart_chunk_size => chunk_size)
    client.create_backup vault_id, file_path, archive_description
  ensure
    SecretsManager.unset(glacier_secrets)
  end
end
##
# Validate the contents of +package_directory+ against a checksum manifest.
#
# The check runs with +package_directory+ as the working directory (via
# +Dir.chdir+), so the paths listed in the manifest must be relative to it:
#
#     4442df919c38bb1a180c9aae4250ce22 data/W.745/sap/W745_000001_sap.jpg
#     ce5a9ec44b527093a1c694c070549984 data/W.745/master/W745_000001_1200.tif
#     8b8e816cf55cae00f3410353d0b88e26 data/metadata.xml
#     a645f8f04dc7dc7b9112fa0244a81a3f data/W.745/300/W745_000001_300.tif
#     c475516ef7787384cc4cb595f6d3c97a data/W.745/thumb/W745_000001_thumb.jpg
#
# The path to +manifest+ itself may be absolute or, <i>if the manifest lives
# in the package directory</i>, relative to +package_directory+.
#
# Validation messages are written to STDOUT.
#
# @param package_directory [String] path to the fetched source
# @param manifest [String] path to the checksum manifest
# @param algorithm [String] algorithm used to check the package contents
# @return [Boolean] the result of +Checksum.validate_manifest+
def verify_manifest(package_directory, manifest, algorithm)
  Dir.chdir(package_directory) do
    LOGGER.info("Validating manifest #{manifest}")
    Checksum.validate_manifest(manifest, algorithm, message_io: STDOUT)
  end
end
# Whether the +:verify_compressed_archive+ todo value requests verification.
#
# @param value [Object] raw todo value (string, boolean or nil)
# @return [Boolean] true only when the value, stripped and lower-cased,
#   reads 'true'
def verify_compressed_archive?(value)
  normalized = value.to_s.strip.downcase
  normalized == 'true'
end
##
# Validate the +todo_data+. Ensure that required values are present and that
# the +verify_compressed_archive+ and +verification_destination+ values are
# valid.
#
# Required values are those listed in REQUIRED_TODO_VALUES:
#
#     :todo_base
#     :source
#     :workspace
#     :compressed_destination
#     :glacier_description
#     :glacier_vault
#     :application
#     :method
#
# +verify_compressed_archive+ value must be blank (+nil+ or +''+), +false+,
# or +true+. If +verify_compressed_archive+ is +true+,
# +verification_destination+ is required.
#
# @param todo_data [Hash] values parsed from the todo file
# @return [nil]
# @raise [RuntimeError] if the data is not a Hash, a required value is blank,
#   or the verification settings are invalid
def validate_todo_data(todo_data)
  # An empty or malformed todo file does not parse to a Hash.
  raise "Todo data must be a Hash; got: #{todo_data.class}" unless todo_data.is_a?(Hash)

  ##----------------
  ## Required values
  ##----------------
  missing = REQUIRED_TODO_VALUES.select { |key| todo_data[key].to_s.strip.empty? }
  raise "Required todo values missing: #{missing.map(&:inspect).join(', ')}" unless missing.empty?

  ##------------------------------------
  ## Verify compressed archived checking
  ##------------------------------------
  verify = todo_data[:verify_compressed_archive].to_s.strip.downcase
  return if ['', 'false'].include?(verify) # No verification to do; end validation

  # We have a non-false verify value; unless it's 'true', it's not a valid value.
  raise ":verify_compressed_archive must be '', 'true', or 'false'; got: '#{verify}'" unless verify == 'true'

  # ':verify_compressed_archive' is true; make sure :verification_destination
  # is defined.
  return unless todo_data[:verification_destination].to_s.strip.empty?

  raise "':verification_destination' must be supplied if ':verify_compressed_archive' is 'true'"
end
# Checks that the values needed for the Glacier transfer are present and that
# the compressed archive exists on disk.
#
# @param todo_data [Hash] reads +:compressed_destination+,
#   +:glacier_description+ and +:glacier_vault+
# @return [nil]
# @raise [ArgumentError] naming the first absent value, or the archive path
#   when the file cannot be found
def validate_transfer_data(todo_data)
  absent = %i[compressed_destination glacier_description glacier_vault].find { |name| !todo_data[name] }
  raise ArgumentError, "Required transfer value not present #{absent.inspect}: #{todo_data}" if absent

  archive = todo_data[:compressed_destination]
  return if File.exist?(archive)

  raise ArgumentError, "Cannot find :compressed_destination: #{archive}"
end
# Parses +todo_file+ as YAML.
#
# @param todo_file [#path] YAML source handed to +YAML.load+; must respond to
#   +path+ so a failure can name the file
# @return [Object] the parsed YAML
# @raise [ArgumentError] if parsing fails for any StandardError
def load_data(todo_file)
  YAML.load(todo_file)
rescue
  raise ArgumentError, "Unable to read todo_file as YAML: #{todo_file.path}"
end
# Turn on rubyzip's +write_zip64_support+ flag.
# NOTE(review): the same flag is already set in the ZipFileGenerator class
# body, so this line looks redundant -- confirm before removing.
Zip.write_zip64_support = true
# Extracts every entry of the zip file +archive+ below the directory +dest+,
# creating +dest+ if necessary. Entries whose target path already exists are
# left untouched.
#
# As a guard against re-extracting over an earlier run, nothing is done if
# +dest+ already contains an entry named after the archive (its base name
# without extension).
#
# @param archive [String] path to the zip file
# @param dest [String] directory to extract into
# @raise [RuntimeError] if the guard directory already exists
def unzip_archive(archive, dest)
  archive_stem = File.basename(archive, File.extname(archive))
  guard_dir = File.join(dest, archive_stem)
  if File.exist?(guard_dir)
    raise "Refusing to unzip to existing directory: #{guard_dir}"
  end

  Dir.mkdir(dest) unless File.exist?(dest)
  # adapted from https://stackoverflow.com/a/19754884
  Zip::File.open(archive) do |zip|
    zip.each do |entry|
      target = File.join(dest, entry.name)
      LOGGER.debug("Extracting #{entry.name} to #{target}")
      FileUtils.mkdir_p(File.dirname(target))
      next if File.exist?(target)

      zip.extract(entry, target)
    end
  end
end
# Maps each recognised manifest file name ("manifest-<algorithm>.txt") to the
# checksum algorithm it uses, in the order the manifests are looked for.
MANIFEST_MAP = %w[sha1 sha256 sha384 sha512 md5].map do |algorithm|
  ["manifest-#{algorithm}.txt", algorithm]
end.to_h.freeze
##
# Look in +path+ for the expected types of OPenn manifest (the file names
# listed in {MANIFEST_MAP}) and describe each one found.
#
# == Example:
#
#     find_openn_manifests('/path/to/W745')
#     # => [{ manifest: 'manifest-md5.txt', algorithm: 'md5', manifest_sha256: '...' }]
#
# @param path [String] path to the fetched OPenn object
# @return [Array<Hash>] one hash per manifest file present, with the keys
#   +:manifest+ (file name), +:algorithm+ and +:manifest_sha256+ (sha256
#   checksum of the manifest file itself); empty when none are found
def find_openn_manifests(path)
  Dir.chdir(path) do
    MANIFEST_MAP.each_with_object([]) do |(file_name, algo), found|
      next unless File.file?(file_name)

      found << {
        manifest: file_name,
        algorithm: algo,
        manifest_sha256: Checksum.get_checksum(file_name, 'sha256')
      }
    end
  end
end
# Deletes the directories listed in +todo_data[:cleanup_directories]+ (a
# '|'-separated string) together with everything inside them. Directories
# that do not exist are skipped. For the 'gitannex' method, entries with a
# '.git' extension have their annexed content dropped before removal.
#
# @param todo_data [Hash] reads +:cleanup_directories+ and +:method+
# @return [Array<String>, nil] the listed directories, or nil when no cleanup
#   directories were given
def remove_zip_artifacts(todo_data)
  cleanup_spec = todo_data[:cleanup_directories]
  return if cleanup_spec.nil?

  # TODO: Make sure data[:workspace] is deleted
  drop_annex = todo_data[:method] == 'gitannex'
  cleanup_spec.split('|').each do |dir|
    if File.exist?(dir)
      LOGGER.debug("Processing for removal: #{dir}")
      Dir.chdir(dir) do
        Dir.entries('.').reject { |name| name == '.' || name == '..' }.each do |entry|
          LOGGER.debug("Removal entry is: #{entry}")
          git_annex_drop(entry) if drop_annex && File.extname(entry) == '.git'
          LOGGER.debug("Removing: #{entry}")
          FileUtils.rm_rf(entry, secure: true)
        end
      end
      LOGGER.debug("Removing: #{dir}")
      FileUtils.rmdir(dir)
    end
  end
end
# Merges +value_hash+ into the JSON object stored (as a string) in
# +manifest_data[:glacier_description]+ and stores the result back as JSON.
#
# Keys are converted to strings before merging so that a symbol key replaces
# an existing entry of the same name instead of producing a duplicate key in
# the generated JSON (e.g. when a step is run again).
#
# @param manifest_data [Hash] todo data; +:glacier_description+ is rewritten
# @param value_hash [Hash] values to add or replace
# @return [Hash] +manifest_data+ itself
def update_glacier_description(manifest_data, value_hash = {})
  return manifest_data if value_hash.empty?

  description = JSON.parse(manifest_data[:glacier_description])
  value_hash.each { |key, value| description[key.to_s] = value }
  manifest_data[:glacier_description] = description.to_json
  manifest_data
end
# Runs +git annex drop --all --force+ with +path+ as the working directory.
#
# @param path [String] directory to run the command in
# @return [String] the command's standard output
def git_annex_drop(path)
  Dir.chdir(path) { %x(git annex drop --all --force) }
end
# Records transferred archives in the database: connects using the settings
# in 'secrets/db.secret', creates one GlacierArchive row per entry of
# +values_hash+, then unsets the database secrets.
#
# @param values_hash [Hash] archive id => description pairs
# @param vault_name [String] vault the archives were stored in
# @return [Object] the result of +SecretsManager.unset+
def update_fort_db(values_hash, vault_name)
  db_secrets = SecretsManager.load_secrets('secrets/db.secret')
  connection_settings = {
    adapter: db_secrets['MYSQL_ADAPTER'],
    host: db_secrets['MYSQL_HOST'],
    username: db_secrets['MYSQL_USER'],
    password: db_secrets['MYSQL_PASSWORD'],
    database: db_secrets['MYSQL_DATABASE']
  }
  ActiveRecord::Base.establish_connection(connection_settings)
  values_hash.each_pair do |archive_id, description|
    GlacierArchive.create(description: description, archive_id: archive_id, vault: vault_name)
  end
  SecretsManager.unset(db_secrets)
end
# Load the guardian secrets file.
# NOTE(review): +guardian+ is not referenced again in the visible code --
# confirm whether this load is needed for a side effect or can be removed.
guardian = SecretsManager.load_secrets('secrets/guardian.secret')
# Name :validate_todo_file as the starting task (presumably the first step
# TodoRunner executes for each todo file -- confirm against todo_runner docs).
start :validate_todo_file
# Step 1: parse the todo file as YAML and check that it contains the required
# values. The block's value is true on success and false when any error was
# logged (presumably selecting +next_step+ or +on_fail+ -- confirm against
# todo_runner docs).
task :validate_todo_file, on_fail: :FAIL, next_step: :fetch_source do |todo_file|
  begin
    data = YAML.load_file todo_file
    validate_todo_data(data)
    true
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit still stop the run instead of being logged as a task failure.
  rescue StandardError => ex
    LOGGER.fatal("ERROR: #{ex.message}")
    LOGGER.debug(ex.backtrace.map { |s| "\tfrom #{s}" }.join("\n"))
    false
  end
end
# Step 2: create the workspace, fetch the source into it, and record the
# fetched location in the todo file as +:fetched_source+ for later steps.
# The block's value is true on success and false when any error was logged.
task :fetch_source, on_fail: :FAIL, next_step: :verify_fetch do |todo_file|
  begin
    data = YAML.load todo_file
    LOGGER.info("Creating workspace for #{data[:todo_base]}: '#{data[:workspace]}'")
    prep_workspace(data)
    LOGGER.info("Fetching archive for #{data[:todo_base]}...")
    fetched_source = fetch_source(data[:source], data[:workspace], data[:application], data[:method])
    # save the directory to zip to the todo file (used in next step)
    data[:fetched_source] = fetched_source
    todo_file.rewrite(data.to_yaml)
    true
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit still stop the run instead of being logged as a task failure.
  rescue StandardError => ex
    LOGGER.fatal("ERROR: #{ex.message}")
    LOGGER.debug(ex.backtrace.map { |s| "\tfrom #{s}" }.join("\n"))
    false
  end
end
# Step 3: verify the fetched data. When verification collected manifest data,
# the todo file is rewritten so later steps can use it. The block's value is
# the verification result, or false when any error was logged.
task :verify_fetch, on_fail: :FAIL, next_step: :zip do |todo_file|
  begin
    data = YAML.load todo_file
    LOGGER.info("Verifying fetched data for #{data[:todo_base]}...")
    valid = verify_fetch(data)
    if valid
      LOGGER.info("Fetched data verified: #{data[:fetched_source]}")
    else
      LOGGER.error("Verification failure: #{data[:fetched_source]}")
    end
    # if we've added manifest_data, rewrite the todo file
    todo_file.rewrite(data.to_yaml) unless (data[:manifest_data] || []).empty?
    valid
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit still stop the run instead of being logged as a task failure.
  rescue StandardError => ex
    LOGGER.fatal("ERROR: #{ex.message}")
    LOGGER.debug(ex.backtrace.map { |s| "\tfrom #{s}" }.join("\n"))
    false
  end
end
# Step 4: zip the fetched source, compute the archive's checksum, and record
# the checksum and its algorithm in the todo file's Glacier description.
# The block's value is true on success and false when any error was logged.
task :zip, on_fail: :FAIL, next_step: :verify_zip do |todo_file|
  begin
    data = YAML.load todo_file
    LOGGER.info("Assembling archive for #{data[:todo_base]}...")
    zip_package(data)
    LOGGER.info('Archive assembled.')
    checksum = Checksum.get_checksum(data[:compressed_destination], ARCHIVE_CHECKSUM_ALGORITHM)
    LOGGER.info("Zip checksum: #{checksum}")
    update_glacier_description(data, {archive_checksum: checksum, archive_checksum_algorithm: ARCHIVE_CHECKSUM_ALGORITHM})
    todo_file.rewrite(data.to_yaml)
    true
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit still stop the run instead of being logged as a task failure.
  rescue StandardError => ex
    LOGGER.fatal("ERROR: #{ex.message}")
    LOGGER.debug(ex.backtrace.map { |s| "\tfrom #{s}" }.join("\n"))
    false
  end
end
# Step 5: when the todo file requests it, unzip the archive and verify its
# contents, recording a successful verification in the Glacier description.
# The block's value is the verification result, true when verification was
# not requested, or false when any error was logged.
task :verify_zip, on_fail: :FAIL, next_step: :glacier do |todo_file|
  begin
    data = YAML.load todo_file
    LOGGER.info("Verifying zipped archive for #{data[:todo_base]}")
    if verify_compressed_archive?(data[:verify_compressed_archive])
      valid = verify_zip(data)
      if valid
        LOGGER.info("Verified zip contents for #{data[:todo_base]}")
        update_glacier_description(data, { archive_contents_verified: true })
        todo_file.rewrite(data.to_yaml)
      else
        LOGGER.error("Invalid contents found in zip for #{data[:todo_base]}")
      end
      # implicitly return zip validity
      valid
    else
      # we're not verifying; just continue
      true
    end
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit still stop the run instead of being logged as a task failure.
  rescue StandardError => ex
    LOGGER.fatal("ERROR: #{ex.message}")
    LOGGER.debug(ex.backtrace.map { |s| "\tfrom #{s}" }.join("\n"))
    false
  end
end
# Step 6: upload the archive to Glacier, record the transfer in the database,
# then remove the local artifacts. The block's value is true on success and
# false when any error was logged.
#
# NOTE(review): if the database update fails after a successful upload, the
# step is reported as failed although the archive is already in Glacier;
# confirm how re-runs are expected to handle that.
task :glacier, on_fail: :FAIL, next_step: :SUCCESS do |todo_file|
  begin
    data = load_data(todo_file)
    LOGGER.info("Initializing transfer to Glacier for #{data[:todo_base]}...")
    transfer_values = glacier_transfer(data)
    LOGGER.info("Transfer complete. Updating database with transfer information...")
    update_fort_db(transfer_values, data[:glacier_vault])
    LOGGER.info('Database updated.')
    LOGGER.info('Removing zip artifacts...')
    remove_zip_artifacts(data)
    LOGGER.info("Artifacts removed. Fetch and transfer of #{data[:todo_base]} complete.")
    true
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit still stop the run instead of being logged as a task failure.
  rescue StandardError => ex
    LOGGER.fatal("ERROR: #{ex.message}")
    LOGGER.debug(ex.backtrace.map { |s| "\tfrom #{s}" }.join("\n"))
    false
  end
end
end
# Script entry point: run the tasks defined above, passing the command-line
# arguments straight through to TodoRunner.run (presumably the todo files or
# directories to process -- confirm against todo_runner docs).
LOGGER.info('Todo Runner initialized, starting run...')
TodoRunner.run(*ARGV)
LOGGER.info('Run complete.')