Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ruby upgrade and v2 changes #12

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
2.7.1
3.1.6

2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
source "https://rubygems.org"

gem "datura", git: "https://github.com/CDRH/datura.git", tag: "v0.2.0-beta"
gem "datura", git: "https://github.com/CDRH/datura.git", branch: "dev"

52 changes: 35 additions & 17 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,35 +1,53 @@
GIT
remote: https://github.com/CDRH/datura.git
revision: 3748b4ade4fb78b79ec830eb8fcf308fdb805710
tag: v0.2.0-beta
revision: 0b0368e26608675698c2c9179e7e897936fb7384
branch: dev
specs:
datura (0.2.0.pre.beta)
datura (0.2.0)
byebug (~> 11.0)
colorize (~> 0.8.1)
nokogiri (~> 1.8)
rest-client (~> 2.0.2)
nokogiri (~> 1.10)
pdf-reader (~> 2.12)
rest-client (~> 2.1)

GEM
remote: https://rubygems.org/
specs:
Ascii85 (1.1.1)
afm (0.2.2)
bigdecimal (3.1.8)
byebug (11.1.3)
colorize (0.8.1)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
http-cookie (1.0.3)
domain_name (0.6.20240107)
hashery (2.1.2)
http-accept (1.7.0)
http-cookie (1.0.7)
domain_name (~> 0.5)
mime-types (3.3.1)
logger (1.6.1)
mime-types (3.6.0)
logger
mime-types-data (~> 3.2015)
mime-types-data (3.2020.0512)
mini_portile2 (2.4.0)
mime-types-data (3.2024.1001)
mini_portile2 (2.8.7)
netrc (0.11.0)
nokogiri (1.10.10)
mini_portile2 (~> 2.4.0)
rest-client (2.0.2)
nokogiri (1.16.7)
mini_portile2 (~> 2.8.2)
racc (~> 1.4)
pdf-reader (2.12.0)
Ascii85 (~> 1.0)
afm (~> 0.2.1)
hashery (~> 2.0)
ruby-rc4
ttfunk
racc (1.8.1)
rest-client (2.1.0)
http-accept (>= 1.7.0, < 2.0)
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.7)
ruby-rc4 (0.1.5)
ttfunk (1.8.0)
bigdecimal (~> 3.1)

PLATFORMS
ruby
Expand Down
26 changes: 15 additions & 11 deletions scripts/overrides/csv_to_es_gallery.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ def array_to_string (array,sep)
return array.map { |i| i.to_s }.join(sep)
end

def build_image_id
built_image_id = []
built_image_id << @row["identifier"]
built_image_id << ".jpg"
return array_to_string(built_image_id,"")
# Builds the cover image filename for the current CSV row by appending
# ".jpg" to the row's "identifier" value.
#
# The identifier is stringified through interpolation, so a missing (nil)
# identifier yields ".jpg" -- the same result the earlier array-and-join
# implementation produced.
#
# @return [String] "<identifier>.jpg"
def build_cover_image
  "#{@row["identifier"]}.jpg"
end


Expand All @@ -30,7 +30,7 @@ def page_data
def assemble_collection_specific
@json["ethnicgroup_k"] = ethnicgroup_data
@json["pages_k"] = page_data
@json["format_k"] = "image"
#@json["format_k"] = "image"
end

# ethnic groups go in ethnicgroup_k field
Expand Down Expand Up @@ -67,22 +67,26 @@ def topics
end
# end themes

def image_id
build_image_id
# Cover image field builder: returns whatever build_cover_image produces
# for the current row.
def cover_image
build_cover_image
end

# Category field builder: always returns the fixed label "Gallery".
def category
"Gallery"
end

def subcategory
# Second-level category field builder: the row's "subtype" value run
# through String#capitalize (first character upper-cased, the rest
# lower-cased).
#
# Safe navigation is used so that a row without a "subtype" value returns
# nil instead of raising NoMethodError, matching the nil-tolerant style of
# the other row-based field builders in this override.
#
# @return [String, nil] capitalized subtype, or nil when the row has none
def category2
  @row["subtype"]&.capitalize
end

# Display date field builder: returns the row's "date_display" value
# unchanged (nil when the row has no such value).
def date_display
@row["date_display"]
end

# Format field builder: always returns the fixed value "image".
# NOTE(review): defining `format` on this object shadows Kernel#format for
# calls made inside it -- confirm nothing here relies on the Kernel version.
def format
"image"
end

# person, creator.name
def creator
if @row["creator.name"]
Expand Down Expand Up @@ -114,9 +118,9 @@ def person
end
end

def source
def has_source
if @row["source"]
@row["source"]
{ "title" => @row["source"] }
end
end

Expand Down
2 changes: 1 addition & 1 deletion scripts/overrides/csv_to_es_personography.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def category
"People"
end

def subcategory
# Second-level category field builder: always returns the fixed label
# "Person".
def category2
"Person"
end

Expand Down
80 changes: 57 additions & 23 deletions scripts/overrides/tei_to_es.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def preprocessing
# do something after pulling the fields
def preprocessing
people_file_location = File.join(@options["collection_dir"], "source/csv/personography.csv")
@people = CSV.read(people_file_location, {
@people = CSV.read(people_file_location, **{
encoding: "utf-8",
headers: true,
return_headers: true
Expand All @@ -78,16 +78,16 @@ def assemble_collection_specific
@json["religion_k"] = get_list(@xpaths["religion"])
@json["person_selected_k"] = build_selected_person
@json["person_sender_k"] = build_sender

@json["format_k"] = build_format
@json["title_a_k"] = get_text(@xpaths["title_a"])
@json["title_m_k"] = get_text(@xpaths["title_m"])
@json["title_j_k"] = get_text(@xpaths["title_j"])
# some of the below moved to citation field
# @json["title_a_k"] = get_text(@xpaths["title_a"])
# @json["title_m_k"] = get_text(@xpaths["title_m"])
# @json["title_j_k"] = get_text(@xpaths["title_j"])
@json["author_cite_k"] = get_text(@xpaths["creator"])
@json["volume_k"] = get_text(@xpaths["volume"])
@json["pages_k"] = get_text(@xpaths["pages"])
@json["issue_k"] = get_text(@xpaths["issue"])
@json["pub_place_k"] = get_text(@xpaths["pub_place"])
#@json["volume_k"] = get_text(@xpaths["volume"])
#@json["pages_k"] = get_text(@xpaths["pages"])
#@json["issue_k"] = get_text(@xpaths["issue"])
#@json["pub_place_k"] = get_text(@xpaths["pub_place"])
# date field could be moved to citation field but
@json["pub_date_k"] = get_text(@xpaths["pub_date"])
@json["pub_date2_k"] = get_text(@xpaths["pub_date2"])
end
Expand All @@ -107,7 +107,7 @@ def build_selected_person
list = []
#people_in_doc = get_list(@xpaths["person"])

people_in_doc = get_list(@xpaths["person"]) + get_list(@xpaths["sender"]) + get_list(@xpaths["recipient"]) + get_list(@xpaths["creator"])
people_in_doc = get_list(@xpaths["person"]).to_a + get_list(@xpaths["sender"]).to_a + get_list(@xpaths["recipient"]).to_a + get_list(@xpaths["creator"]).to_a



Expand Down Expand Up @@ -135,17 +135,17 @@ def build_sender

def build_format
formats = get_elements(@xpaths["bibliography"]).map do |ele|
if (get_text("bibl/title/@level", xml: ele).include? "a") && (get_text("bibl/title/@level", xml: ele).include? "m")
if (get_text("bibl/title[@level='m']/@type", xml: ele).include? "main")
if (get_text("bibl/title/@level", xml: ele).to_s.include? "a") && (get_text("bibl/title/@level", xml: ele).to_s.include? "m")
if (get_text("bibl/title[@level='m']/@type", xml: ele).to_s.include? "main")
"book"
elsif (get_text("bibl/title[@level='a']/@type", xml: ele).include? "main")
elsif (get_text("bibl/title[@level='a']/@type", xml: ele).to_s.include? "main")
"other"
elsif get_text(@xpaths["subcategory"]) == "Despatches"
"despatch"
else
"no format defined"
end
elsif (get_text("bibl/title/@level", xml: ele).include? "a") && (get_text("bibl/title/@level", xml: ele).include? "j")
elsif (get_text("bibl/title/@level", xml: ele).to_s.include? "a") && (get_text("bibl/title/@level", xml: ele).to_s.include? "j")
"article"
else
"no format defined"
Expand All @@ -158,14 +158,33 @@ def build_format
# Field Builders #
########################

def source
build_source
# Source field builder: wraps the value returned by build_source in a
# single-entry hash under the "title" key.
#
# @return [Hash] { "title" => <result of build_source> }
def has_source
  source_title = build_source
  { "title" => source_title }
end

# Citation field builder: collects the bibliographic values for the
# document into one hash, reading each value with get_text from the
# corresponding entry in @xpaths.
#
# Note that the "page_start" key is filled from the "pages" xpath and the
# "place" key from the "pub_place" xpath.
#
# This method produces no console output; the earlier debugging `puts`
# calls (including a date_standardize call on the raw publication date)
# were removed so indexing output stays clean.
#
# @return [Hash] keys "title_a", "title_m", "title_j", "volume",
#   "page_start", "issue", "place", "publisher"; each value is whatever
#   get_text returns for that xpath
def citation
  {
    "title_a" => get_text(@xpaths["title_a"]),
    "title_m" => get_text(@xpaths["title_m"]),
    "title_j" => get_text(@xpaths["title_j"]),
    "volume" => get_text(@xpaths["volume"]),
    "page_start" => get_text(@xpaths["pages"]),
    "issue" => get_text(@xpaths["issue"]),
    "place" => get_text(@xpaths["pub_place"]),
    "publisher" => get_text(@xpaths["publisher"])
    # omitting date for now because it's not possible to parse all values
    # "date" => Datura::Helpers.date_standardize(get_text(@xpaths["pub_date"]), false)
  }
end

def person
combined_people_array = get_elements(@xpaths["person"]) + get_elements(@xpaths["sender"]) + get_elements(@xpaths["recipient"]) + get_elements(@xpaths["creator"])
eles = combined_people_array.map do |p|
if (get_text(".", xml: p) != "" && get_text(".", xml: p) != nil)
if (get_text(".", xml: p) != nil && get_text(".", xml: p) != nil)
{
"id" => get_text("@ref", xml: p),
"name" => get_text(".", xml: p),
Expand All @@ -182,7 +201,7 @@ def person
def recipient
eles = get_elements(@xpaths["recipient"]).map do |p|
persname = get_text(".", xml: p)
if persname != ""
if persname != nil
{
"id" => get_text("@id", xml: p),
"name" => persname,
Expand All @@ -198,9 +217,9 @@ def build_source
#format_k = build_format
title_j = get_text(@xpaths["title_j"])
title_m = get_text(@xpaths["title_m"])
if title_j != ""
if title_j != nil
source = title_j
elsif title_m != ""
elsif title_m != nil
source = title_m
else
source = "No source defined"
Expand All @@ -209,7 +228,6 @@ def build_source
end

def call_analysis_file(filename)

analysis_xml_file = @options["collection_dir"] + "/source/analysis/" + filename + ".xml"

if (File.exist?(analysis_xml_file))
Expand All @@ -222,8 +240,12 @@ def call_analysis_file(filename)

end

# Format field builder: returns whatever build_format produces for the
# current document.
# NOTE(review): defining `format` on this object shadows Kernel#format for
# calls made inside it -- confirm nothing here relies on the Kernel version.
def format
build_format
end

def text
analysis = get_text(@xpaths["analysis_file"])
analysis = get_text(@xpaths["analysis_file"]).to_s
# puts analysis
analysis_text = call_analysis_file(analysis)

Expand All @@ -249,4 +271,16 @@ def text
Datura::Helpers.normalize_space(text_all.join(" "))
end

# Spatial field builder: turns each entry returned by
# get_list(@xpaths["places"]) into a hash with a single "short_name" key.
#
# The list is fetched once and reused (the lookup was previously evaluated
# twice). A nil/false result from get_list yields an empty array.
#
# @return [Array<Hash>] one { "short_name" => place } hash per place, in
#   the order get_list returned them
def spatial
  place_names = get_list(@xpaths["places"]) || []
  place_names.map { |place| { "short_name" => place } }
end

end