-
Notifications
You must be signed in to change notification settings - Fork 4
/
DPLA.pm
171 lines (130 loc) · 4.24 KB
/
DPLA.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
package DPLA;
use warnings;
use strict;
use MARC::Record;
use Data::Dump qw/dump/;
use JSON::XS;
use Encode;
use base 'Scraper';
# Set DEBUG in the environment to a true value to get extra dumps (decoded
# JSON, per-item keys, assembled fields) on STDERR; defaults to off.
my $debug = $ENV{DEBUG} || 0;
# Print a diagnostic line on STDERR.
#
# All arguments are emitted as one warning, prefixed with "# " and
# terminated with the current input record separator ($/).
sub diag {
    my @message_parts = @_;
    warn( '# ', @message_parts, $/ );
}
# http://dp.la/dev/wiki/Item_API
#
# Base Fields: Mapping to a set of common terms
# Field name Field description
# dpla.keyword Almost all of a record's fields get copied to this field
# dpla.title The title and/or subtitle of the item. Exact matching.
# dpla.title_keyword The title and/or subtitle of the item. Keyword matching.
# dpla.creator The creator(s), contributor(s), editor(s), etc. of the item. Exact matching
# dpla.creator_keyword The creator(s), contributor(s), editor(s), etc. of the item. Keyword matching
# dpla.date The item's date of publication.
# dpla.description The item's description. This often includes the item's Table of Contents. Exact matching.
# dpla.description_keyword The item's description. This often includes the item's Table of Contents. Keyword matching.
# dpla.subject A catchall for subject information. LCSH, Dewey, and other tag related fields are copied to this field. Exact matching.
# dpla.subject_keyword A catchall for subject information. LCSH, Dewey, and other tag related fields are copied to this field. Keyword matching.
# dpla.publisher The name of the publisher. Exact matching.
# dpla.language The primary language of the item. Exact matching.
# dpla.isbn The item's ISBN. Exact matching.
# dpla.oclc The item's OCLC identifier. Exact matching.
# dpla.lccn The item's LCCN. Exact matching.
# dpla.call_num The item's call number. Exact matching.
# dpla.content_link A link to the item's content. Exact matching.
# dpla.contributor The contributing partner. Exact matching.
# dpla.resource_type The resource's type. Common values include item and collection. Exact matching.
# Koha Z39.50 query:
#
# Bib-1 @and @and @and @and @and @and @and @or
# @attr 1=4 title
# @attr 1=7 isbn
# @attr 1=8 issn
# @attr 1=1003 author
# @attr 1=16 dewey
# @attr 1=21 subject-holding
# @attr 1=12 control-no
# @attr 1=1007 standard-id
# @attr 1=1016 any
# Describe how Bib-1 use attributes translate into DPLA item API filters.
#
# Returns a fresh hash reference with:
#   <use attribute number> => DPLA field name to search
#   RPN                    => query-string glue for the And / Or operators
#   prefix_term            => code ref turning ( $prefix, $term ) into a
#                             "filter=<prefix>:<term>" query fragment
sub usemap {
    my $build_filter = sub {
        my ( $prefix, $term ) = @_;
        return "filter=${prefix}:${term}";
    };

    my %operator_glue = (
        And => '&',
        Or  => '&',    # FIXME sigh, not really supported?
    );

    # Use attributes 16 (dewey), 12 (control-no) and 1007 (standard-id)
    # have no DPLA counterpart and are deliberately left unmapped.
    my %field_for_attribute = (
        4    => 'dpla.title_keyword',
        7    => 'dpla.isbn',
        8    => 'dpla.keyword',            # XXX fake
        1003 => 'dpla.creator_keyword',
        21   => 'dpla.subject',
        1016 => 'dpla.keyword',
    );

    return {
        %field_for_attribute,
        RPN         => \%operator_glue,
        prefix_term => $build_filter,
    };
}
# Run a query against the DPLA item API and remember the decoded response.
#
# Arguments:
#   $query - ready-made URL query string; it is appended verbatim to the
#            API endpoint, so it must already be in the form the API expects
#
# Returns the number of hits reported in the response's num_found (also
# stored in $self->{hits}; the decoded response is kept in $self->{_json}).
# Returns nothing when the response body is not JSON or carries no
# num_found.
#
# Dies with "need query" when $query is undefined.
sub search {
    my ( $self, $query ) = @_;

    die "need query" unless defined $query;

    my $url = 'http://api.dp.la/v0.03/item/?' . $query;
    diag( "get $url" );

    my $mech = $self->mech;
    $mech->get( $url );

    # decode_json croaks on malformed input; treat that like any other
    # unusable response instead of letting it escape to the caller.
    my $json;
    if ( !eval { $json = decode_json( $mech->content ); 1 } ) {
        diag( "can't decode JSON from $url: $@" );
        return;
    }
    diag( "# json = ", dump($json) ) if $debug;

    # A top-level JSON value that is not an object cannot hold num_found,
    # and dereferencing it as a hash would die under strict refs.
    if ( ref $json ne 'HASH' || !exists $json->{num_found} ) {
        diag( "can't find num_found in ", $mech->content );
        return;
    }

    my $hits = $json->{num_found};
    diag( "got $hits results" );

    $self->{_json} = $json;

    return $self->{hits} = $hits;
}
# Turn the next item of the current result set into a MARC record and save it.
#
# The item is taken off the front of $self->{_json}->{docs}.  Item keys that
# look like "<3 digits><1 word character>" (for example "245a") are read as
# MARC tag + subfield code; every other key is reported and skipped.  Keys
# are visited in sorted order so all subfields of one tag are adjacent and
# can be collected before the field is added to the record.  When a key holds
# several values, the i-th value goes into the i-th repetition of that tag.
#
# Arguments:
#   $format - defaults to 'marc'; not otherwise used in this sub
#
# Returns the item's 'dpla.id' after handing the record to save_marc as
# "<id>.marc".  Returns nothing when there are no more items or the item
# has no 'dpla.id'.
sub next_marc {
    my ($self,$format) = @_;

    $format ||= 'marc';

    my $item = shift @{ $self->{_json}->{docs} };

    # Result set exhausted: stop here instead of building an empty record
    # and warning about a missing dpla.id.
    return unless defined $item;

    my $marc = MARC::Record->new;
    $marc->encoding('utf-8');

    my $fields;    # repetitions collected so far for the tag being processed

    foreach my $key ( sort keys %$item ) {
        my $v = $item->{$key};
        warn "# item ",dump( $key, $v ) if $debug;

        if ( $key =~ m/^(\d\d\d)(\w)$/ ) {
            my ($f,$sf) = ($1,$2);

            # XXX do magic and unroll into proper MARC record
            $v = [ $v ] unless ref $v eq 'ARRAY';

            # Tag changed: flush what was collected for the previous tag.
            if ( $fields && $fields->[0]->[0] ne $f ) {
                $marc->add_fields( @$fields );
                warn "# add_fields ",dump($fields) if $debug;
                $fields = undef;
            }

            foreach my $i ( 0 .. $#$v ) {
                # First subfield for this repetition: tag plus blank indicators.
                $fields->[$i] ||= [ $f, ' ', ' ' ];
                push @{ $fields->[$i] }, $sf, decode('utf-8', $v->[$i]); # decode here fixes API encoding error
            }
        } else {
            warn "# IGNORED: $key ", dump($item->{$key}), "\n";
        }
    }

    # Flush the last tag; nothing to add when no MARC-like key was seen.
    $marc->add_fields( @$fields ) if $fields;

    diag( "# marc ", $marc->as_formatted );
    warn dump( $marc->as_usmarc );

    # NOTE(review): nothing in this sub navigates away from the search
    # results, so confirm that stepping back in the history is still wanted.
    $self->mech->back; # return to search results for next page

    my $id = $item->{'dpla.id'};
    if ( ! $id ) {
        warn "no dpla.id in ",dump($item);
        return;
    }

    $self->save_marc( "$id.marc", $marc->as_usmarc );

    return $id;
}
1;