-
Notifications
You must be signed in to change notification settings - Fork 4
/
vuFind.pm
124 lines (92 loc) · 2.15 KB
/
vuFind.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package vuFind;
use warnings;
use strict;
use MARC::Record;
use Data::Dump qw/dump/;
use JSON::XS;
use Encode;
use base 'Scraper';
my $debug = $ENV{DEBUG} || 0;
# Print a diagnostic line on STDERR via warn, prefixed with "# ".
#
# All arguments are concatenated without a separator and terminated with a
# newline, so warn does not append its " at FILE line N." suffix.
sub diag {
    # Use a literal newline rather than $/: the input record separator can be
    # localized by a caller (even to undef, for slurping), which would drop
    # the terminator and trigger an uninitialized-value warning.
    warn "# ", @_, "\n";
}
# Koha Z39.50 query:
#
# Bib-1 @and @and @and @and @and @and @and @or
# @attr 1=4 title
# @attr 1=7 isbn
# @attr 1=8 issn
# @attr 1=1003 author
# @attr 1=16 dewey
# @attr 1=21 subject-holding
# @attr 1=12 control-no
# @attr 1=1007 standard-id
# @attr 1=1016 any
# Build the lookup table used to translate a Z39.50 query into catalog
# search parameters.
#
# Returns a new hash reference on every call, containing:
#   <Bib-1 use attribute number> => search type name (7 and 8 both map
#                                   to 'isn')
#   RPN         => hash ref with the URL fragments for the And / Or operators
#   prefix_term => code ref taking ($prefix, $term) and returning the
#                  'type[]=...&lookfor[]=...' fragment for one term; the
#                  values are inserted as given, no escaping is done here
sub usemap {
    my %operator_fragment = (
        And => '&bool[]=AND&',
        Or  => '&bool[]=OR&',
    );

    my $term_builder = sub {
        my ( $type, $value ) = @_;
        return "type[]=${type}&lookfor[]=${value}";
    };

    return {
        4    => 'title',
        7    => 'isn',
        8    => 'isn',
        1003 => 'author',
        # 16 => '',
        21   => 'subject',
        # 12 => '',
        # 1007 => '',
        1016 => 'all',

        RPN         => \%operator_fragment,
        prefix_term => $term_builder,
    };
}
# Run a catalog search and queue the record links found on the result page.
#
# Arguments:
#   $query - query string appended verbatim after ".../Search/Home?"; it is
#            not escaped here, so it must already be in URL form.
#
# Returns the hit count parsed from the result page (also stored in
# $self->{hits}), or nothing when no result counter is found on the page.
# Dies with "need query" when $query is undefined.
sub search {
    my ( $self, $query ) = @_;
    die "need query" unless defined $query;

    # Drop links queued by an earlier search on this object so that
    # next_marc cannot hand out records belonging to a previous query.
    $self->{records} = [];

    # http://catalog.hathitrust.org/Search/Home?lookfor=croatia%20AND%20zagreb&type=title
    my $url = 'http://catalog.hathitrust.org/Search/Home?' . $query;
    diag("get $url");
    $self->mech->get($url);

    # Fetch the page body once; it is needed for both matching and the
    # failure diagnostic.
    my $content = $self->mech->content;

    my ($hits) = $content =~ m{of\s*<span class="strong">(\d+)</span>\s*Results for}s;
    if ( !defined $hits ) {
        diag( "can't find results in ", $content );
        return;
    }

    diag("got $hits results");
    $self->populate_records;

    return $self->{hits} = $hits;
}
# Append the URL of every link on the current page whose URL matches
# /Record/<digits> to the $self->{records} queue, logging each one via warn.
sub populate_records {
    my ($self) = @_;

    my @record_links = $self->mech->find_all_links( url_regex => qr{/Record/\d+} );

    for my $record_link (@record_links) {
        my $record_url = $record_link->url;
        push @{ $self->{records} }, $record_url;
        warn "## ++ $record_url\n";
    }
}
# Fetch the next queued record as MARC and hand it to save_marc.
#
# The next record URL is taken off $self->{records}. When the queue is empty
# the current page is saved with save_content, the "Next" link is followed
# and the queue is refilled from the new page.
#
# Arguments:
#   $format - accepted for interface compatibility but not used; the record
#             is always requested as "<record url>.mrc".
#
# Returns the numeric record id, or nothing when there are no more records.
sub next_marc {
    my ( $self, $format ) = @_;

    my $url = shift @{ $self->{records} };

    if ( !$url ) {
        diag("fetch next page");
        $self->save_content;

        # NOTE(review): mech is presumably a WWW::Mechanize object, whose
        # follow_link returns a false value (or dies, with autocheck on) when
        # no link matches. Without this check the last result page would
        # either abort the caller or be re-queued over and over.
        my $response = eval { $self->mech->follow_link( text_regex => qr/Next/ ) };
        if ( !$response ) {
            diag("can't follow next page link: $@") if $@;
            warn "ERROR no more results\n";
            return;
        }

        $self->populate_records;
        $url = shift @{ $self->{records} };
        if ( !$url ) {
            warn "ERROR no more results\n";
            return;
        }
    }

    # List assignment instead of "my $id = $1 if ...": a my declaration with
    # a statement modifier has undefined behaviour.
    my ($id) = $url =~ m{Record/(\d+)};

    $self->mech->get( $url . '.mrc' );
    my $marc = decode( 'utf-8', $self->mech->content );
    $self->save_marc( "$id.marc", $marc );

    $self->mech->back;    # return to search results for next page

    return $id;
}
1;