forked from mudge/python-delicious
-
Notifications
You must be signed in to change notification settings - Fork 18
/
pinboard.py
638 lines (556 loc) · 25.2 KB
/
pinboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
#!/usr/bin/env python
"""Python-Pinboard
Python module for access to Pinboard <http://pinboard.in/> via its API.
Recommended: Python 2.6 or later (untested on previous versions).
This library is built on top of Paul Mucur's original python-delicious
library, which supported Python 2.3. Morgan Craft became a contributor and
ported the library to pinboard.in in December 2010, when it was announced
that the Delicious servers might be shutting down.
The port to Pinboard added support for gzip-compressed responses.
"""
__version__ = "1.0"
__license__ = "BSD"
__copyright__ = "Copyright 2011, Morgan Craft"
__author__ = "Morgan Craft <http://www.morgancraft.com/>"
#TODO:
# Should text be properly escaped for XML? Or is that not this module's
# responsibility?
# Create test suite
# When true, the module writes progress messages for requests and parsing
# steps to sys.stderr.
_debug = False
# The user agent string sent to pinboard.in when making requests. If you are
# using this module in your own application, you should probably change this.
# The module version is interpolated into the string.
USER_AGENT = "Python-Pinboard/%s +http://morgancraft.com/service_layer/python-pinboard/" % __version__
import urllib
import urllib2
import sys
import re
import time
## added to handle gzip compression from server
import StringIO
import gzip
from xml.dom import minidom
# Compatibility shims: bind StringTypes, ListType, TupleType, UserDict and
# datetime to usable objects regardless of the interpreter version.  Each
# fallback catches only the exception that signals "name/module missing", so
# unrelated errors are no longer hidden by a bare ``except``.
try:
    StringTypes = basestring
except NameError:
    try:
        # Python 2.2 does not have basestring
        from types import StringTypes
    except ImportError:
        try:
            # Python 2.0 and 2.1 do not have StringTypes; callers must test
            # against StringType and UnicodeType when StringTypes is None.
            from types import StringType, UnicodeType
            StringTypes = None
        except ImportError:
            # Interpreters where none of the legacy names exist: every text
            # string is a str.
            StringTypes = str

try:
    ListType = list
    TupleType = tuple
except NameError:
    from types import ListType, TupleType

# Taken from Mark Pilgrim's amazing Universal Feed Parser
# <http://feedparser.org/>
try:
    UserDict = dict
except NameError:
    from UserDict import UserDict

# datetime is optional; code using it must check that it is not None.
try:
    import datetime
except ImportError:
    datetime = None
# The base URL of the Pinboard API (version 1); request paths such as
# "posts/update" are appended to it.
PINBOARD_API = "https://api.pinboard.in/v1"
# Realm and protected URI for the API's HTTP Basic authentication, i.e. the
# values given to HTTPBasicAuthHandler.add_password().
AUTH_HANDLER_REALM = 'API'
AUTH_HANDLER_URI = "https://api.pinboard.in/"
def open(username=None, password=None, token=None):
    """Create a PinboardAccount connected to a pinboard.in account.

    Arguments:
    username -- pinboard.in user name; for canonical authentication
                both user and password should be specified
    password -- pinboard.in password
    token    -- API token; username and password will be ignored
                if the token is defined

    Usage:
    >>> open('johnd', 'secret$777')
    >>> open(username='johnd', password='secret$777')
    >>> open(token='johnd:258329B14EB83FD1E449')

    Returns:
    New pinboard.PinboardAccount instance.
    """
    account = PinboardAccount(username, password, token)
    return account
def connect(username=None, password=None, token=None):
    """Alias for pinboard.open(): connect to a pinboard.in account.

    Takes the same arguments as open() and returns what it returns.
    """
    return open(username=username, password=password, token=token)
# Custom exceptions
class PinboardError(Exception):
    """Error in the Python-Pinboard module"""


class ThrottleError(PinboardError):
    """Error caused by pinboard.in throttling requests

    Attributes:
    url     -- the URL whose request was throttled
    message -- human-readable description of the failure
    """

    def __init__(self, url, message):
        # Hand both values to the base class so that ``args`` (and with it
        # repr() and pickling) carries them; previously ``args`` stayed empty.
        PinboardError.__init__(self, url, message)
        self.url = url
        self.message = message

    def __str__(self):
        return "%s: %s" % (self.url, self.message)


class AddError(PinboardError):
    """Error adding a post to pinboard.in"""


class DeleteError(PinboardError):
    """Error deleting a post from pinboard.in"""


class BundleError(PinboardError):
    """Error bundling tags on pinboard.in"""


class DeleteBundleError(PinboardError):
    """Error deleting a bundle from pinboard.in"""


class RenameTagError(PinboardError):
    """Error renaming a tag in pinboard.in"""


class DateParamsError(PinboardError):
    """Date params error"""
class PinboardAccount(UserDict):
    """A pinboard.in account that doubles as a dictionary of its data.

    The instance stores "last_updated" (set on construction) and "headers"
    (response headers of the most recent request).  The keys "posts",
    "tags", "dates" and "bundles" are filled in by the methods of the same
    name; reading one of them before it exists fetches it on demand.
    """

    # Used to track whether all posts have been downloaded yet.
    __allposts = 0
    # Set when the "posts" item is assigned from outside, which invalidates
    # the cached download.
    __postschanged = 0

    # Time of last request so that the delay between requests can be enforced.
    __lastrequest = None

    # Pinboard API token, already URL-quoted
    # (see http://blog.pinboard.in/2012/07/api_authentication_tokens/)
    __token = None

    # URL opener owned by this account (None until __init__ has run).
    __opener = None

    # Special methods

    def __init__(self, username=None, password=None, token=None):
        """Set up authentication and load the time of the last update.

        Arguments:
        username -- pinboard.in user name (used with password)
        password -- pinboard.in password
        token    -- API token; when given, username/password are ignored

        Performs one request (posts/update) to populate "last_updated".
        """
        UserDict.__init__(self)
        # Authenticate the URL opener so that it can access Pinboard
        if _debug:
            sys.stderr.write("Initialising Pinboard Account object.\n")

        if token:
            self.__token = urllib.quote_plus(token)
            opener = urllib2.build_opener()
        else:
            auth_handler = urllib2.HTTPBasicAuthHandler()
            auth_handler.add_password(AUTH_HANDLER_REALM, AUTH_HANDLER_URI,
                                      username, password)
            opener = urllib2.build_opener(auth_handler)

        opener.addheaders = [("User-agent", USER_AGENT), ('Accept-encoding', 'gzip')]
        # Keep a per-account opener so that several accounts do not overwrite
        # each other's credentials; it is still installed globally because
        # existing callers may rely on that side effect.
        self.__opener = opener
        urllib2.install_opener(opener)
        if _debug:
            sys.stderr.write("URL opener with HTTP authenticiation installed globally.\n")

        self["last_updated"] = self.last_update()
        if _debug:
            sys.stderr.write("Time of last update loaded into class dictionary.\n")

    def __getitem__(self, key):
        """Return the stored item, fetching tags/dates/posts/bundles lazily.

        Raises KeyError for any other missing key (previously None was
        returned silently).
        """
        try:
            return UserDict.__getitem__(self, key)
        except KeyError:
            if key in ("tags", "dates", "posts", "bundles"):
                # The fetching method has the same name as the key.
                return getattr(self, key)()
            raise

    def __setitem__(self, key, value):
        """Store an item; assigning "posts" marks the posts cache as changed."""
        if key == "posts":
            if _debug:
                sys.stderr.write("The value of posts has been changed.\n")
            self.__postschanged = 1
        return UserDict.__setitem__(self, key, value)

    # Private helpers

    def __is_string(self, value):
        """Return true if value is a string on this interpreter."""
        if StringTypes:
            return isinstance(value, StringTypes)
        # Python 2.0/2.1: StringTypes is None, test both string types.
        return isinstance(value, StringType) or isinstance(value, UnicodeType)

    def __url(self, method, query=None):
        """Build the URL for an API method, adding a query string if any."""
        url = "%s/%s" % (PINBOARD_API, method)
        if query:
            url = "%s?%s" % (url, urllib.urlencode(query))
        return url

    def __tags_param(self, tags):
        """Return tags as a space-separated string, or None if unusable."""
        if not tags:
            return None
        if isinstance(tags, (ListType, TupleType)):
            return " ".join(tags)
        if self.__is_string(tags):
            return tags
        return None

    def __date_param(self, value):
        """Format a date filter for posts(): sequence, date object or string."""
        if isinstance(value, (ListType, TupleType)):
            return "-".join([str(part) for part in value[:3]])
        # datetime.datetime is a subclass of datetime.date, one test suffices.
        if datetime and isinstance(value, datetime.date):
            return "%.4d-%.2d-%.2d" % (value.year, value.month, value.day)
        return value

    def __timestamp(self, date):
        """Turn the date argument of add() into an ISO8601 "...Z" string.

        Strings shorter than 20 characters are treated as partial dates
        such as "2005-3-31" or "2005-3-31T20:00": their numeric fields are
        extracted and missing time fields are padded with zeros.  Lists and
        tuples are handled the same way.  Time zones are not taken into
        account.  Returns None when a partial string contains no digits and
        raises DateParamsError when fewer than three fields are given.
        """
        layout = "%.4d-%.2d-%.2dT%.2d:%.2d:%.2dZ"
        if self.__is_string(date):
            if len(date) >= 20:
                # Long enough to be a complete timestamp: pass through.
                return date
            date = re.findall(r"\d+", date)
            if not date:
                return None
        if isinstance(date, (ListType, TupleType)):
            # int() is required: the fields of a parsed string are strings
            # and "%d" refuses them.
            fields = [int(field) for field in date[:6]]
            if len(fields) < 3:
                raise DateParamsError
            fields.extend([0] * (6 - len(fields)))
            return layout % tuple(fields)
        if datetime and isinstance(date, datetime.datetime):
            return layout % date.utctimetuple()[:6]
        if datetime and isinstance(date, datetime.date):
            # Plain dates have no utctimetuple().
            return layout % date.timetuple()[:6]
        return date

    def __remember(self, key, items):
        """Merge freshly fetched dictionaries into the list cached under key.

        Only items not yet present are appended, so a narrow request does
        not overwrite an earlier, broader one.  If nothing is cached yet
        the list itself is stored.
        """
        if key in self:
            cached = UserDict.__getitem__(self, key)
            if isinstance(cached, ListType):
                for item in items:
                    if item not in cached:
                        cached.append(item)
        else:
            self[key] = items

    def __modify(self, method, query, error, done_text, failed_text, text_args):
        """Call a modifying API method on a best-effort basis.

        The call counts as failed when the request raises or the response
        code is not "done".  Failures are not propagated (as before), but
        they are now reported through the return value: True on success,
        False on failure.  Only Exception is caught, so KeyboardInterrupt
        and SystemExit are no longer swallowed.
        """
        try:
            response = self.__request(self.__url(method, query))
            if response.firstChild.getAttribute("code") != u"done":
                raise error
        except Exception:
            if _debug:
                sys.stderr.write(failed_text % text_args)
            return False
        if _debug:
            sys.stderr.write(done_text % text_args)
        return True

    def __request(self, url):
        """Fetch url and return the response parsed as a minidom document.

        Stores the response headers (lower-cased names) under "headers".
        Raises ThrottleError when pinboard.in answers with HTTP 429; other
        urllib2 errors propagate unchanged.
        """
        # Make sure that enough time has passed since the last request was
        # made. If less than two seconds have elapsed, halt execution for
        # approximately one second.
        if self.__lastrequest and (time.time() - self.__lastrequest) < 2:
            if _debug:
                sys.stderr.write("It has been less than two seconds since the last request; halting execution for one second.\n")
            time.sleep(1)
        if _debug and self.__lastrequest:
            sys.stderr.write("The delay between requests was %d.\n" % (time.time() - self.__lastrequest))
        self.__lastrequest = time.time()
        if _debug:
            sys.stderr.write("Opening %s.\n" % url)

        # The token is added to a separate variable so that it does not leak
        # into debug output or exception messages.
        request_url = url
        if self.__token:
            if "?" in url:
                separator = "&"
            else:
                separator = "?"
            request_url = "%s%sauth_token=%s" % (url, separator, self.__token)

        try:
            if self.__opener is not None:
                response = self.__opener.open(request_url)
            else:
                response = urllib2.urlopen(request_url)
        except urllib2.HTTPError:
            # urllib2 raises for non-2xx responses, so throttling has to be
            # detected here rather than on the response object.
            error = sys.exc_info()[1]
            if error.code == 429:
                raise ThrottleError(url, \
                        "429 HTTP status code returned by pinboard.in")
            raise

        try:
            headers = response.info()
            xml = response.read()
        finally:
            response.close()

        # gzip is requested, but only unpack what was really compressed.
        encoding = headers.get("Content-Encoding", "") or ""
        if "gzip" in encoding.lower():
            compressedstream = StringIO.StringIO(xml)
            xml = gzip.GzipFile(fileobj=compressedstream).read()

        parsed = {}
        for line in headers.headers:
            # Skip continuation lines and anything without a separator.
            if line[:1] in (" ", "\t") or ":" not in line:
                continue
            # Split once only: header values may themselves contain colons.
            (name, value) = line.split(":", 1)
            parsed[name.strip().lower()] = value.strip()
        self["headers"] = parsed

        if _debug:
            sys.stderr.write("%s opened successfully.\n" % url)
        return minidom.parseString(xml)

    # Methods to read pinboard.in content

    def last_update(self):
        """Return the last time that the pinboard account was updated."""
        document = self.__request(self.__url("posts/update"))
        return document.firstChild.getAttribute("time")

    def posts(self, tag="", date="", todt="", fromdt="", count=0, offset=0, only_toread=False):
        """Return pinboard.in bookmarks as a list of dictionaries.

        This should be used without arguments as rarely as possible by
        combining it with the last_updated attribute to only get all posts when
        there is new content as it places a large load on the pinboard.in
        servers.

        Keyword arguments:
        tag     -- only return posts with tag (default "")
        date    -- only return posts for a certain date (default "")
        fromdt  -- lower limit of date range (default "")
        todt    -- upper limit of date range (default "")
        count   -- number of posts to return, if 0 all posts are returned (default 0)
        offset  -- post to start returning after first post (default 0)
        only_toread -- only return posts where toread="yes" (default False)

        Dates may be strings, (year, month, day) sequences or date objects.
        Each post dictionary holds the attributes of the post; "tag" is
        returned as a list under "tags" and "time" is additionally parsed
        into "time_parsed".

        Raises DateParamsError if date is combined with todt or fromdt.
        """
        query = {}

        ## if a date is passed then a ranged set of date params CANNOT be passed
        if date and (todt or fromdt):
            raise DateParamsError

        if not (count or date or todt or fromdt or tag):
            path = "all"
            # If attempting to load all of the posts from pinboard.in, and
            # a previous download has been done, check to see if there has
            # been an update; if not, then just return the posts stored
            # inside the class.
            if _debug:
                sys.stderr.write("Checking to see if a previous download has been made.\n")
            if not self.__postschanged and self.__allposts:
                latest = self.last_update()
                if latest == self["last_updated"]:
                    if _debug:
                        sys.stderr.write("It has; returning old posts instead.\n")
                    # NOTE(review): the cached list is returned as stored,
                    # without applying only_toread.
                    return self["posts"]
                # Remember the new timestamp; otherwise every later call
                # would download all posts again.
                self["last_updated"] = latest
            elif not self.__allposts:
                if _debug:
                    sys.stderr.write("Making note of request for all posts.\n")
                self.__allposts = 1
        elif date:
            path = "get"
        elif todt or fromdt or (count and offset):
            path = "all"
        else:
            path = "recent"

        if count:
            if offset:
                query["start"] = offset
                query["results"] = count
            else:
                query["count"] = count
        if tag:
            query["tag"] = tag
        if todt:
            query["todt"] = self.__date_param(todt)
        if fromdt:
            query["fromdt"] = self.__date_param(fromdt)
        if date:
            query["dt"] = self.__date_param(date)

        postsxml = self.__request(
            self.__url("posts/%s" % path, query)).getElementsByTagName("post")
        posts = []
        if _debug:
            sys.stderr.write("Parsing posts XML into a list of dictionaries.\n")

        # For each post, extract every attribute (splitting tags into sub-lists)
        # and insert as a dictionary into the `posts` list.
        for post in postsxml:
            postdict = {}
            for (name, value) in post.attributes.items():
                if name == u"tag":
                    name = u"tags"
                    value = value.split(" ")
                if name == u"time":
                    postdict[u"time_parsed"] = time.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
                postdict[name] = value
            # If only_toread = True, keep the post only if toread = "yes".
            # If only_toread = False, disregard the toread property.
            if only_toread and postdict.get("toread") != "yes":
                continue
            posts.append(postdict)

        if _debug:
            sys.stderr.write("Inserting posts list into class attribute.\n")
        self.__remember("posts", posts)
        if _debug:
            sys.stderr.write("Resetting marker so module doesn't think posts has been changed.\n")
        self.__postschanged = 0
        return posts

    def suggest(self, url):
        """Return suggested tags for url.

        The result is a dictionary with the keys 'popular' and
        'recommended', each holding a list of tag names.
        """
        document = self.__request(self.__url("posts/suggest", {'url': url}))
        suggestions = {}
        for kind in ('popular', 'recommended'):
            # Elements without text have no firstChild; leave them out.
            suggestions[kind] = [node.firstChild.data
                                 for node in document.getElementsByTagName(kind)
                                 if node.firstChild is not None]
        return suggestions

    def tags(self):
        """Return a list of dictionaries, one per tag.

        The "tag" attribute is stored under "name" and "count" is
        converted to an integer.
        """
        tagsxml = self.__request(self.__url("tags/get")).getElementsByTagName("tag")
        tags = []
        if _debug:
            sys.stderr.write("Parsing tags XML into a list of dictionaries.\n")
        for tag in tagsxml:
            tagdict = {}
            for (name, value) in tag.attributes.items():
                if name == u"tag":
                    name = u"name"
                elif name == u"count":
                    value = int(value)
                tagdict[name] = value
            tags.append(tagdict)
        if _debug:
            sys.stderr.write("Inserting tags list into class attribute.\n")
        self.__remember("tags", tags)
        return tags

    def bundles(self):
        """Return a list of dictionaries, one per bundle."""
        bundlesxml = self.__request(
            self.__url("tags/bundles/all")).getElementsByTagName("bundle")
        bundles = []
        if _debug:
            sys.stderr.write("Parsing bundles XML into a list of dictionaries.\n")
        for bundle in bundlesxml:
            bundledict = {}
            for (name, value) in bundle.attributes.items():
                bundledict[name] = value
            bundles.append(bundledict)
        if _debug:
            sys.stderr.write("Inserting bundles list into class attribute.\n")
        self.__remember("bundles", bundles)
        return bundles

    def dates(self, tag=""):
        """Return a list of dictionaries, one per date with posts.

        Keyword arguments:
        tag -- only count posts with this tag (default "")

        "count" is converted to an integer and the date is additionally
        parsed into "date_parsed".
        """
        if tag:
            query = {"tag": tag}
        else:
            query = None
        datesxml = self.__request(
            self.__url("posts/dates", query)).getElementsByTagName("date")
        dates = []
        if _debug:
            sys.stderr.write("Parsing dates XML into a list of dictionaries.\n")
        for date in datesxml:
            datedict = {}
            for (name, value) in date.attributes.items():
                if name == u"date":
                    datedict[u"date_parsed"] = time.strptime(value, "%Y-%m-%d")
                elif name == u"count":
                    value = int(value)
                datedict[name] = value
            dates.append(datedict)
        if _debug:
            sys.stderr.write("Inserting dates list into class attribute.\n")
        self.__remember("dates", dates)
        return dates

    # Methods to modify pinboard.in content

    def add(self, url, description, extended="", tags=(), date="", toread="no", replace="no", shared="yes"):
        """Add a new post to pinboard.in

        Arguments:
        url         -- address of the bookmark
        description -- title of the bookmark
        extended    -- longer description (default "")
        tags        -- list/tuple of tags or a space-separated string
        date        -- string, list/tuple of numeric fields or date object;
                       partial dates are padded with zeros
        toread, replace, shared -- "yes"/"no" flags passed through as given

        Returns True if the post was added, False otherwise.
        Raises DateParamsError if date has fewer than three fields.
        """
        query = {}
        query["url"] = url
        query["description"] = description
        query["toread"] = toread
        query["replace"] = replace
        query["shared"] = shared
        if extended:
            query["extended"] = extended
        tag_text = self.__tags_param(tags)
        if tag_text:
            query["tags"] = tag_text
        if date:
            stamp = self.__timestamp(date)
            if stamp:
                query["dt"] = stamp
        return self.__modify("posts/add", query, AddError,
                             "Post, %s (%s), added to pinboard.in\n",
                             "Unable to add post, %s (%s), to pinboard.in\n",
                             (description, url))

    def bundle(self, bundle, tags):
        """Bundle a set of tags together

        tags may be a list/tuple of tags or a space-separated string.
        Returns True on success, False otherwise.
        """
        query = {}
        query["bundle"] = bundle
        tag_text = self.__tags_param(tags)
        if tag_text:
            query["tags"] = tag_text
        return self.__modify("tags/bundles/set", query, BundleError,
                             "Tags, %s, bundled into %s.\n",
                             "Unable to bundle tags, %s, into %s to pinboard.in\n",
                             (repr(tags), bundle))

    def delete(self, url):
        """Delete post from pinboard.in by its URL

        Returns True on success, False otherwise.
        """
        return self.__modify("posts/delete", {"url": url}, DeleteError,
                             "Post, %s, deleted from pinboard.in\n",
                             "Unable to delete post, %s, from pinboard.in\n",
                             (url,))

    def delete_bundle(self, name):
        """Delete bundle from pinboard.in by its name

        Returns True on success, False otherwise.
        """
        return self.__modify("tags/bundles/delete", {"bundle": name},
                             DeleteBundleError,
                             "Bundle, %s, deleted from pinboard.in\n",
                             "Unable to delete bundle, %s, from pinboard.in\n",
                             (name,))

    def rename_tag(self, old, new):
        """Rename a tag

        Returns True on success, False otherwise.
        """
        return self.__modify("tags/rename", {"old": old, "new": new},
                             RenameTagError,
                             "Tag, %s, renamed to %s\n",
                             "Unable to rename %s tag to %s in pinboard.in\n",
                             (old, new))

    def delete_tag(self, name):
        """Delete a tag from pinboard.in by its name

        Returns True on success, False otherwise.
        """
        # No dedicated exception exists for tags; the base error is used
        # instead of the unrelated DeleteBundleError.
        return self.__modify("tags/delete", {"tag": name}, PinboardError,
                             "Tag, %s, deleted from pinboard.in\n",
                             "Unable to delete tag, %s, from pinboard.in\n",
                             (name,))
if __name__ == "__main__":
    # Print the module version when asked for it.  The argument is checked
    # for existence first: indexing sys.argv[1:] unconditionally raised an
    # IndexError when the module was run without arguments.
    if len(sys.argv) > 1 and sys.argv[1] in ('-v', '--version'):
        # Function-call form gives the same output with the print statement.
        print(__version__)
#REVISION HISTORY
## Left as legacy for now; this should probably be removed now that the module targets pinboard.in.
#0.1 - 29/3/2005 - PEM - Initial version.
#0.2 - 30/3/2005 - PEM - Now using urllib's urlencode to handle query building
# and the class now extends dict (or failing that: UserDict).
#0.3 - 30/3/2005 - PEM - Rewrote doc strings and improved the metaphor that the
# account is a dictionary by adding posts, tags and dates to the account
# object when they are called. This has the added benefit of reducing
# requests to delicious as one need only call posts(), dates() and tags()
# once and they are stored inside the class instance until deletion.
#0.4 - 30/3/2005 - PEM - Added private __request method to handle URL requests
# to del.icio.us and implemented throttle detection.
#0.5 - 30/3/2005 - PEM - Now implements every part of the API specification
#0.6 - 30/3/2005 - PEM - Heavily vetted code to conform with PEP 8: use of
# isinstance(), use of `if var` and `if not var` instead of comparison to
# empty strings and changed all string delimiters to double primes for
# consistency.
#0.7 - 31/3/2005 - PEM - Made it so that when a fetching operation such as
# posts() or tags() is used, only new posts are added to the class dictionary
# in part to increase efficiency and to prevent, say, an all posts call of
# posts() being overwritten by a specific request such as posts(tag="ruby")
# Added more intelligent date handling for adding posts; will now attempt to
# format any *reasonable* string, tuple or list into an ISO8601 date. Also
# changed the command to get the lastupdate as it was convoluted. The
# all posts command now checks to see if del.icio.us has been updated since
# it was last called, again, this is to reduce the load on the servers and
# increase speed a little. Changed the version string to a pre-1.0 release
# Subversion-generated one because I am lazy.
#0.8 - 1/4/2005 - PEM - Improved intelligence of posts caching: will only
# re-download all posts if the posts attribute has been changed. Added
# the mandatory delay between requests of at least one second. Changed the
# crude string replace method to encode ampersands with a more intelligent
# regular expression.
#0.9 - 2/4/2005 - PEM - Now uses datetime objects when possible.
#0.10 - 4/4/2005 - PEM - Uses the time module when the datetime module is
# unavailable (such as versions of Python prior to 2.3). Now uses time
# tuples instead of datetime objects when outputting for compatibility and
# consistency. Time tuples are a new attribute: "date_parsed", with the
# original string format of the date (or datetime) in "date" etc. Now stores
# the headers of each request.