-
Notifications
You must be signed in to change notification settings - Fork 2
/
autocatch.py
executable file
·282 lines (243 loc) · 8.81 KB
/
autocatch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#!/usr/bin/python
import os
import sys
import re
import datetime
import time
from time import sleep
import zipfile
import hashlib
import binascii
import mimetypes
from lxml import etree
import evernote.edam.type.ttypes as Types
from evernote.api.client import EvernoteClient
from evernote.api.client import NoteStore
from evernote.edam.error.ttypes import EDAMSystemException
from evernote.edam.error.ttypes import EDAMUserException
# Replacement title used when creating a note fails with an
# EDAMUserException whose errorCode is 2; the importer then retries the
# note with this title instead of the one stored in the export.
bad_title_name = "Note with removed title"
def plunk(_):
    """Write ``_`` to standard output and flush it straight away.

    No newline is appended, which lets callers build up a progress line
    one mark at a time (or redraw it using a trailing carriage return).
    """
    stream = sys.stdout
    stream.write(_)
    stream.flush()
def bale(note_file, e):
    """Tell the user that one note could not be imported and is skipped.

    note_file -- object whose ``note`` attribute identifies the note
    e         -- the error that stopped the import; shown through str()

    Only prints; the caller decides how to carry on.
    """
    note_name = note_file.note
    print("Sorry, I could not import note %s" % note_name)
    reason = str(e)
    print(" Error Message is " + reason)
    print(" skipping...")
def get_zip_file_name(directory="."):
    """Return the file name of the Catch Notes export archive, if any.

    An entry qualifies when its name starts with "Catch Notes" and ends
    with ".zip".  When several entries qualify, the alphabetically first
    one is returned, so the choice no longer depends on the arbitrary
    order in which os.listdir reports directory entries.

    directory -- folder to search; defaults to the current directory,
                 which is what the original hard-coded

    Returns the bare file name (not joined with ``directory``), or None
    when no matching archive exists.
    """
    candidates = sorted(
        entry for entry in os.listdir(directory)
        if entry.startswith("Catch Notes") and entry.endswith(".zip")
    )
    if not candidates:
        return None
    return candidates[0]
def extract_tags(content):
    """Return the distinct hashtags that occur in ``content``.

    A hashtag is a '#' followed by one or more word characters, where the
    '#' is not glued to a preceding word character (so "issue#12" is not
    a tag).  A '#' directly after '&' is ignored as well: that is the
    start of a numeric character reference such as "&#39;", which would
    otherwise be reported as the bogus tag "39".

    content -- text to scan

    Returns the tag names without the leading '#', each once, in order of
    first appearance (the previous set-based version returned them in
    arbitrary order).
    """
    seen = set()
    tags = []
    for match in re.findall(r'(?<!&)\B#\w+', content):
        name = match[1:]
        if name not in seen:
            seen.add(name)
            tags.append(name)
    return tags
def get_milliseconds_from_utc(utc):
    """Convert a UTC timestamp string to milliseconds since the Unix epoch.

    utc -- timestamp formatted as "%Y%m%dT%H%M%SZ", e.g. "20130801T120000Z"

    Returns an int.  Raises ValueError (from strptime) when ``utc`` does
    not match the format.

    The previous implementation passed the parsed value to time.mktime,
    which interprets it as *local* time, so results were shifted by the
    machine's UTC offset.  The difference to the epoch is now computed
    directly, which is independent of the local time zone.

    NOTE: notes uploaded by an earlier run on a machine whose time zone
    was not UTC carry the shifted timestamp, so the duplicate check that
    compares creation times will not recognise them.
    """
    parsed = datetime.datetime.strptime(utc, "%Y%m%dT%H%M%SZ")
    elapsed = parsed - datetime.datetime(1970, 1, 1)
    return (elapsed.days * 86400 + elapsed.seconds) * 1000
def pause(seconds):
    """Block for ``seconds`` plus 10 seconds while showing a countdown.

    seconds -- requested wait in whole seconds (the callers pass the
               rateLimitDuration of an Evernote error)

    The countdown line is redrawn once per second using a trailing
    carriage return; when the wait is over it is replaced by a summary
    of how long the pause lasted.
    """
    print("")
    total_wait = seconds + 10
    remaining = total_wait
    while remaining:
        minutes, secs = divmod(remaining, 60)
        plunk("pushing data too fast for evernote, pausing for %s minutes and %s seconds \r" % (minutes, secs))
        sleep(1)
        remaining -= 1
    plunk("paused for %s minutes and %s seconds because we were pushing data too fast for evernote\r" % divmod(total_wait, 60))
    print("")
class NoteAlreadyExists(Exception):
    """Raised by NoteImporter.create_note when the target notebook
    already holds a note with the same creation timestamp."""
class NoteFile:
    """An entry of the export archive that belongs to a single note.

    Built from the entry's '/'-separated path.  Construction raises
    Exception for entries the importer does not use, so callers can
    filter an archive listing simply by trying to build a NoteFile for
    every name.

    Attributes:
      full_name -- the path exactly as given
      paths     -- the path split on '/'
      space     -- third-from-last path element, or the one before it
                   when that element is the literal "notes"
      note      -- name of the directory holding the file
      file      -- the file's own name
    """

    def __init__(self, file_name):
        segments = file_name.split('/')
        self.full_name = file_name
        self.paths = segments
        if len(segments) <= 3:
            raise Exception("not a note path: " + file_name)

        # A literal "notes" directory may sit between the space and the
        # note directory; step over it when present.
        above_note = segments[-3]
        if above_note == "notes":
            self.space = segments[-4]
        else:
            self.space = above_note
        self.note, self.file = segments[-2:]

        leaf = self.file
        if leaf == "note.txt" or leaf == "note.html":
            raise Exception(leaf + " files are not considered NoteFiles: " + file_name)
        if leaf in ("notes.txt", "notes.html", "notes.enex"):
            raise Exception("Not interested in notes files: " + file_name)
        if self.space == "All Notes" and leaf == "note.enex":
            raise Exception("Not interested in All Notes note.enex files: " + file_name)
class NoteImporter:
    """Uploads the notes of a Catch Notes export archive to Evernote.

    Construction connects to Evernote with the developer token (the
    sandbox service is tried first, then production), opens the export
    .zip found in the current directory and indexes its entries.  Call
    import_notes_into_evernote() to perform the upload.

    Attributes set by __init__:
      token, sandbox, client, note_store, notebooks -- Evernote session
      zip_file         -- the opened export archive
      space_files      -- {space name: {note name: [NoteFile, ...]}}
      already_createds -- {notebook guid: {creation timestamp: True}}
    """

    # EDAMErrorCode values this importer reacts to.
    _BAD_DATA_FORMAT = 2
    _RATE_LIMIT_REACHED = 19

    def __init__(self, token):
        """Connect to Evernote and index the export archive.

        token -- Evernote developer token

        Raises whatever the production connection attempt raised when
        neither service accepts the token, and Exception when no export
        archive is found in the current directory.
        """
        self.token = token
        for sandbox in (True, False):
            self.sandbox = sandbox
            self.client = EvernoteClient(token=token, sandbox=sandbox)
            try:
                self._connect()
                break
            except Exception:
                # A failure against the sandbox is not fatal (the token
                # may belong to the production service); only a
                # production failure ends the attempt.  Catching
                # Exception rather than everything lets Ctrl-C through.
                if not sandbox:
                    print("Trouble accessing your evernote account with the provided developer token: '%s'" % token)
                    raise
        zip_file_name = get_zip_file_name()
        if not zip_file_name:
            raise Exception("I did not see an Catch Notes .zip file,\n please run the script in the directory of the exported Catch Notes .zip file")
        self.zip_file = zipfile.ZipFile(zip_file_name, "r")
        self.space_files = self.get_space_files()
        self.already_createds = {}

    def _connect(self):
        """Fetch the note store and the notebook list from self.client.

        A rate-limit error is waited out and the same service is tried
        again; any other error propagates to the caller.
        """
        while True:
            try:
                self.note_store = self.client.get_note_store()
                self.notebooks = self.note_store.listNotebooks()
                return
            except EDAMSystemException as e:
                if e.errorCode != self._RATE_LIMIT_REACHED:
                    raise
                pause(e.rateLimitDuration)

    def set_already_createds_for_notebook(self, guid):
        """Record the creation timestamps of all notes in a notebook.

        guid -- guid of the notebook to scan

        The result is stored in self.already_createds[guid] and is what
        create_note consults to skip notes uploaded by an earlier run.
        """
        note_filter = NoteStore.NoteFilter()
        note_filter.notebookGuid = guid
        spec = NoteStore.NotesMetadataResultSpec()
        spec.includeCreated = True
        page_size = 250
        offset = 0
        known = {}
        while True:
            # The third argument is the maximum number of notes to
            # return, not an end index, so it stays constant.
            page = self.note_store.findNotesMetadata(note_filter, offset, page_size, spec)
            if not page.notes:
                break
            for info in page.notes:
                known[info.created] = True
            # Advance by what was actually received so that a short
            # page does not cause notes to be skipped.
            offset += len(page.notes)
        self.already_createds[guid] = known

    def get_space_files(self):
        """Group the archive's usable entries by space and by note.

        Returns {space name: {note name: [NoteFile, ...]}}.
        """
        space_files = {}
        for file_name in self.zip_file.namelist():
            try:
                note_file = NoteFile(file_name)
            except Exception:
                # NoteFile raises for entries the importer does not use.
                continue
            notes_in_space = space_files.setdefault(note_file.space, {})
            notes_in_space.setdefault(note_file.note, []).append(note_file)
        return space_files

    def create_notebook(self, space):
        """Return the notebook for ``space``, creating it when needed.

        Returns (guid, is_new).  When a notebook with that name already
        exists, its notes' creation timestamps are loaded first so that
        notes uploaded earlier can be skipped.
        """
        existing_notebooks = [notebook for notebook in self.notebooks if space == notebook.name]
        if existing_notebooks:
            guid = existing_notebooks[0].guid
            self.set_already_createds_for_notebook(guid)
            return guid, False
        notebook = Types.Notebook()
        notebook.name = space
        notebook = self.note_store.createNotebook(notebook)
        return notebook.guid, True

    def create_note(self, notebook_guid, note_file, attachments, title_override=None):
        """Build a note from an exported .enex entry and upload it.

        notebook_guid  -- guid of the destination notebook
        note_file      -- NoteFile of the note's .enex entry
        attachments    -- NoteFiles to attach as resources (may be empty)
        title_override -- title to use instead of the exported one

        Returns the note object that was sent.  Raises NoteAlreadyExists
        when the notebook already holds a note with the same creation
        timestamp; errors from the Evernote call propagate.

        Progress marks written to stdout: '"' already present, '.' note,
        '@' has attachments, '#' has tags.
        """
        data = self.zip_file.read(note_file.full_name)
        info = etree.fromstring(data)
        note = Types.Note()
        note.notebookGuid = notebook_guid
        if title_override:
            note.title = title_override
        else:
            # .text is None for an empty <title/> element; fall back to
            # the placeholder title rather than crash or send a blank.
            raw_title = info.xpath("//note/title")[0].text or ""
            note.title = raw_title.replace("\t", " ").strip().encode('utf-8') or bad_title_name
        note.created = get_milliseconds_from_utc(info.xpath("//note/created")[0].text)
        note.updated = get_milliseconds_from_utc(info.xpath("//note/updated")[0].text)
        note.attributes = Types.NoteAttributes()
        note.attributes.author = info.xpath("//note/note-attributes/author")[0].text.encode('utf-8')
        content = info.xpath("//note/content")[0].text
        if notebook_guid in self.already_createds \
                and note.created in self.already_createds[notebook_guid]:
            plunk('"')
            raise NoteAlreadyExists
        plunk(".")
        if attachments:
            plunk("@")
            note.resources = [self.create_attachment(attachment)
                              for attachment in attachments]
            # Reference every resource at the end of the note body.
            media_tags = "".join(
                '<en-media type="%s" hash="%s"/>'
                % (resource.mime, binascii.hexlify(resource.data.bodyHash))
                for resource in note.resources)
            content = content.replace(
                "</en-note>", "<br /><br />" + media_tags + "</en-note>")
        note.content = content.encode('utf-8')
        note.tagNames = extract_tags(content)
        if note.tagNames:
            plunk("#")
        self.note_store.createNote(note)
        return note

    @staticmethod
    def _guess_mime(file_name):
        """Return the MIME type to declare for an attachment file name.

        Names ending in "3gpp" or "mp4" are declared as audio; everything
        else is guessed from the name, with "application/octet-stream" as
        the fallback so that the type is never None.
        """
        if file_name.endswith("mp4"):
            return "audio/mp4"
        if file_name.endswith("3gpp"):
            return "audio/3gpp"
        return mimetypes.guess_type(file_name)[0] or "application/octet-stream"

    def create_attachment(self, attachment_note_file):
        """Build an Evernote resource from an attachment archive entry.

        attachment_note_file -- NoteFile of the attachment

        Returns the resource with body, size, MD5 body hash, file name
        and MIME type filled in.
        """
        file_data = self.zip_file.read(attachment_note_file.full_name)
        data = Types.Data()
        data.size = len(file_data)
        data.bodyHash = hashlib.md5(file_data).digest()
        data.body = file_data
        resource = Types.Resource()
        resource.data = data
        resource.fileName = attachment_note_file.file
        resource.mime = self._guess_mime(resource.fileName)
        return resource

    def _upload_note(self, notebook_guid, note_file, attachments):
        """Upload one note, recovering from the errors that can be retried.

        A rate-limit error is waited out and the upload repeated.  An
        errorCode 2 user error is retried once with the placeholder
        title.  Any other Evernote error, including one raised by a
        retry, is reported through bale().

        Returns the created note, or None when the note already existed
        or had to be skipped.
        """
        title_override = None
        while True:
            try:
                return self.create_note(notebook_guid, note_file, attachments,
                                        title_override=title_override)
            except NoteAlreadyExists:
                return None
            except EDAMUserException as e:
                if e.errorCode != self._BAD_DATA_FORMAT or title_override is not None:
                    bale(note_file, e)
                    return None
                print("")
                print("had trouble with title for note %s\n trying with '%s'"
                      % (note_file.note, bad_title_name))
                title_override = bad_title_name
            except EDAMSystemException as e:
                if e.errorCode != self._RATE_LIMIT_REACHED:
                    bale(note_file, e)
                    return None
                pause(e.rateLimitDuration)

    def import_notes_into_evernote(self):
        """Upload every space of the archive as an Evernote notebook.

        The "All Notes" space is not uploaded as a notebook; its entries
        supply the attachments for the notes of the other spaces.  A
        summary line per notebook reports what was actually uploaded.
        """
        space_files = self.space_files
        attachments_by_note = space_files.get("All Notes", {})
        for space in sorted(space_files):
            if space == "All Notes":
                continue
            notebook_guid, is_new = self.create_notebook(space)
            action = "Created" if is_new else "Continuing with"
            note_files = space_files[space]
            print("%s '%s' Notebook and uploading its %s notes..."
                  % (action, space, len(note_files)))
            uploaded = 0
            total_attachments = 0
            total_tags = 0
            for counter, note in enumerate(sorted(note_files)):
                # Lay the progress marks out in groups of 5, rows of 25
                # and blocks of 100.
                if counter:
                    if counter % 5 == 0:
                        plunk(" ")
                    if counter % 25 == 0:
                        print("")
                    if counter % 100 == 0:
                        print("")
                note_file = note_files[note][0]
                attachments = attachments_by_note.get(note, [])
                created_note = self._upload_note(notebook_guid, note_file, attachments)
                if created_note is None:
                    continue
                # Count only what reached Evernote, so the summary does
                # not include skipped or failed notes.
                uploaded += 1
                total_attachments += len(attachments)
                total_tags += len(created_note.tagNames)
            print("")
            message = "Completed '%s' Notebook upload: uploaded %s notes" % (space, uploaded)
            if total_attachments:
                message += ", %s attachments" % total_attachments
            if total_tags:
                message += ", %s tags" % total_tags
            print(message)
            print("")
# Script entry point: the developer token is the first command-line argument.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        note_importer = NoteImporter(sys.argv[1])
        note_importer.import_notes_into_evernote()
    else:
        print("Please include your developer token on the command line")