-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
80 lines (64 loc) · 2.28 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import jinja2
import json
import logging
import os
import re
import urllib
import urllib2
import webapp2
from google.appengine.api import urlfetch
# Boilerpipe Removal and Fulltext Extraction, (c) http://www.kohlschutter.com/
# Base URL of the extraction service; the query string is appended directly.
ENDPOINT = "http://boilerpipe-web.appspot.com/extract?"

# Template utils
# The templates directory is resolved relative to this module instead of the
# process working directory, so template lookup does not depend on where the
# application happens to be started from.
template_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'templates')
# Autoescaping is on: values passed to templates are HTML-escaped unless the
# template explicitly marks them safe.
jinja_env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(template_dir),
    autoescape=True)
def render_str(template, **params):
    """Render a template from ``jinja_env`` and return the resulting string.

    ``template`` is the template name handed to ``jinja_env.get_template``;
    the keyword arguments become the template's rendering context.
    """
    return jinja_env.get_template(template).render(params)
class Handler(webapp2.RequestHandler):
    """Request handler base class with response-writing and template helpers."""

    def write(self, *args, **kwargs):
        """Pass all arguments straight through to the response output writer."""
        out = self.response.out
        out.write(*args, **kwargs)

    def render(self, template, **context):
        """Render ``template`` with ``context`` via ``render_str`` and write it."""
        rendered = render_str(template, **context)
        self.write(rendered)
class Reader(Handler):
    """Serve the reader page and turn submitted text (or a URL) into a book.

    GET renders ``reader.html`` with no extra context. POST reads the ``text``
    form field; if it is an http(s) URL the page is run through the extraction
    service at ``ENDPOINT`` and the extracted content replaces the submitted
    text. Any resulting text is whitespace-normalised, its long words are
    hyphenated, and it is handed to the template as ``label`` and ``book``.
    """

    # Words longer than MAX_WORD_LEN are cut into CHUNK_LEN-character pieces.
    MAX_WORD_LEN = 15
    CHUNK_LEN = 10
    # Number of leading characters of the text quoted in the label.
    LABEL_LEN = 28
    # Prefixes that make the submitted text be treated as a URL to fetch.
    URL_PREFIXES = ('http://', 'https://')

    def get(self):
        """Render the reader page without any user-supplied book."""
        self.render("reader.html")

    def post(self):
        """Build the template context from the ``text`` field and render it."""
        self.params = dict(book="")
        self.text = self.request.get('text').strip()

        if self.text.startswith(self.URL_PREFIXES):
            # On failure this returns None and records the reason as the title.
            self.text = self._extract_article(self.text)

        if self.text:
            # Collapse every whitespace run so splitting on " " yields words.
            text = re.sub(r'\s+', ' ', self.text)
            self.params['label'] = u"\"{}...\"".format(text[:self.LABEL_LEN])
            self.params['book'] = self._book_script(text)

        self.render("reader.html", **self.params)

    def _extract_article(self, url):
        """Return the text extracted from ``url``, or None when unavailable.

        Sets ``self.params['title']`` to the extracted title on success, to
        'No content found.' when the service returns empty content, and to
        'Error accessing url.' when the fetch or the response parsing fails.
        """
        # Percent-encode the target URL so its own '&', '#', '?' or non-ASCII
        # characters cannot corrupt the extraction service's query string.
        query = urllib.urlencode(
            [('url', url.encode('utf-8')), ('output', 'json')])
        try:
            response = urlfetch.fetch(ENDPOINT + query)
            data = json.loads(response.content)['response']
            content = data['content']
            self.params['title'] = data['title']
        except Exception:
            # Best effort: any fetch/parse failure becomes a user-visible
            # message, but the cause is logged instead of silently dropped.
            logging.exception("Extraction failed for %r", url)
            self.params['title'] = 'Error accessing url.'
            return None

        if not content:
            self.params['title'] = 'No content found.'
            return None
        return content

    @classmethod
    def _hyphenate(cls, text):
        """Split over-long words of ``text`` into hyphen-terminated chunks."""
        pieces = []
        for word in text.split(" "):
            # Keep peeling CHUNK_LEN characters off until the rest is short.
            while len(word) > cls.MAX_WORD_LEN:
                pieces.append(u"{}-".format(word[:cls.CHUNK_LEN]))
                word = word[cls.CHUNK_LEN:]
            pieces.append(word)
        return u" ".join(pieces)

    @classmethod
    def _book_script(cls, text):
        """Return the ``<script>`` snippet assigning ``text`` to books['user']."""
        payload = json.dumps(cls._hyphenate(text))
        # json.dumps leaves '<', '>' and '&' untouched, so text containing
        # "</script>" could terminate the tag early and inject markup.
        # Their \uXXXX forms are equivalent inside a JSON/JS string literal.
        # NOTE(review): presumably the template emits ``book`` unescaped
        # (autoescape is on) - confirm against reader.html.
        for char, escaped in (('<', '\\u003c'),
                              ('>', '\\u003e'),
                              ('&', '\\u0026')):
            payload = payload.replace(char, escaped)
        return "<script>books['user']={}</script>".format(payload)
# WSGI application object: the single catch-all pattern routes every request
# path to Reader.
# NOTE(review): debug=True is passed to webapp2 here - confirm this is
# intended outside of development.
app = webapp2.WSGIApplication(routes=[('/?.*', Reader)], debug=True)