forked from dariusk/rss-to-activitypub
-
Notifications
You must be signed in to change notification settings - Fork 0
/
updateFeeds.js
282 lines (247 loc) · 9.43 KB
/
updateFeeds.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
// Site configuration. Of the destructured values, only DOMAIN is referenced in
// the visible part of this file.
const config = require('./config.json');
const { DOMAIN, PRIVKEY_PATH, CERT_PATH, PORT_HTTP, PORT_HTTPS } = config;
const Database = require('better-sqlite3');
// One comma-chained `const` declaration: the SQLite handle used by every
// function below, plus the HTTP/crypto helpers and the shared RSS parser.
const db = new Database('bot-node.db'),
Parser = require('rss-parser'),
request = require('request'),
crypto = require('crypto'),
// NOTE(review): timeout is presumably in milliseconds — confirm against the
// rss-parser documentation.
parser = new Parser({timeout: 2000});
// Beanstalkd client; jobs reserved from it carry the feed URL to refresh.
const Jackd = require('jackd');
const beanstalkd = new Jackd();
// The result of connect() is not awaited or checked here.
// NOTE(review): if connect() is asynchronous, the worker loop may issue its
// first reserve() before the connection is ready — confirm.
beanstalkd.connect()
/**
 * Worker loop: reserves jobs from beanstalkd forever and refreshes the feed
 * named by each job's payload.
 *
 * A job is deleted from the queue before its feed is processed, so a job is
 * never retried through the queue. Any error raised while handling a job is
 * written to stderr and the loop carries on with the next reservation.
 *
 * @returns {Promise<void>} Does not settle while the loop is running.
 */
async function foo() {
  for (;;) {
    try {
      const job = await beanstalkd.reserve();
      const feedUrl = job.payload;
      console.log(feedUrl);
      // Acknowledge first, then do the work.
      await beanstalkd.delete(job.id);
      await doFeed(feedUrl);
    } catch (failure) {
      console.error(failure);
    }
  }
}
// Start the worker loop. The returned promise is neither awaited nor given a
// rejection handler; foo() catches per-iteration errors inside its own loop.
foo()
/**
 * Fetch one RSS feed, compare it with the copy stored in the `feeds` table,
 * and send a post to the feed account's followers for every item that is new.
 *
 * Items are identified by `guid`, falling back to `title` and then
 * `description` because `guid` is optional in RSS. When at least one new item
 * is found, the stored copy is replaced with the freshly fetched feed.
 *
 * Failures (fetch errors, no stored row for `feedUrl`, malformed stored JSON,
 * errors raised while building or sending a post) are logged with
 * `console.log` and never propagate: the returned promise then resolves to
 * `undefined`.
 *
 * @param {string} feedUrl URL of the feed; also the key of its `feeds` row.
 * @returns {Promise<string|undefined>} A short status string on success,
 *   `undefined` when the update failed.
 */
function doFeed(feedUrl) {
  // Feed titles and links are untrusted text; escape them before placing them
  // into HTML text or attribute positions.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');

  // Identity of an item for diffing purposes.
  const itemKey = (el) => el.guid || el.title || el.description;

  // Build the HTML body of the post for one (already transformed) item.
  const buildMessage = (item) => {
    const link = item.link || '';
    let message;
    if (/\/twitter\.com\//.test(link)) {
      // Twitter-sourced items already carry their full text as content.
      message = `${item.content}`;
    } else {
      const titleHtml = escapeHtml(item.title || link);
      const heading = link ? `<a href="${escapeHtml(link)}">${titleHtml}</a>` : titleHtml;
      message = `<p>${heading}</p><p>${item.content || ''}</p>`;
    }
    const enclosureUrl = item.enclosure && item.enclosure.url;
    if (enclosureUrl && enclosureUrl.includes('.mp3')) {
      const safeUrl = escapeHtml(enclosureUrl);
      message += `<p><a href="${safeUrl}">${safeUrl}</a></p>`;
    }
    return message;
  };

  return new Promise((resolve, reject) => {
    // fetch new RSS for the feed
    parser.parseURL(feedUrl, (err, feedData) => {
      if (err) {
        reject(new Error(`error fetching ${feedUrl}; ${err}`));
        return;
      }
      // Everything below runs inside the parser's callback, where a thrown
      // exception would bypass the promise entirely; route it to reject().
      try {
        const feed = db.prepare('select * from feeds where feed = ?').get(feedUrl);
        if (feed === undefined) {
          throw new Error(`no stored feed record for ${feedUrl}`);
        }
        // A stored copy without items is treated as empty, i.e. every fetched
        // item counts as new.
        const oldFeed = JSON.parse(feed.content);
        const oldItems = (oldFeed && oldFeed.items) || [];
        const newItems = (feedData && feedData.items) || [];

        // Select new items by their own key. Testing guid, title and
        // description independently could re-send an old item whose title
        // happens to equal some new item's key.
        const oldKeys = new Set(oldItems.map(itemKey));
        const brandNewItems = newItems.filter((el) => !oldKeys.has(itemKey(el)));
        const difference = [...new Set(brandNewItems.map(itemKey))];
        console.log('diff', feed.feed, difference.length, difference);

        if (brandNewItems.length === 0) {
          resolve('done with ' + feedUrl + ', no change');
          return;
        }

        const acct = feed.username;
        // send the message to everyone for each item
        for (const rawItem of brandNewItems) {
          const item = transformContent(rawItem);
          sendCreateMessage(buildMessage(item), acct, DOMAIN, null, null, item);
        }

        // update the DB with new contents
        const content = JSON.stringify(feedData);
        db.prepare('insert or replace into feeds(feed, username, content) values(?, ?, ?)').run(feed.feed, acct, content);
        resolve('done with ' + feedUrl);
      } catch (e) {
        reject(e);
      }
    });
  }).catch((e) => console.log(e));
}
// TODO: update the display name of a feed if the feed title has changed
// This is a function with a bunch of custom rules for different kinds of content I've found in the wild in things like Reddit rss feeds. Right now we just use the first image we find, if any.
function transformContent(item) {
let cheerio = require('cheerio');
console.log(JSON.stringify(item));
if (item.content === undefined) {
item.urls = [];
return item;
}
let $ = cheerio.load(item.content);
// look through all the links to find images
let links = $('a');
let urls = [];
//console.log('links', links.length);
links.each((i,e) => {
let url = $(e).attr('href');
// if there's an image, add it as a media attachment
if (url && url.match(/(http)?s?:?(\/\/[^"']*\.(?:png|jpg|jpeg|gif|png|svg))/)) {
//console.log(url);
urls.push(url);
}
});
// look through all the images
let images = $('img');
images.each((i,e) => {
let url = $(e).attr('src');
// if there's an image, add it as a media attachment
if (url) {
//console.log(url);
urls.push(url);
// remove the image from the post body since it's in the attachment now
$(e).remove();
}
});
item.urls = urls;
// find iframe embeds and turn them into links
let iframes = $('iframe');
iframes.each((i,e) => {
console.log('iframe',i,e);
let url = $(e).attr('src');
$(e).replaceWith($(`<a href="${url}">[embedded content]</a>`));
});
// remove multiple line breaks
//$('br').remove();
$('p').each((i, el) => {
if($(el).html().replace(/\s| /g, '').length === 0) {$(el).remove();}
});
// couple of hacky regexes to make sure we clean up everything
item.content = $('body').html().replace(/^(\n|\r)/g,'').replace(/>\r+</g,'><').replace(/ +/g, '');
item.content = item.content.replace(/^(\n|\r)/g,'').replace(/>\r+</g,'><').replace(/>\s*</g,'><').replace(/​/g,'').replace(/>\u200B+</g,'><').replace(/ +/g, '').replace(/<p><\/p>/g,'').replace(/(<br\/?>)+/g,'<br>');
return item;
}
// for each item in the list, get the account corresponding to the username
// for each item in the ITEMS list, send a message to all followers
// TODO: import these from a helper
/**
 * Sign an activity with the account's private key and POST it to an inbox.
 *
 * The signature covers `(request-target)`, `host` and `date`, and is sent in
 * the `Signature` header with `keyId` pointing at `https://<domain>/u/<name>`.
 * If the `accounts` table has no row for `<name>@<domain>`, a message is
 * logged and nothing is sent. Delivery errors and HTTP error statuses are
 * logged; they are not reported to the caller.
 *
 * @param {object} message Activity to deliver; sent as the JSON body.
 * @param {string} name Local account name (without domain).
 * @param {string} domain Domain of this server.
 * @param {*} req Unused.
 * @param {*} res Unused.
 * @param {string} targetDomain Value used for the Host header and the signed
 *   `host` line.
 * @param {string} inbox Full URL of the recipient's inbox.
 */
function signAndSend(message, name, domain, req, res, targetDomain, inbox) {
  console.log('sending to ', name, targetDomain, inbox);

  // The signed request-target must be the path (plus query) actually
  // requested. Parse the URL rather than stripping a prefix, which breaks for
  // inboxes with a port, an http:// scheme, or a differently-cased host.
  let requestTarget;
  try {
    const inboxUrl = new URL(inbox);
    requestTarget = inboxUrl.pathname + inboxUrl.search;
  } catch (e) {
    // Not an absolute URL: fall back to plain prefix stripping.
    requestTarget = inbox.replace('https://' + targetDomain, '');
  }

  // get the private key
  const account = db.prepare('select privkey from accounts where name = ?').get(`${name}@${domain}`);
  if (account === undefined) {
    console.log(`No record found for ${name}.`);
    return;
  }

  // The same date string must be signed and sent.
  const date = new Date().toUTCString();
  const stringToSign = `(request-target): post ${requestTarget}\nhost: ${targetDomain}\ndate: ${date}`;
  const signer = crypto.createSign('sha256');
  signer.update(stringToSign);
  signer.end();
  const signatureB64 = signer.sign(account.privkey).toString('base64');
  const header = `keyId="https://${domain}/u/${name}",headers="(request-target) host date",signature="${signatureB64}"`;

  request({
    url: inbox,
    headers: {
      'Host': targetDomain,
      'Date': date,
      'Signature': header
    },
    method: 'POST',
    json: true,
    body: message
  }, (error, response) => {
    // Delivery is best-effort, but failures should at least be visible.
    if (error) {
      console.log(`Error delivering to ${inbox}:`, error);
      return;
    }
    if (response && response.statusCode >= 400) {
      console.log(`Delivery to ${inbox} failed with status ${response.statusCode}`);
    }
  });
}
/**
 * Build a `Create` activity wrapping a `Note` for one follower and store both
 * in the `messages` table.
 *
 * Attachments are chosen in this order:
 *   1. an `.mp3` enclosure becomes a single audio document;
 *   2. more than one image URL becomes an array of up to four image documents;
 *   3. exactly one image URL becomes a single image document (not an array).
 *
 * @param {string} text HTML content of the note.
 * @param {string} name Local account name (without domain).
 * @param {string} domain Domain of this server.
 * @param {object} item Feed item; `link`, `enclosure` and `urls` are read.
 * @param {string} follower Actor URL the activity is addressed to.
 * @param {string} guidNote Identifier used for the note's id and storage key.
 * @returns {object} The `Create` activity.
 */
function createMessage(text, name, domain, item, follower, guidNote) {
  const MAX_IMAGES = 4;
  const imageTypes = new Map([
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['gif', 'image/gif'],
    ['svg', 'image/svg+xml'],
    ['webp', 'image/webp'],
  ]);
  // Guess the media type from the file extension; unknown or missing
  // extensions keep the historical default of image/png.
  const imageMediaType = (url) => {
    const match = /\.([a-z0-9]+)(?:[?#]|$)/i.exec(String(url));
    return (match && imageTypes.get(match[1].toLowerCase())) || 'image/png';
  };
  const imageDocument = (url) => ({
    'type': 'Document',
    'mediaType': imageMediaType(url),
    'url': url,
    'name': null
  });

  const guidCreate = crypto.randomBytes(16).toString('hex');
  const actor = `https://${domain}/u/${name}`;
  const out = {
    '@context': 'https://www.w3.org/ns/activitystreams',
    'id': `https://${domain}/m/${guidCreate}`,
    'type': 'Create',
    'actor': actor,
    'to': [ follower ],
    'object': {
      'id': `https://${domain}/m/${guidNote}`,
      'type': 'Note',
      'published': new Date().toISOString(),
      'attributedTo': actor,
      'content': text,
      'link': item.link,
      'cc': 'https://www.w3.org/ns/activitystreams#Public'
    }
  };

  // Items that never went through content extraction may lack `urls`.
  const urls = Array.isArray(item.urls) ? item.urls : [];
  console.log('NUM IMAGES', urls.length);

  const enclosureUrl = item.enclosure && item.enclosure.url;
  if (enclosureUrl && enclosureUrl.includes('.mp3')) {
    out.object.attachment = {
      'type': 'Document',
      'mediaType': 'audio/mpeg',
      'url': enclosureUrl,
      'name': null
    };
  }
  // The multi-image case must be tested before the single-image case,
  // otherwise it can never be reached.
  else if (urls.length > 1) {
    out.object.attachment = urls.slice(0, MAX_IMAGES).map(imageDocument);
  }
  else if (urls.length === 1) {
    out.object.attachment = imageDocument(urls[0]);
  }

  console.log(guidCreate, guidNote);
  const insert = db.prepare('insert or replace into messages(guid, message) values(?, ?)');
  insert.run(guidCreate, JSON.stringify(out));
  insert.run(guidNote, JSON.stringify(out.object));
  return out;
}
/**
 * Deliver one post to every follower of an account.
 *
 * Looks up the followers of `<name>@<domain>` in the `accounts` table, then
 * builds and sends a separate `Create` activity to each follower's inbox
 * (`<follower>/inbox`). All activities for one call share the same note id.
 *
 * If the account does not exist, a message is logged and nothing is sent. A
 * failure for one follower (for example a malformed follower URL) is logged
 * and does not stop delivery to the remaining followers.
 *
 * @param {string} text HTML content of the post.
 * @param {string} name Local account name (without domain).
 * @param {string} domain Domain of this server.
 * @param {*} req Passed through to signAndSend.
 * @param {*} res Passed through to signAndSend.
 * @param {object} item Feed item the post was built from.
 */
function sendCreateMessage(text, name, domain, req, res, item) {
  const account = db.prepare('select followers from accounts where name = ?').get(`${name}@${domain}`);
  if (account === undefined) {
    console.log(`No record found for ${name}.`);
    return;
  }

  // The followers column holds a JSON array, or is empty when nobody follows.
  const parsed = account.followers ? JSON.parse(account.followers) : [];
  const followers = Array.isArray(parsed) ? parsed : [];

  const guidNote = crypto.randomBytes(16).toString('hex');
  for (const follower of followers) {
    try {
      const inbox = follower + '/inbox';
      const targetDomain = new URL(follower).hostname;
      const message = createMessage(text, name, domain, item, follower, guidNote);
      signAndSend(message, name, domain, req, res, targetDomain, inbox);
    } catch (e) {
      // One bad follower entry must not block everyone after it.
      console.log(`Could not deliver to follower ${follower}:`, e);
    }
  }
}