-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpatreon-mp3.py
executable file
·125 lines (110 loc) · 3.99 KB
/
patreon-mp3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/local/bin/python3
import feedparser
import re
import requests
import shutil
import time
import datetime
import os
import configparser
import magic
from eyed3.id3 import Tag
from eyed3.id3 import ID3_V2_4
from pathlib import Path
# function that cleans up a file name so we can use it as the
# local download file name
def download_name (n):
o = re.sub(r'\s+', r'_', n)
o = re.sub(r'[^a-zA-Z0-9_]', r'', o).lower()
return cfg.get('DownloadPrefix') + o[:100]
# read in the configuration file
parser = configparser.ConfigParser()
parser.read('config.ini')
cfg = parser['feed']
# download the RSS XML and parse it
f = feedparser.parse(cfg.get('RSSURL'))
# file where we keep track of the last entry we previously saw
lastRunSeenFileName = cfg.get('DownloadPrefix') + '/last.txt'
lastRunSeen = datetime.datetime.utcfromtimestamp(0)
if Path(lastRunSeenFileName).is_file():
with open(lastRunSeenFileName, 'r') as ls_file:
lastRunSeen = datetime.datetime.fromisoformat(ls_file.read())
artist = f.feed.title
# check if we already have a cover image, if not use the
# image specified in the RSS feed, downloading it to a local file
coverFileName = cfg.get('DownloadPrefix') + '/cover.jpg'
if not Path(coverFileName).is_file():
imageUrl = f.feed.image.href
print ('Download artist image')
imageResponse = requests.get(imageUrl)
with open(coverFileName, 'wb') as fd:
for chunk in imageResponse.iter_content(chunk_size=256):
fd.write(chunk)
# read in the cover image and figure out what file type it is
with open(coverFileName, 'rb') as file:
coverImage = file.read()
coverImageType = magic.from_file(coverFileName, mime=True)
# now loop over the RSS feed items, in reverse order,
# which hopefully is oldest first (so we get track numbers ordered properly)
lastSeen = datetime.datetime.utcfromtimestamp(0)
cnt = 0
f.entries.reverse()
for item in f.entries:
cnt += 1
song = item.title
try:
comment = item.summary.replace('<br>','').strip()
except:
pass
published = datetime.datetime(item.published_parsed[0]
,item.published_parsed[1]
,item.published_parsed[2]
,item.published_parsed[3]
,item.published_parsed[4]
,item.published_parsed[5]
,0
)
# keep track of the most recent RSS item we have seen this run
if published > lastSeen:
lastSeen = published
# if the item is older than the most recent item from the
# last run, then skip it
if published <= lastRunSeen:
continue;
# process each enclosure of the RSS item, these are the links
# to the audio files we want to download
for enc in item.enclosures:
audioUrl = enc.url
ext = re.sub(r'.*\.',r'', audioUrl.lower())
audioFileName = download_name(published.strftime("%Y%m%d") + '_' + song) + '.' + ext
if Path(audioFileName).is_file():
continue
print('Downloading new item ' + audioFileName)
# go get the audio file and save it locally
with requests.get(audioUrl) as audioResponse:
with open(audioFileName, 'wb') as audioFile:
for chunk in audioResponse.iter_content(chunk_size=128):
audioFile.write(chunk)
audioFile.close()
audioResponse.close()
# replace the MP3 ID tags in the downloaded audio file
if ext != 'wav':
t = Tag()
t.artist = artist
t.album_artist = artist
t.album = cfg.get('Album')
t.title = song[:100]
t.comments.set(comment)
t.release_date = published.strftime("%Y-%m-%d")
t.recording_date = published.strftime("%Y")
t.genre = cfg.get('Genre')
t.track_num = (cnt)
t.disc_num = (1,1)
t.images.set(3, coverImage, coverImageType)
t.save(audioFileName, version=ID3_V2_4)
# set the OS file time to the published time
os.utime(audioFileName, (time.mktime(published.timetuple()), time.mktime(published.timetuple())))
# write out the newest item we saw, so we can skip previous
# items on next run
with open(lastRunSeenFileName, 'w') as ls_file:
print(lastSeen.isoformat(), end='', file=ls_file)