forked from vinaychandra/Moodle-Downloader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmoodle.py
112 lines (86 loc) · 3.48 KB
/
moodle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import cookielib
import urllib2
import urllib
import os
import os.path
import re
from ConfigParser import ConfigParser
from bs4 import BeautifulSoup
# --- Configuration and login -------------------------------------------------
# Read settings from config.ini located next to this script.  Expected keys:
#   [dirs] root_dir  - download destination.  NOTE(review): later code joins
#                      paths with plain '+', so this presumably must end with
#                      a path separator -- confirm against config.ini.
#   [auth] username, password, url (the Moodle login form's action URL)
conf = ConfigParser()
project_dir = os.path.dirname(os.path.abspath(__file__))
conf.read(os.path.join(project_dir, 'config.ini'))
root_directory = conf.get("dirs", "root_dir")
username = conf.get("auth", "username")
password = conf.get("auth", "password")
# Strip stray single/double quotes that may surround the URL value in the ini.
authentication_url = conf.get("auth", "url").strip('\'"')
# Store the cookies and create an opener that will hold them
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Add our headers
opener.addheaders = [('User-agent', 'Moodle-Crawler')]
# Install our opener (note that this changes the global opener to the one
# we just made, but you can also just call opener.open() if you want)
urllib2.install_opener(opener)
# Input parameters we are going to send
payload = {
    'username': username,
    'password': password
}
# Use urllib to encode the payload
data = urllib.urlencode(payload)
# Build our Request object (supplying 'data' makes it a POST)
req = urllib2.Request(authentication_url, data)
# Make the request and read the response
response = urllib2.urlopen(req)
contents = response.read()
# Verify the contents
# A logged-in Moodle dashboard contains a "My courses" heading; if it is
# missing, the login (or the configured URL) failed, so abort.
if "My courses" not in contents:
    print "Cannot connect to moodle"
    exit(1)
# --- Course discovery --------------------------------------------------------
# Isolate the "My courses" section of the dashboard HTML, then extract each
# course's display name and URL from its <h3 class="coursename"> heading.
# Result: `courses` is a list of [name, link] pairs used by the download loop.
my_courses_html = contents.split("<h2>My courses</h2>")[1]
my_courses_html = my_courses_html.split('<aside id="block-region-side-pre" ')[0]
heading_pattern = re.compile('<h3 class="coursename">(.*?)</h3>')
courses = []
for heading_html in heading_pattern.findall(my_courses_html):
    # Each heading wraps a single anchor: its text is the course name and its
    # href is the course page URL.
    anchor = BeautifulSoup(heading_html).find('a')
    courses.append([anchor.text, anchor.get('href')])
for course in courses:
if not os.path.isdir(root_directory + course[0]):
os.mkdir(root_directory+course[0])
response1 = urllib2.urlopen(course[1])
scrap = response1.read()
soup = BeautifulSoup(scrap)
course_links = soup.find(class_="course-content").find(class_="weeks").find_all('a')
for link in course_links:
current_dir = root_directory + course[0] + "/"
href = link.get('href')
# Checking only resources... Ignoring forum and folders, etc
if "resource" in href:
cj1 = cookielib.CookieJar()
opener1 = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj1))
# Add our headers
opener1.addheaders = [('User-agent', 'Moodle-Crawler')]
# Install our opener (note that this changes the global opener to the one
# we just made, but you can also just call opener.open() if you want)
urllib2.install_opener(opener1)
# The action/ target from the form
authentication_url1 = href
# Build our Request object (supplying 'data' makes it a POST)
req1 = urllib2.Request(authentication_url, data)
# Make the request and read the response
resp = urllib2.urlopen(req1)
webFile = urllib2.urlopen(href)
url = current_dir + webFile.geturl().split('/')[-1].split('?')[0]
file_name = urllib.unquote(url).decode('utf8')
if os.path.isfile(file_name):
print "File found : ", file_name
continue
print "Creating file : ", file_name
pdfFile = open(file_name, 'wb')
pdfFile.write(webFile.read())
webFile.close()
pdfFile.close()
print "Update Complete"