forked from SarahEDuehr/GernativeAI-with-Knitting
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Ravelry Patterns.py
120 lines (77 loc) · 2.28 KB
/
Ravelry Patterns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
# coding: utf-8
# In[2]:
import pandas
# Load the pattern index; the columns used below are 'id' and 'pdf_url'.
df = pandas.read_csv('C:\\Users\\girlg\\Documents\\IT 496\\pdfs.csv')
print(df)
# In[3]:
# Constant columns holding the download directory and the file extension.
# This is a plain (non-raw) literal on purpose: with the r-prefix each "\\"
# stayed as two backslash characters, so every stored path had doubled
# separators.
df['path1'] = 'C:\\Users\\girlg\\Downloads\\Ravelry\\'
df['path3'] = '.pdf'
# In[4]:
# Local file name = <directory><id left-padded with zeros to width 8>.pdf
df['path'] = df['path1'].str.cat(df['id'].astype(str).str.zfill(8))
df['paths'] = df['path'].str.cat(df['path3'])
# In[5]:
# (local path, url) pairs for the downloader. Materialised as a list because
# a bare zip() is exhausted after one pass, which makes a second run of the
# download loop silently do nothing.
inputs = list(zip(df['paths'], df['pdf_url']))
urls = df['pdf_url']
fns = df['paths']
# In[6]:
import time
import requests
# NOTE: everything between the quote delimiters below is a bare string
# literal, so none of it is executed; it holds an earlier version of the
# downloader (download_pdf_file further down is the one that actually runs).
# Two things to be aware of before re-enabling it:
#   * the opening delimiter has four quote characters, so the string itself
#     begins with a literal '"';
#   * download_url() unpacks each item as (url, fn), whereas `inputs` is
#     zipped as (paths, pdf_url), i.e. in the opposite order.
""""
def download_url(args):
t0 = time.time()
url, fn = args[0], args[1]
try:
r = requests.get(url)
with open(fn, 'wb') as f:
f.write(r.content)
return(url, time.time() - t0)
except Exception as e:
print('Exception in download_url():', e)
t0 = time.time()
for i in inputs:
result = download_url(i)
print('url:', result[0], 'time:', result[1])
print('Total time:', time.time() - t0)
"""
# In[11]:
import os
import time
from time import sleep
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Retry policy handed to urllib3: connection errors are not retried
# (connect=0); a backoff factor of 0.5 applies to any other retries.
retry = Retry(backoff_factor=0.5, connect=0)
adapter = HTTPAdapter(max_retries=retry)
# One shared session; the same adapter serves both URL schemes.
session = requests.Session()
session.mount('https://', adapter)
session.mount('http://', adapter)
def download_pdf_file(args, timeout: float = 30.0) -> bool:
    """Download one PDF and write it to a local file.

    The request is sent through the module-level ``session``.

    :param args: Sequence whose first item is the destination file path and
        whose second item is the URL of the PDF to download.
    :param timeout: Seconds to wait on the server before giving up; passed
        unchanged to ``session.get``.
    :return: True if the server answered with status 200 and the body was
        written to the destination file, otherwise False.
    :raises requests.exceptions.RequestException: On connection failures or
        when the timeout expires.
    :raises OSError: If the destination file cannot be opened or written.
    """
    fn, url = args[0], args[1]
    # Always bound the wait: without a timeout a stalled server would block
    # this call (and the whole batch) indefinitely.
    response = session.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f'Uh oh! Could not download {url},')
        print(f'HTTP response status code: {response.status_code}')
        return False
    # Write the body to the caller-supplied path (not derived from the URL).
    with open(fn, 'wb') as pdf_object:
        pdf_object.write(response.content)
    print(f'{url} was successfully saved!')
    return True
# Download every (path, url) pair in turn and report the total wall time.
t0 = time.time()
for i in inputs:
    try:
        result = download_pdf_file(i)
    except Exception as e:
        # Best-effort batch: report the failure and carry on with the next
        # file instead of aborting the whole run.
        print('Exception in download_pdf_file():', e)
    else:
        print(result)
    # Throttle after every attempt, failed ones included, so that a run of
    # errors does not turn into back-to-back requests against the server.
    sleep(3)
print('Total time:', time.time() - t0)
# In[12]: