Skip to content

Commit

Permalink
Spoof user agent to fix POST request scraping (quacs#1107)
Browse files: browse the repository at this point in the history
* Spoof user agent to fix POST request scraping

* Lint
  • Loading branch information
powe97 authored Feb 1, 2024
1 parent 14bfd4f commit b1fbd3c
Showing 1 changed file with 15 additions and 1 deletion.
16 changes: 15 additions & 1 deletion scrapers/sis_scraper/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -219,6 +219,20 @@ async def get_classes_with_code(term, code):
async with session.post(
"https://sis.rpi.edu/rss/bwckctlg.p_display_courses",
data=f"term_in={term}&call_proc_in=&sel_subj=dummy&sel_levl=dummy&sel_schd=dummy&sel_coll=dummy&sel_divs=dummy&sel_dept=dummy&sel_attr=dummy&sel_subj={code}&sel_crse_strt=&sel_crse_end=&sel_title=&sel_levl=%25&sel_schd=%25&sel_coll=%25&sel_divs=%25&sel_dept=%25&sel_from_cred=&sel_to_cred=&sel_attr=%25",
headers={
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Content-Type": "application/x-www-form-urlencoded",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Sec-GPC": "1",
"Pragma": "no-cache",
"Cache-Control": "no-cache",
},
) as request:
return await request.text()

Expand Down Expand Up @@ -248,7 +262,7 @@ async def scrape_subject(term, name, code):

async def get_subjects_for_term(term):
global session
url = f"https://sis.rpi.edu/rss/bwckctlg.p_display_courses?term_in={term}&sel_crse_strt=0&sel_crse_end=9999&sel_subj=&sel_levl=&sel_schd=&sel_coll=&sel_divs=&sel_dept=&sel_attr="
url = f"https://sis.rpi.edu/rss/bwckctlg.p_display_courses?term_in={term}&sel_crse_strt=&sel_crse_end=&sel_subj=&sel_levl=&sel_schd=&sel_coll=&sel_divs=&sel_dept=&sel_attr="
async with session.get(url) as request:
soup = BeautifulSoup(await request.text())
return [
Expand Down

0 comments on commit b1fbd3c

Please sign in to comment.