From 41f1517cb031da4a12b7cb6094cb782849c2fdc9 Mon Sep 17 00:00:00 2001 From: Hung Pham Date: Sat, 22 Jun 2024 16:03:11 +1000 Subject: [PATCH] fix: remerge facilities PR (#55) --- .gitignore | 1 + README.md | 46 ++++-- libcal/src/libraryScraper.ts | 253 +++++++++++++++++++++----------- libcal/src/types.ts | 22 ++- nss/README.md | 2 + nss/package-lock.json | 9 ++ nss/package.json | 1 + nss/src/nssFetch.ts | 5 +- nss/src/runScraper.ts | 150 +++++++++++-------- nss/src/scrapeRoomFacilities.ts | 99 +++++++++++++ nss/src/types.ts | 53 ++++++- sql/rooms/down.sql | 2 + sql/rooms/up.sql | 33 ++++- 13 files changed, 487 insertions(+), 189 deletions(-) create mode 100644 nss/src/scrapeRoomFacilities.ts diff --git a/.gitignore b/.gitignore index b1001a6..cfefc09 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .idea/** +.vscode/** diff --git a/README.md b/README.md index 6da3a51..f1e2bb7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ For instructions on how you can access this data, see the [DevSoc GraphQL API](h ### Buildings | **Field** | **Description** | **Example** | -|-----------|----------------------------------------------|-----------------| +| --------- | -------------------------------------------- | --------------- | | `id` | Building ID in the format `CAMPUS-GRID_REF`. | "K-F8" | | `name` | Name of the building. | "Law Building" | | `lat` | Latitude of the building. | -33.91700 | @@ -20,22 +20,34 @@ For instructions on how you can access this data, see the [DevSoc GraphQL API](h ### Rooms -| **Field** | **Description** | **Example** | -|--------------|---------------------------------------------------|---------------------| -| `id` | Room ID in the format `CAMPUS-GRID_REF-ROOM_NUM`. | "K-J17-305" | -| `name` | Name of the room. | "Brass Lab J17 305" | -| `abbr` | Shortened name, as seen on timetable. | "BrassME305" | -| `usage` | Room type - see below for list. | "CMLB" | -| `capacity` | Number of people the room is suitable for. 
| 36 | -| `school` | School that manages the room - `" "` if none. | "CSE" | -| `buildingId` | ID of building that room is in. | "K-J17" | - -Mapping of room usages can be found [here](https://github.com/devsoc-unsw/freerooms/blob/dev/common/roomUsages.ts). Mapping of school codes can be found [here](https://github.com/devsoc-unsw/freerooms/blob/dev/common/schools.ts). +| **Field** | **Description** | **Example** | +| ---------------- | ------------------------------------------------- | -------------------- | +| `id` | Room ID in the format `CAMPUS-GRID_REF-ROOM_NUM`. | "K-J17-305" | +| `name` | Name of the room. | "Brass Lab J17 305" | +| `abbr` | Shortened name, as seen on timetable. | "BrassME305" | +| `usage` | Room type - see below for list. | "CMLB" | +| `capacity` | Number of people the room is suitable for. | 36 | +| `school` | School that manages the room - `" "` if none. | "CSE" | +| `buildingId` | ID of building that room is in. | "K-J17" | +| `floor` | Floor type - see below for list. | "Tiered" | +| `seating` | Seating type - see below for list. | "Movable" | +| `microphone` | List of microphone facilities of the room. | "Lectern (fixed)" | +| `accessibility` | List of accessibility facilities of the room. | "Hearing loop" | +| `audiovisual` | List of audiovisual facilities of the room. | "Television monitor" | +| `infotechnology` | List of information technology facilities of the room. | "IT Lectern" | +| `writingMedia` | List of writing media facilities of the room. | "Blackboard" | +| `service` | List of service facilities of the room. | "Break out rooms" | + +Floor type can be 'Flat', 'Tiered', 'Other' or null. +Seating type can be 'Movable', 'Fixed' or null. + +Mapping of room usages can be found [here](https://github.com/devsoc-unsw/freerooms/blob/dev/common/roomUsages.ts). +Mapping of school codes can be found [here](https://github.com/devsoc-unsw/freerooms/blob/dev/common/schools.ts). 
### Bookings | **Field** | **Description** | **Example** | -|---------------|------------------------------------------------------|-----------------------------| +| ------------- | ---------------------------------------------------- | --------------------------- | | `bookingType` | Type of booking - see below. | "SOCIETY" | | `name` | Name of the booking (usually related to the booker). | "SOFTWAREDEV" | | `roomId` | ID of the room the booking is for. | "K-E19-G05" | @@ -45,23 +57,31 @@ Mapping of room usages can be found [here](https://github.com/devsoc-unsw/freero Full list of current booking types is: "CLASS", "SOCIETY", "INTERNAL", "LIB", "BLOCK", "MISC". ### Relationships + The following relationships exist between tables. These relationships are tracked by Hasura and can be followed in GraphQL queries. + - Every **building** contains 1 or more **rooms** - Every **room** belongs to a **building** - Every **room** has 0 or more **bookings** - Every **booking** is for a specific **room** ## Making Changes + ### Schema updates + To update the schema, you will need to: + - Update the relevant `up.sql` and `down.sql` files in the root `sql/` directory - Update the scrapers to produce this data ### Adding additional scrapers + To add additional scrapers, you will need to: + - Create a new subdirectory with the scraper inside it - Ensure that if you are using the shared schema SQL files, you reference them using symlinks so all scrapers are updated - Add to the GitHub workflow so that it also tests/builds/deploys the new scraper ### Testing + See the [DevSoc GraphQL API docs](https://github.com/devsoc-unsw/graphql-api/blob/master/scrapers.md) on how to test scrapers. 
diff --git a/libcal/src/libraryScraper.ts b/libcal/src/libraryScraper.ts index 77af46a..648ebba 100644 --- a/libcal/src/libraryScraper.ts +++ b/libcal/src/libraryScraper.ts @@ -1,20 +1,21 @@ -import { load } from 'cheerio'; +import { load } from "cheerio"; import { Library, Room, RoomBooking } from "./types"; import toSydneyTime from "./toSydneyTime"; import axios from "axios"; -import * as fs from 'fs'; -import { DRYRUN, HASURAGRES_API_KEY, HASURAGRES_URL } from './config'; +import * as fs from "fs"; +import { DRYRUN, HASURAGRES_API_KEY, HASURAGRES_URL } from "./config"; const ROOM_URL = "https://unswlibrary-bookings.libcal.com/space/"; -const BOOKINGS_URL = "https://unswlibrary-bookings.libcal.com/spaces/availability/grid"; +const BOOKINGS_URL = + "https://unswlibrary-bookings.libcal.com/spaces/availability/grid"; const LIBRARIES: Library[] = [ - { name: 'Main Library', libcalCode: '6581', buildingId: 'K-F21' }, - { name: 'Law Library', libcalCode: '6584', buildingId: 'K-F8' }, + { name: "Main Library", libcalCode: "6581", buildingId: "K-F21" }, + { name: "Law Library", libcalCode: "6584", buildingId: "K-F8" }, ]; const scrapeLibrary = async (library: Library) => { const response = await downloadBookingsPage(library.libcalCode); - const bookingData = parseBookingData(response.data['slots']); + const bookingData = parseBookingData(response.data["slots"]); const allRoomData: Room[] = []; const allRoomBookings: RoomBooking[] = []; @@ -33,17 +34,19 @@ const scrapeLibrary = async (library: Library) => { let i = 0; while (i < bookingData[roomID].length) { const currBooking: RoomBooking = { - bookingType: 'LIB', + bookingType: "LIB", name: "Library Booking", roomId: roomData.id, start: bookingData[roomID][i].start, end: bookingData[roomID][i].end, - } + }; i++; // Combine all subsequent bookings that start when this ends - while (i < bookingData[roomID].length && - bookingData[roomID][i].start.getTime() == currBooking.end.getTime()) { + while ( + i < 
bookingData[roomID].length && + bookingData[roomID][i].start.getTime() == currBooking.end.getTime() + ) { currBooking.end = bookingData[roomID][i].end; i++; } @@ -53,16 +56,16 @@ const scrapeLibrary = async (library: Library) => { } return { rooms: allRoomData, bookings: allRoomBookings }; -} +}; // Formats a date into YYYY-MM-DD format const formatDate = (date: Date): string => { const year = date.getFullYear(); - const month = String(date.getMonth() + 1).padStart(2, '0'); - const day = String(date.getDate()).padStart(2, '0'); + const month = String(date.getMonth() + 1).padStart(2, "0"); + const day = String(date.getDate()).padStart(2, "0"); return `${year}-${month}-${day}`; -} +}; const downloadBookingsPage = async (locationId: string) => { const todaysDate = formatDate(new Date()); @@ -74,35 +77,37 @@ const downloadBookingsPage = async (locationId: string) => { const postData = { lid: locationId, - gid: '0', - eid: '-1', - seat: '0', - seatId: '0', - zone: '0', + gid: "0", + eid: "-1", + seat: "0", + seatId: "0", + zone: "0", start: todaysDate, end: furthestBookableDate, - pageIndex: '0', - pageSize: '18' + pageIndex: "0", + pageSize: "18", }; const headers = { - 'Content-Type': 'application/x-www-form-urlencoded', // because the request data is URL encoded - 'Referer': 'https://unswlibrary-bookings.libcal.com/' + "Content-Type": "application/x-www-form-urlencoded", // because the request data is URL encoded + Referer: "https://unswlibrary-bookings.libcal.com/", }; - return await axios.post(BOOKINGS_URL, new URLSearchParams(postData), { headers }); -} + return await axios.post(BOOKINGS_URL, new URLSearchParams(postData), { + headers, + }); +}; interface ResponseData { - start: string, - end: string, - itemId: number, - checksum: string, - className?: string + start: string; + end: string; + itemId: number; + checksum: string; + className?: string; } const parseBookingData = (bookingData: ResponseData[]) => { - const bookings: { [roomNumber: number]: { start: 
Date, end: Date }[] } = {}; + const bookings: { [roomNumber: number]: { start: Date; end: Date }[] } = {}; for (const slot of bookingData) { if (!(slot.itemId in bookings)) { @@ -110,26 +115,26 @@ const parseBookingData = (bookingData: ResponseData[]) => { } if (slot.className == "s-lc-eq-checkout") { - bookings[slot.itemId].push( - { - start: toSydneyTime(new Date(slot.start)), - end: toSydneyTime(new Date(slot.end)), - } - ) + bookings[slot.itemId].push({ + start: toSydneyTime(new Date(slot.start)), + end: toSydneyTime(new Date(slot.end)), + }); } } return bookings; -} +}; const getRoomData = async (roomId: string, buildingId: string) => { const response = await axios.get(ROOM_URL + roomId, {}); const $ = load(response.data); - - const $heading = $('h1#s-lc-public-header-title'); + const $heading = $("h1#s-lc-public-header-title"); // Remove whitespace and split the name, location and capacity into newlines - const data = $heading.text().trim().split(/\s{2,}/g); + const data = $heading + .text() + .trim() + .split(/\s{2,}/g); const [name, rawLocation, rawCapacity] = data; // We only care about rooms and pods @@ -137,29 +142,90 @@ const getRoomData = async (roomId: string, buildingId: string) => { return null; } - const libraryName = rawLocation.replace(/[()]/, '').split(':')[0]; + const libraryName = rawLocation.replace(/[()]/, "").split(":")[0]; const capacity = parseInt(rawCapacity.split(": ")[1]); - let roomNumber = name.split(' ')[2]; + let roomNumber = name.split(" ")[2]; if (name.match(/POD/)) { // Pods are just numbered 1-8 so prepend POD - roomNumber = 'POD' + roomNumber; + roomNumber = "POD" + roomNumber; + } + + const hasPower = $('p:contains("Power available")').length > 0; + + const equipments: Set = new Set( + $('strong:contains("Equipment")') + .parent() + .contents() + .last() + .text() + .split(",") + .map((string) => string.trim()) + .filter((string) => string.length) + ); + /* + Possible values when fetched on 05/05/2024 + 'Whiteboard', + 'LCD 
screen', + 'USB charging', + 'Projector', + 'Computer', + '(Note: LCD Screen is Out of Order)', + '(NOTE: Audio and Video equipment are currently out of order)', + '(NOTE: only HDMI cable connection is available)' + */ + + const facilities = { + microphone: [] as string[], + accessibility: [] as string[], + audiovisual: [] as string[], + infotechnology: [] as string[], + writingMedia: [] as string[], + service: [] as string[], + }; + + if (hasPower) { + facilities.accessibility.push("Power at Wall"); + } + + for (const equipment of equipments) { + switch (equipment) { + case "Whiteboard": + facilities.writingMedia.push("Whiteboard"); + break; + case "Projector": + facilities.audiovisual.push("Projector, 16mm"); + break; + case "LCD screen": + case "USB charging": + case "Computer": + case "(Note: LCD Screen is Out of Order)": + case "(NOTE: Audio and Video equipment are currently out of order)": // yup there's an invisible character... + case "(NOTE: only HDMI cable connection is available)": + break; + default: + fs.writeFileSync("test", equipment); + console.warn( + `Got unknown option for library room equipment ${equipment}` + ); + } } const roomData: Room = { - name: libraryName + ' ' + name, + name: libraryName + " " + name, abbr: name, id: buildingId + "-" + roomNumber, usage: "LIB", capacity, school: " ", - buildingId: buildingId - } + buildingId: buildingId, + ...facilities, + }; return roomData; -} +}; const runScrapeJob = async () => { - console.time('Scraping'); + console.time("Scraping"); const allRooms: Room[] = []; const allBookings: RoomBooking[] = []; for (const library of LIBRARIES) { @@ -167,55 +233,66 @@ const runScrapeJob = async () => { allRooms.push(...rooms); allBookings.push(...bookings); } - console.timeEnd('Scraping'); + console.timeEnd("Scraping"); // Send to Hasuragres const requestConfig = { headers: { "Content-Type": "application/json", "X-API-Key": HASURAGRES_API_KEY, - } - } - - await axios.post( - `${HASURAGRES_URL}/insert`, - { - 
metadata: { - table_name: "Rooms", - columns: ["abbr", "name", "id", "usage", "capacity", "school", "buildingId"], - sql_up: fs.readFileSync("./sql/rooms/up.sql", "utf8"), - sql_down: fs.readFileSync("./sql/rooms/down.sql", "utf8"), - // overwrite all outdated lib rooms - sql_before: "DELETE FROM Rooms WHERE \"usage\" = 'LIB' " + - `AND "id" NOT IN (${allRooms.map(room => `'${room.id}'`).join(",")});`, - write_mode: 'append', - dryrun: DRYRUN, - }, - payload: allRooms }, - requestConfig - ); + }; - // libcal shows all bookings that start during or after the current 30-min period - const baseTime = new Date(); - baseTime.setMinutes(baseTime.getMinutes() < 30 ? 0 : 30, 0, 0); await axios.post( - `${HASURAGRES_URL}/insert`, - { - metadata: { - table_name: "Bookings", - columns: ["bookingType", "name", "roomId", "start", "end"], - sql_up: fs.readFileSync("./sql/bookings/up.sql", "utf8"), - sql_down: fs.readFileSync("./sql/bookings/down.sql", "utf8"), - sql_before: fs.readFileSync("./sql/bookings/before.sql", "utf8"), - sql_after: fs.readFileSync("./sql/bookings/after.sql", "utf8"), - write_mode: 'append', - dryrun: DRYRUN, + `${HASURAGRES_URL}/batch_insert`, + [ + { + metadata: { + table_name: "Rooms", + columns: [ + "abbr", + "name", + "id", + "usage", + "capacity", + "school", + "buildingId", + "microphone", + "accessibility", + "audiovisual", + "infotechnology", + "writingMedia", + "service", + ], + sql_up: fs.readFileSync("./sql/rooms/up.sql", "utf8"), + sql_down: fs.readFileSync("./sql/rooms/down.sql", "utf8"), + // overwrite all outdated lib rooms + sql_before: + "DELETE FROM Rooms WHERE \"usage\" = 'LIB' " + + `AND "id" NOT IN (${allRooms + .map((room) => `'${room.id}'`) + .join(",")});`, + write_mode: "append", + dryrun: DRYRUN, + }, + payload: allRooms, }, - payload: allBookings - }, + { + metadata: { + table_name: "Bookings", + columns: ["bookingType", "name", "roomId", "start", "end"], + sql_up: fs.readFileSync("./sql/bookings/up.sql", "utf8"), + sql_down: 
fs.readFileSync("./sql/bookings/down.sql", "utf8"), + sql_before: fs.readFileSync("./sql/bookings/before.sql", "utf8"), + sql_after: fs.readFileSync("./sql/bookings/after.sql", "utf8"), + write_mode: "append", + dryrun: DRYRUN, + }, + payload: allBookings, + }, + ], requestConfig ); -} +}; runScrapeJob(); diff --git a/libcal/src/types.ts b/libcal/src/types.ts index 7973f48..65f02bf 100644 --- a/libcal/src/types.ts +++ b/libcal/src/types.ts @@ -1,10 +1,10 @@ export type RoomBooking = { - bookingType: string; - name: string; - roomId: string; - start: Date; - end: Date; - } + bookingType: string; + name: string; + roomId: string; + start: Date; + end: Date; +}; export type Room = { abbr: string; @@ -14,10 +14,16 @@ export type Room = { capacity: number; school: string; buildingId: string; -} + microphone: string[]; + accessibility: string[]; + audiovisual: string[]; + infotechnology: string[]; + writingMedia: string[]; + service: string[]; +}; export type Library = { name: string; libcalCode: string; buildingId: string; -} +}; diff --git a/nss/README.md b/nss/README.md index ea292ed..8b25053 100644 --- a/nss/README.md +++ b/nss/README.md @@ -7,6 +7,8 @@ Building IDs and names are scraped from the dropdown list of buildings on https: Room data is all scraped from doing a search on https://nss.cse.unsw.edu.au/tt/find_rooms.php?dbafile=2024-KENS-COFA.DBA&campus=KENS. To get all rooms to show up, set the search parameter to be all days and set the start/end time to be equal. +For each room, the facilities are scraped from https://nss.cse.unsw.edu.au/tt/view_rooms.php?dbafile=2024-KENS-COFA.DBA&campus=KENS (the individual room page, the same page used for bookings). For the facilities to appear, you need to pass in `show: "show_facilities"` and `room: roomId` in the request body. + Bookings are scraped separately for each room from the individual room pages (e.g. https://nss.cse.unsw.edu.au/tt/view_rooms.php?dbafile=2024-KENS-COFA.DBA&campus=KENS). 
By setting the date range to be the whole year, each booking will show a bit string on hover (HTML `title` element) describing which weeks of the year it runs in. Some buildings and rooms are ignored, which can be seen and configured in `src/exclusions.ts`. \ No newline at end of file diff --git a/nss/package-lock.json b/nss/package-lock.json index 30bd061..b1fe1bb 100644 --- a/nss/package-lock.json +++ b/nss/package-lock.json @@ -10,6 +10,7 @@ "license": "ISC", "dependencies": { "axios": "^1.4.0", + "axios-rate-limit": "^1.3.0", "cheerio": "^1.0.0-rc.12", "date-fns": "^2.30.0", "date-fns-tz": "^2.0.0", @@ -144,6 +145,14 @@ "proxy-from-env": "^1.1.0" } }, + "node_modules/axios-rate-limit": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/axios-rate-limit/-/axios-rate-limit-1.3.0.tgz", + "integrity": "sha512-cKR5wTbU/CeeyF1xVl5hl6FlYsmzDVqxlN4rGtfO5x7J83UxKDckudsW0yW21/ZJRcO0Qrfm3fUFbhEbWTLayw==", + "peerDependencies": { + "axios": "*" + } + }, "node_modules/boolbase": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", diff --git a/nss/package.json b/nss/package.json index 51653b9..90fbb74 100644 --- a/nss/package.json +++ b/nss/package.json @@ -21,6 +21,7 @@ "homepage": "https://github.com/csesoc/nss-scraper#readme", "dependencies": { "axios": "^1.4.0", + "axios-rate-limit": "^1.3.0", "cheerio": "^1.0.0-rc.12", "date-fns": "^2.30.0", "date-fns-tz": "^2.0.0", diff --git a/nss/src/nssFetch.ts b/nss/src/nssFetch.ts index 75f7880..8e690f1 100644 --- a/nss/src/nssFetch.ts +++ b/nss/src/nssFetch.ts @@ -1,11 +1,14 @@ // Fetch an NSS page, adding all required request parameters // Take in actual variable parameters like roomId import axios from "axios"; +import rateLimit from "axios-rate-limit" import { firstMonday, scwWeekNumber } from './dateUtils'; import { YEAR } from './config'; const DAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]; +const http = rateLimit(axios.create(), 
{ maxRPS: 30 }) + const nssFetch = async ( page: "find_rooms" | "view_rooms" | "view_multirooms", params: Record = {}, @@ -27,7 +30,7 @@ const nssFetch = async ( break; } - return axios.post(url, { + return http.post(url, { ...params, ...requiredParams, fr_week: scwWeekNumber(firstMonday(YEAR)), diff --git a/nss/src/runScraper.ts b/nss/src/runScraper.ts index 59d2ec1..a40398b 100644 --- a/nss/src/runScraper.ts +++ b/nss/src/runScraper.ts @@ -4,94 +4,116 @@ import scrapeBookings from "./scrapeBookings"; import parseBooking from "./parseBooking"; import scrapeBuildings from "./scrapeBuildings"; import { DRYRUN, HASURAGRES_API_KEY, HASURAGRES_URL, YEAR } from "./config"; -import axios from 'axios'; -import { formatString } from './stringUtils'; +import axios from "axios"; +import { formatString } from "./stringUtils"; +import { scrapeRoomFacilities } from "./scrapeRoomFacilities"; const runScrapeJob = async () => { const buildings = await scrapeBuildings(); const rooms = await scrapeRooms(); + const facilitiesPromises = rooms.map((room) => scrapeRoomFacilities(room.id)); // Filter buildings with no rooms const filteredBuildings = buildings.filter( - building => !!rooms.find(room => room.id.startsWith(building.id)) + (building) => !!rooms.find((room) => room.id.startsWith(building.id)) ); - const bookingPromises = rooms.map(room => scrapeBookings(room.id)); - const bookings = (await Promise.all(bookingPromises)).flat(); - const parsedBookings = bookings.map(parseBooking).flat(); + const bookingPromises = rooms.map((room) => scrapeBookings(room.id)); + // we're sending about 1000 requests here + const [facilities, bookings] = await Promise.all([ + Promise.all(facilitiesPromises), + Promise.all(bookingPromises), + ]); + const parsedBookings = bookings.flat().map(parseBooking).flat(); parsedBookings.sort((a, b) => a.start.getTime() - b.start.getTime()); - return { buildings: filteredBuildings, rooms, bookings: parsedBookings }; -} + return { + buildings: 
filteredBuildings, + rooms, + facilities, + bookings: parsedBookings, + }; +}; const runScraper = async () => { - console.time('Scraping'); - const { buildings, rooms, bookings } = await runScrapeJob(); - console.timeEnd('Scraping'); + console.time("Scraping"); + const { buildings, rooms, facilities, bookings } = await runScrapeJob(); + console.timeEnd("Scraping"); const requestConfig = { headers: { "Content-Type": "application/json", "X-Api-Key": HASURAGRES_API_KEY, - } - } - - await axios.post( - `${HASURAGRES_URL}/insert`, - { - metadata: { - table_name: "Buildings", - sql_up: fs.readFileSync("./sql/buildings/up.sql", "utf8"), - sql_down: fs.readFileSync("./sql/buildings/down.sql", "utf8"), - columns: ["id", "name", "lat", "long", "aliases"], - write_mode: 'overwrite', - dryrun: DRYRUN, - }, - payload: buildings }, - requestConfig - ); + }; await axios.post( - `${HASURAGRES_URL}/insert`, - { - metadata: { - table_name: "Rooms", - columns: ["abbr", "name", "id", "usage", "capacity", "school", "buildingId"], - sql_up: fs.readFileSync("./sql/rooms/up.sql", "utf8"), - sql_down: fs.readFileSync("./sql/rooms/down.sql", "utf8"), - sql_before: formatString( - fs.readFileSync("./sql/rooms/before.sql", "utf8"), - rooms.map(room => `'${room.id}'`).join(",") - ), - write_mode: 'append', - dryrun: DRYRUN, + `${HASURAGRES_URL}/batch_insert`, + [ + { + metadata: { + table_name: "Buildings", + sql_up: fs.readFileSync("./sql/buildings/up.sql", "utf8"), + sql_down: fs.readFileSync("./sql/buildings/down.sql", "utf8"), + columns: ["id", "name", "lat", "long", "aliases"], + write_mode: "overwrite", + dryrun: DRYRUN, + }, + payload: buildings, }, - payload: rooms - }, - requestConfig - ); - - await axios.post( - `${HASURAGRES_URL}/insert`, - { - metadata: { - table_name: "Bookings", - columns: ["bookingType", "name", "roomId", "start", "end"], - sql_up: fs.readFileSync("./sql/bookings/up.sql", "utf8"), - sql_down: fs.readFileSync("./sql/bookings/down.sql", "utf8"), - sql_before: 
formatString( - fs.readFileSync("./sql/bookings/before.sql", "utf8"), - new Date(YEAR, 0, 1).toISOString(), - new Date(YEAR + 1, 0, 1).toISOString() - ), - write_mode: 'append', - dryrun: DRYRUN, + { + metadata: { + table_name: "Rooms", + columns: [ + "abbr", + "name", + "id", + "usage", + "capacity", + "school", + "buildingId", + "floor", + "seating", + "microphone", + "accessibility", + "audiovisual", + "infotechnology", + "writingMedia", + "service", + ], + sql_up: fs.readFileSync("./sql/rooms/up.sql", "utf8"), + sql_down: fs.readFileSync("./sql/rooms/down.sql", "utf8"), + sql_before: formatString( + fs.readFileSync("./sql/rooms/before.sql", "utf8"), + rooms.map((room) => `'${room.id}'`).join(",") + ), + write_mode: "append", + dryrun: DRYRUN, + }, + payload: rooms.map((room, i) => ({ + ...room, + ...facilities[i], + })), }, - payload: bookings - }, + { + metadata: { + table_name: "Bookings", + columns: ["bookingType", "name", "roomId", "start", "end"], + sql_up: fs.readFileSync("./sql/bookings/up.sql", "utf8"), + sql_down: fs.readFileSync("./sql/bookings/down.sql", "utf8"), + sql_before: formatString( + fs.readFileSync("./sql/bookings/before.sql", "utf8"), + new Date(YEAR, 0, 1).toISOString(), + new Date(YEAR + 1, 0, 1).toISOString() + ), + write_mode: "append", + dryrun: DRYRUN, + }, + payload: bookings, + }, + ], requestConfig ); -} +}; runScraper(); diff --git a/nss/src/scrapeRoomFacilities.ts b/nss/src/scrapeRoomFacilities.ts new file mode 100644 index 0000000..29b30c2 --- /dev/null +++ b/nss/src/scrapeRoomFacilities.ts @@ -0,0 +1,99 @@ +import { load } from "cheerio"; +import nssFetch from "./nssFetch"; +import { + ScrapedFacilities, + FACILITIES_LIST, + MappedFacilities, + FacilityFloor, + FacilitySeating, +} from "./types"; + +export const scrapeRoomFacilities = async ( + id: string +): Promise => { + const additionalParams: Record = { + show: "show_facilities", + room: id, + }; + const response = await nssFetch("view_rooms", additionalParams); + const $ 
= load(response.data); + + const additionalInformationData = {} as ScrapedFacilities; + for (const field of FACILITIES_LIST) { + const data = $(`td:contains("${field}")`).parent(); + additionalInformationData[field] = cleanString(data.find("td.data").text()); + } + + return facilitiesMapper(id, additionalInformationData); +}; + +const cleanString = (input: string): string[] => { + return input + .split("|") + .map((e) => e.trim()) + .filter((e) => e.length); +}; + +const facilitiesMapper = ( + id: string, + facilities: ScrapedFacilities +): MappedFacilities => { + const floorSeating = extractFloorSeating(id, facilities["Floor/seating"][0]); + return { + floor: floorSeating.floor, + seating: floorSeating.seating, + microphone: facilities.Microphone, + accessibility: facilities.Accessibility, + audiovisual: facilities["Audio-visual"], + infotechnology: facilities["Info technology"], + writingMedia: facilities["Writing media"], + service: facilities.Services, + }; +}; + +const extractFloorSeating = ( + id: string, + scrapedFloorSeating: string | undefined +): { floor: FacilityFloor | null; seating: FacilitySeating | null } => { + switch (scrapedFloorSeating) { + // yea, this case is moveable while everything else is movable.. 
+ case "Flat floor node chairs moveable seating": + return { + floor: FacilityFloor.FLAT, + seating: FacilitySeating.MOVABLE, + }; + case "Flat floor, fixed seating": + return { + floor: FacilityFloor.FLAT, + seating: FacilitySeating.FIXED, + }; + case "Flat floor, movable seating": + return { + floor: FacilityFloor.FLAT, + seating: FacilitySeating.MOVABLE, + }; + case "Other Floor, Movable Seating": + return { + floor: FacilityFloor.OTHER, + seating: FacilitySeating.MOVABLE, + }; + case "Tiered Floor, Movable Seating": + return { + floor: FacilityFloor.TIERED, + seating: FacilitySeating.MOVABLE, + }; + case "Tiered floor, fixed seating": + return { + floor: FacilityFloor.TIERED, + seating: FacilitySeating.FIXED, + }; + default: + console.warn( + `In Room ${id}, got unknown option for floor/seating combination! ${scrapedFloorSeating}` + ); + return { + floor: null, + seating: null, + }; + } +}; diff --git a/nss/src/types.ts b/nss/src/types.ts index bd12406..88e2451 100644 --- a/nss/src/types.ts +++ b/nss/src/types.ts @@ -1,15 +1,14 @@ - export type UngroupedRoomBooking = { name: string; day: string; start: string; weekPattern: number; -} +}; export type RawRoomBooking = UngroupedRoomBooking & { roomId: string; end: string; -} +}; export type RoomBooking = { bookingType: string; @@ -17,8 +16,46 @@ export type RoomBooking = { roomId: string; start: Date; end: Date; +}; + +export const FACILITIES_LIST = [ + "Floor/seating", + "Microphone", + "Accessibility", + "Audio-visual", + "Info technology", + "Writing media", + "Services", +] as const; + +export type ScrapedFacilities = Record< + (typeof FACILITIES_LIST)[number], + string[] +>; + +// remember to change the sql enum type as well! 
+export enum FacilityFloor { + FLAT = "Flat", + TIERED = "Tiered", + OTHER = "Other", +} + +export enum FacilitySeating { + MOVABLE = "Movable", + FIXED = "Fixed", } +export type MappedFacilities = { + floor: FacilityFloor | null; + seating: FacilitySeating | null; + microphone: string[]; + accessibility: string[]; + audiovisual: string[]; + infotechnology: string[]; + writingMedia: string[]; + service: string[]; +}; + export type Room = { abbr: string; name: string; @@ -27,17 +64,17 @@ export type Room = { capacity: number; school: string; buildingId: string; -} +}; export type ParsedName = { bookingType: string; name: string; -} +}; export type NameParser = { pattern: RegExp; parser: (matchGroups: Record) => ParsedName; -} +}; export type Building = { name: string; @@ -45,8 +82,8 @@ export type Building = { lat: number; long: number; aliases: string[]; -} +}; export type OverrideData = { buildings: Building[]; -} +}; diff --git a/sql/rooms/down.sql b/sql/rooms/down.sql index 6ba6525..ed537db 100644 --- a/sql/rooms/down.sql +++ b/sql/rooms/down.sql @@ -1 +1,3 @@ DROP TABLE Rooms CASCADE; +DROP TYPE FloorTypeEnum; +DROP TYPE SeatingTypeEnum; diff --git a/sql/rooms/up.sql b/sql/rooms/up.sql index e6c608d..fccfb51 100644 --- a/sql/rooms/up.sql +++ b/sql/rooms/up.sql @@ -1,10 +1,29 @@ +CREATE TYPE FloorTypeEnum AS ENUM ( + 'Flat', + 'Tiered', + 'Other' +); + +CREATE TYPE SeatingTypeEnum AS ENUM ( + 'Movable', + 'Fixed' +); + CREATE TABLE Rooms ( - "id" TEXT PRIMARY KEY, - "name" TEXT NOT NULL, - "abbr" TEXT NOT NULL, - "usage" TEXT NOT NULL, - "capacity" INTEGER NOT NULL, - "school" TEXT NOT NULL, - "buildingId" TEXT NOT NULL, + "id" TEXT PRIMARY KEY, + "name" TEXT NOT NULL, + "abbr" TEXT NOT NULL, + "usage" TEXT NOT NULL, + "capacity" INTEGER NOT NULL, + "school" TEXT NOT NULL, + "buildingId" TEXT NOT NULL, + "floor" FloorTypeEnum, + "seating" SeatingTypeEnum, + "microphone" TEXT[] NOT NULL, + "accessibility" TEXT[] NOT NULL, + "audiovisual" TEXT[] NOT NULL, + 
"infotechnology" TEXT[] NOT NULL, + "writingMedia" TEXT[] NOT NULL, + "service" TEXT[] NOT NULL, FOREIGN KEY ("buildingId") REFERENCES Buildings("id") ON DELETE CASCADE );