From ac3eae898514a04924e6ccd62c79e46138316b24 Mon Sep 17 00:00:00 2001 From: Joshua Taylor Keays <128968711+berlintay@users.noreply.github.com> Date: Sat, 10 Aug 2024 02:25:17 -0300 Subject: [PATCH] Update scrape.mjs --- backend/scrape.mjs | 71 ++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/backend/scrape.mjs b/backend/scrape.mjs index 5d6ddd2..09ab62e 100755 --- a/backend/scrape.mjs +++ b/backend/scrape.mjs @@ -1,40 +1,49 @@ -import fs from 'fs'; // Import fs from ES module -import fetch from 'node-fetch'; // Import fetch from node-fetch +import axios from 'axios'; +import cheerio from 'cheerio'; +import fs from 'fs'; const DATA_FILE = './data.json'; -async function fetchAndAppendData() { - try { - const response = await fetch('http://localhost:3000/api/trending'); - const newData = await response.json(); +const scrapeTrendingRepos = async () => { + try { + const { data } = await axios.get('https://github.com/trending'); + const $ = cheerio.load(data); - // Read the existing data file - let existingData = []; - if (fs.existsSync(DATA_FILE)) { - const rawData = fs.readFileSync(DATA_FILE); - existingData = JSON.parse(rawData); - } + const trendingRepos = []; - // Append the new data - existingData.push(...newData); + $('.Box-row').each((index, element) => { + const orgOrUserName = $(element).find('span.text-normal').text().trim(); + const repoName = $(element).find('h1.h3 a').text().replace(orgOrUserName, '').trim(); + const fullRepoName = `${orgOrUserName}${repoName}`; + const description = $(element).find('p.col-9').text().trim(); - // Write the updated data back to the file - fs.writeFileSync(DATA_FILE, JSON.stringify(existingData, null, 2)); + trendingRepos.push({ name: fullRepoName, description }); + }); - console.log('Data fetched and appended successfully.'); - } catch (error) { - console.error('Error fetching or appending data:', error); - process.exit(1); // Exit with an error code - } -} - -const startScraping = async () => { - console.log('Waiting for 1 minute before starting the scraping process...'); - await new Promise(resolve => setTimeout(resolve, 1 * 60 * 1000)); // 1 minute in milliseconds - console.log('Starting the scraping process now...'); - - // Call your scraping function here - fetchAndAppendData(); + return trendingRepos; + } catch (error) { + console.error('Error scraping trending repositories:', error); + } }; -startScraping(); +const fetchAndAppendData = async () => { + try { + const newData = await scrapeTrendingRepos(); + + // Read the existing data file + let existingData = []; + if (fs.existsSync(DATA_FILE)) { + const rawData = fs.readFileSync(DATA_FILE); + existingData = JSON.parse(rawData); + } + + // Append the new data + existingData.push(...newData); + + // Write the updated data back to the file + fs.writeFileSync(DATA_FILE, JSON.stringify(existingData, null, 2)); + + console.log('Data fetched and appended successfully.'); + } catch (error) { + console.error('Error fetching or appending data:', error); + process.exit(1); // Exit