From f0557276964ec43fbacd5839a7fbfbbace4682b8 Mon Sep 17 00:00:00 2001 From: Joshua Taylor Keays <128968711+berlintay@users.noreply.github.com> Date: Sat, 10 Aug 2024 04:04:07 -0300 Subject: [PATCH] Update scraper.js --- backend/scraper.js | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/backend/scraper.js b/backend/scraper.js index e1995eb..93efc4b 100644 --- a/backend/scraper.js +++ b/backend/scraper.js @@ -1,26 +1,38 @@ const axios = require('axios'); const cheerio = require('cheerio'); - const scrapeTrendingRepos = async () => { try { - const { data } = await axios.get('https://github.com/trending?since=daily'); - + const { data } = await axios.get('https://github.com/trending'); const $ = cheerio.load(data); - const trendingRepos = []; - $('.Box-row').each((index, element) => { - const orgOrUserName = $(element).find('span.text-normal').text().trim(); - const repoName = $(element).find('h1.h3 a').text().replace(orgOrUserName, '').trim(); - const fullRepoName = `${orgOrUserName}${repoName}`; - const description = $(element).find('p.col-9').text().trim(); - const repoUrl = $(element).find('h1.h3 a').attr('href').trim(); // Get the repository URL - - trendingRepos.push({ name: fullRepoName, description, url: `https://github.com${repoUrl}` }); + const repoArticles = $('article.Box-row'); + + repoArticles.each((index, article) => { + const anchor = $(article).find('h2 a'); + + // Extract organization name + const orgNameElement = anchor.find('.text-normal'); + let orgName = orgNameElement.text().trim() || 'No org name'; + orgName = orgName.replace('/', '').trim(); + + // Extract repository name + const fullText = anchor.text().replace(orgName, '').trim(); + const repoName = fullText.replace('/', '').trim(); + + // Extract description (assuming it's in a <p> tag) + const descriptionElement = $(article).find('p'); + const description = descriptionElement.text().trim() || 'No description'; + + // Output the results + console.log(`Organization: ${orgName}`); + console.log(`Repository: ${repoName}`); + console.log(`Description: ${description}`); + console.log('---'); }); - return trendingRepos; } catch (error) { console.error('Error scraping trending repositories:', error); } }; -module.exports = scrapeTrendingRepos; +// Run the function to scrape the data +scrapeTrendingRepos();