Skip to content

Commit

Permalink
Update scraper.js
Browse files Browse the repository at this point in the history
  • Loading branch information
berlintay authored Aug 10, 2024
1 parent b9f5010 commit f055727
Showing 1 changed file with 26 additions and 14 deletions.
40 changes: 26 additions & 14 deletions backend/scraper.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,38 @@
const axios = require('axios');
const cheerio = require('cheerio');

/**
 * Scrapes the GitHub trending page and collects the repositories listed on it.
 *
 * Each `article.Box-row` element is treated as one repository. The owner and
 * repository name are taken from the `href` of the row's `h2 a` link, which is
 * expected to look like `/owner/repo`; rows without such a link are skipped.
 *
 * @returns {Promise<Array<{name: string, description: string, url: string}>|undefined>}
 *   The repositories in page order, or `undefined` when the request or the
 *   parsing fails (the error is logged, not rethrown).
 */
const scrapeTrendingRepos = async () => {
  try {
    const { data } = await axios.get('https://github.com/trending');
    const $ = cheerio.load(data);

    const trendingRepos = [];
    $('article.Box-row').each((index, article) => {
      const row = $(article);

      // Read owner/repo from the link target rather than the link text: the
      // text mixes both names with a "/" separator and layout whitespace.
      const href = (row.find('h2 a').first().attr('href') || '').trim();
      const [orgName, repoName] = href.split('/').filter(Boolean);
      if (!orgName || !repoName) {
        // Not a repository row (or the markup changed); skip it instead of
        // throwing and losing every other row.
        return;
      }

      // Only the first <p> is used so that any other paragraph in the row
      // does not get glued onto the description.
      const description = row.find('p').first().text().trim() || 'No description';

      trendingRepos.push({
        name: `${orgName}/${repoName}`,
        description,
        url: `https://github.com/${orgName}/${repoName}`,
      });
    });

    return trendingRepos;
  } catch (error) {
    console.error('Error scraping trending repositories:', error);
    return undefined;
  }
};

module.exports = scrapeTrendingRepos;

// Run the scraper only when this file is executed directly (`node scraper.js`).
// Without the guard, every `require` of this module would fire a network
// request and leave its promise unhandled.
if (require.main === module) {
  scrapeTrendingRepos()
    .then((repos) => {
      // The scraper may resolve to undefined on failure; print only real results.
      if (Array.isArray(repos)) {
        console.log(JSON.stringify(repos, null, 2));
      }
    })
    .catch((error) => {
      console.error('Error scraping trending repositories:', error);
      process.exitCode = 1;
    });
}

0 comments on commit f055727

Please sign in to comment.