Skip to content

Commit

Permalink
Update scrape.mjs
Browse files (browse the repository at this point in the history)
  • Loading branch information
berlintay authored Aug 10, 2024
1 parent ecf55a0 commit fca64eb
Showing 1 changed file with 18 additions and 12 deletions.
30 changes: 18 additions & 12 deletions backend/scrape.mjs
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,17 +9,23 @@ const scrapeTrendingRepos = async () => {
const { data } = await axios.get('https://github.com/trending/unknown?since=daily');
const $ = cheerio.load(data);

const trendingRepos = [];

$('.Box-row').each((index, element) => {
const orgOrUserName = $(element).find('span.text-normal').text().trim();
const repoName = $(element).find('h1.h3 a').text().replace(orgOrUserName, '').trim();
const fullRepoName = `${orgOrUserName}${repoName}`;
const description = $(element).find('p.col-9').text().trim();
const repoUrl = $(element).find('h1.h3 a').attr('href').trim(); // Get the repository URL

trendingRepos.push({ name: fullRepoName, description, url: `https://github.com${repoUrl}` });
});
const trendingRepos = document.querySelectorAll('article h2 a');

repoLinks.forEach(anchor => {
// Assuming each anchor tag is a repository link
const orgNameElement = anchor.querySelector('.text-normal');
let orgName = orgNameElement ? orgNameElement.textContent.trim() : 'No org name';

// Remove the trailing slash from the organization name
orgName = orgName.replace('/', '').trim();

// Extracting repository name by removing the organization name from the anchor text
const fullText = anchor.textContent.replace(orgName, '').trim();
const repoName = fullText.replace('/', '').trim();

console.log(`Organization: ${orgName}`);
console.log(`Repository: ${repoName}`);
});

return trendingRepos;
} catch (error) {
Expand Down Expand Up @@ -49,4 +55,4 @@ const fetchAndAppendData = async () => {
console.error('Error fetching or appending data:', error);
};
process.exit(1)
}
}

0 comments on commit fca64eb

Please sign in to comment.