diff --git a/backend/scrape.mjs b/backend/scrape.mjs index 736db41..e1a599e 100755 --- a/backend/scrape.mjs +++ b/backend/scrape.mjs @@ -1,65 +1,39 @@ import axios from 'axios'; import cheerio from 'cheerio'; -import fs from 'fs'; - -const DATA_FILE = './data.json'; const scrapeTrendingRepos = async () => { try { const { data } = await axios.get('https://github.com/trending'); const $ = cheerio.load(data); - const trendingRepos = []; + const repoArticles = $('article.Box-row'); - $('article h2 a').each((index, element) => { - const anchor = $(element); - const orgNameElement = anchor.find('.text-normal'); - let orgName = orgNameElement.text().trim(); + repoArticles.each((index, article) => { + const anchor = $(article).find('h2 a'); - // Remove the trailing slash from the organization name + // Extract organization name + const orgNameElement = anchor.find('.text-normal'); + let orgName = orgNameElement.text().trim() || 'No org name'; orgName = orgName.replace('/', '').trim(); - // Extracting repository name by removing the organization name from the anchor text - const repoName = anchor.text().replace(orgName, '').replace('/', '').trim(); + // Extract repository name + const fullText = anchor.text().replace(orgName, '').trim(); + const repoName = fullText.replace('/', '').trim(); - // Adding the repository details to the array - trendingRepos.push({ - organization: orgName || 'No org name', - repository: repoName || 'No repository name', - }); - }); + // Extract description (assuming it's in a
tag) + const descriptionElement = $(article).find('p'); + const description = descriptionElement.text().trim() || 'No description'; - return trendingRepos; + // Output the results + console.log(`Organization: ${orgName}`); + console.log(`Repository: ${repoName}`); + console.log(`Description: ${description}`); + console.log('---'); + }); } catch (error) { console.error('Error scraping trending repositories:', error); - return []; - } -}; - -const fetchAndAppendData = async () => { - try { - const newData = await scrapeTrendingRepos(); - - // Read the existing data file - let existingData = []; - if (fs.existsSync(DATA_FILE)) { - const rawData = fs.readFileSync(DATA_FILE); - existingData = JSON.parse(rawData); - } - - // Append the new data - existingData.push(...newData); - - // Write the updated data back to the file - fs.writeFileSync(DATA_FILE, JSON.stringify(existingData, null, 2)); - - console.log('Data fetched and appended successfully.'); - } catch (error) { - console.error('Error fetching or appending data:', error); - } finally { - process.exit(1); } }; -// Execute the function to fetch and append data -fetchAndAppendData(); +// Run the function to scrape the data +scrapeTrendingRepos();