Skip to content

Commit

Permalink
Update scrape.mjs
Browse files Browse the repository at this point in the history
  • Loading branch information
berlintay authored Aug 10, 2024
1 parent e61158c commit b9f5010
Showing 1 changed file with 20 additions and 46 deletions.
66 changes: 20 additions & 46 deletions backend/scrape.mjs
Original file line number Diff line number Diff line change
@@ -1,65 +1,39 @@
import axios from 'axios';
import cheerio from 'cheerio';
import fs from 'fs';

// Path of the JSON file that fetchAndAppendData reads, extends and rewrites.
const DATA_FILE = './data.json';

// Downloads https://github.com/trending with axios, parses the HTML with cheerio
// and extracts an organization name and repository name from each `h2 a` link.
// On any thrown error it logs the error and returns an empty array.
//
// NOTE(review): this span is taken from a rendered diff and appears to interleave
// the removed and the added lines of the commit. As written there are two nested
// `.each` loops, `repoName` is declared twice inside the same callback, and
// `article` is used outside the callback that defines it, so the text is not
// runnable as-is. Confirm against the real backend/scrape.mjs before relying on it.
const scrapeTrendingRepos = async () => {
try {
// Fetch the trending page; `data` is the response body handed to cheerio.
const { data } = await axios.get('https://github.com/trending');
const $ = cheerio.load(data);

const trendingRepos = [];
// Presumably one <article class="Box-row"> per trending repository — depends on
// GitHub's current markup; TODO confirm.
const repoArticles = $('article.Box-row');

$('article h2 a').each((index, element) => {
const anchor = $(element);
const orgNameElement = anchor.find('.text-normal');
let orgName = orgNameElement.text().trim();
repoArticles.each((index, article) => {
const anchor = $(article).find('h2 a');

// Remove the trailing slash from the organization name
// Extract organization name
const orgNameElement = anchor.find('.text-normal');
let orgName = orgNameElement.text().trim() || 'No org name';
// A string pattern makes replace() drop only the first '/'.
orgName = orgName.replace('/', '').trim();

// Extracting repository name by removing the organization name from the anchor text
const repoName = anchor.text().replace(orgName, '').replace('/', '').trim();
// Extract repository name
const fullText = anchor.text().replace(orgName, '').trim();
// NOTE(review): second declaration of `repoName` in this scope (see note at top).
const repoName = fullText.replace('/', '').trim();

// Adding the repository details to the array
trendingRepos.push({
organization: orgName || 'No org name',
repository: repoName || 'No repository name',
});
});
// Extract description (assuming it's in a <p> tag)
const descriptionElement = $(article).find('p');
const description = descriptionElement.text().trim() || 'No description';

// NOTE(review): with the braces as nested here, this return sits inside the
// outer `.each` callback, not directly in the function body.
return trendingRepos;
// Output the results
console.log(`Organization: ${orgName}`);
console.log(`Repository: ${repoName}`);
console.log(`Description: ${description}`);
console.log('---');
});
} catch (error) {
// Failures are logged and reported to the caller as "no repositories".
console.error('Error scraping trending repositories:', error);
return [];
}
};

/**
 * Scrapes the trending repositories, appends them to the JSON array stored in
 * DATA_FILE and then terminates the process.
 *
 * Exit status: 0 when the data was written, 1 when scraping, reading, parsing
 * or writing failed. (The status used to be 1 unconditionally, so successful
 * runs were reported as failures to the shell / CI.)
 *
 * A missing or blank DATA_FILE is treated as "no data yet"; a file whose JSON
 * is not an array is rejected instead of being overwritten.
 *
 * @returns {Promise<void>} Does not reject on scrape or file errors; they are
 *   logged and reflected in the exit status.
 */
const fetchAndAppendData = async () => {
  let exitCode = 0;
  try {
    const newData = await scrapeTrendingRepos();

    // Start from whatever is already stored on disk.
    let existingData = [];
    if (fs.existsSync(DATA_FILE)) {
      const rawData = fs.readFileSync(DATA_FILE, 'utf8');
      // JSON.parse('') throws, so only parse when there is actual content.
      if (rawData.trim() !== '') {
        existingData = JSON.parse(rawData);
      }
    }
    if (!Array.isArray(existingData)) {
      throw new TypeError(`${DATA_FILE} must contain a JSON array`);
    }

    // Append the new data
    existingData.push(...newData);

    // Write the updated data back to the file
    fs.writeFileSync(DATA_FILE, JSON.stringify(existingData, null, 2));

    console.log('Data fetched and appended successfully.');
  } catch (error) {
    console.error('Error fetching or appending data:', error);
    exitCode = 1;
  } finally {
    // Force termination as before, but report the real outcome.
    process.exit(exitCode);
  }
};

// NOTE(review): two entry-point calls appear here because this text interleaves
// both sides of a diff; presumably one is the removed line and the other the
// added line — confirm which one the real file keeps.
// Execute the function to fetch and append data
fetchAndAppendData();
// Run the function to scrape the data
scrapeTrendingRepos();

0 comments on commit b9f5010

Please sign in to comment.