Skip to content

Commit

Permalink
Update scrape.mjs
Browse files (browse the repository at this point in the history)
  • Loading branch information
berlintay authored Aug 10, 2024
1 parent 2dfcef2 commit ac3eae8
Showing 1 changed file with 40 additions and 31 deletions.
71 changes: 40 additions & 31 deletions backend/scrape.mjs
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,49 @@
import fs from 'fs'; // Import fs from ES module
import fetch from 'node-fetch'; // Import fetch from node-fetch
import axios from 'axios';
import cheerio from 'cheerio';
import fs from 'fs';

// NOTE(review): this region is a rendered diff without +/- markers, so lines
// from the removed and the added version of the file are interleaved and the
// text does not parse as a single JavaScript module as shown. The
// "presumably removed/added" labels below are inferred from the arrow-function
// version of fetchAndAppendData further down — confirm against the real file.

// Path of the JSON file that the read / append / write steps below operate on.
const DATA_FILE = './data.json';

// Presumably the removed version: a function declaration that requested the
// trending data from a local HTTP endpoint and decoded the response as JSON.
async function fetchAndAppendData() {
try {
const response = await fetch('http://localhost:3000/api/trending');
const newData = await response.json();
// Presumably the added scraper: downloads the GitHub trending page with axios
// and loads the returned markup into cheerio for querying.
const scrapeTrendingRepos = async () => {
try {
const { data } = await axios.get('https://github.com/trending');
const $ = cheerio.load(data);

// Read the existing data file
// Falls back to an empty array when DATA_FILE does not exist yet.
let existingData = [];
if (fs.existsSync(DATA_FILE)) {
const rawData = fs.readFileSync(DATA_FILE);
existingData = JSON.parse(rawData);
}
// Accumulates one { name, description } record per matched row.
const trendingRepos = [];

// Append the new data
existingData.push(...newData);
// For every element matching '.Box-row': read the owner text, read the repo
// link text with the owner text removed from it, and read the description.
$('.Box-row').each((index, element) => {
const orgOrUserName = $(element).find('span.text-normal').text().trim();
const repoName = $(element).find('h1.h3 a').text().replace(orgOrUserName, '').trim();
// The two parts are joined with no separator added here, so any separator
// must already be part of the scraped text.
const fullRepoName = `${orgOrUserName}${repoName}`;
const description = $(element).find('p.col-9').text().trim();

// Write the updated data back to the file
// The whole array is rewritten, pretty-printed with a 2-space indent.
fs.writeFileSync(DATA_FILE, JSON.stringify(existingData, null, 2));
trendingRepos.push({ name: fullRepoName, description });
});

console.log('Data fetched and appended successfully.');
} catch (error) {
console.error('Error fetching or appending data:', error);
process.exit(1); // Exit with an error code
}
}

// Presumably removed: delayed entry point that waits one minute, then starts
// the fetch-and-append step.
const startScraping = async () => {
console.log('Waiting for 1 minute before starting the scraping process...');
await new Promise(resolve => setTimeout(resolve, 1 * 60 * 1000)); // 1 minute in milliseconds
console.log('Starting the scraping process now...');

// Call your scraping function here
// NOTE(review): the call below is not awaited, so startScraping does not
// wait for it to finish.
fetchAndAppendData();
// Tail of the scraper: hands back the collected records.
return trendingRepos;
} catch (error) {
// NOTE(review): this handler only logs; nothing is returned on this path, so
// a caller that spreads the result would receive undefined — confirm intent.
console.error('Error scraping trending repositories:', error);
}
};

// Runs the delayed entry point as soon as the module is evaluated.
startScraping();
const fetchAndAppendData = async () => {
try {
const newData = await scrapeTrendingRepos();

// Read the existing data file
let existingData = [];
if (fs.existsSync(DATA_FILE)) {
const rawData = fs.readFileSync(DATA_FILE);
existingData = JSON.parse(rawData);
}

// Append the new data
existingData.push(...newData);

// Write the updated data back to the file
fs.writeFileSync(DATA_FILE, JSON.stringify(existingData, null, 2));

console.log('Data fetched and appended successfully.');
} catch (error) {
console.error('Error fetching or appending data:', error);
process.exit(1); // Exit

0 comments on commit ac3eae8

Please sign in to comment.