From db952f0df07c33148c81b51428d82cac2650b1e6 Mon Sep 17 00:00:00 2001 From: Ornella Ourfi Date: Tue, 1 Aug 2023 17:01:19 +0200 Subject: [PATCH 1/6] script pour la collection vers datalake --- src/tasks/scripts/replace-cras-datalake.js | 81 ++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 src/tasks/scripts/replace-cras-datalake.js diff --git a/src/tasks/scripts/replace-cras-datalake.js b/src/tasks/scripts/replace-cras-datalake.js new file mode 100644 index 0000000..c69e07d --- /dev/null +++ b/src/tasks/scripts/replace-cras-datalake.js @@ -0,0 +1,81 @@ +#!/usr/bin/env node +'use strict'; + +const { execute } = require('../utils'); +const { encrypt } = require('../../utils/encrypt'); + +// node src/tasks/scripts/replace-cras-datalake.js + +execute(__filename, async ({ logger, db, dbDatalake }) => { + let startDate = new Date(); + startDate.setDate(startDate.getDate() - 2); //Max 2 jours avant car on laissera le CRON pour les CRAS effectué dans les 2 derniers jours... 
+ startDate.setUTCHours(0, 0, 0, 0); + const sleep = ms => new Promise(r => setTimeout(r, ms)); + const countTotalCras = await db.collection('cras').countDocuments({ $or: [ + { createdAt: { $lte: startDate } }, + { updatedAt: { $lte: startDate } } + ] }); + + const calculLots = () => { + const result = []; + const limit = 100000; + const lot = Math.ceil(countTotalCras / limit); + for (let i = 0; i < lot; i++) { + if (countTotalCras <= (i * limit)) { + break; + } + result.push({ id: i, limit, skip: i * limit }); + } + return result; + }; + + const lotsArray = await calculLots(); + console.log('lotsArray:', lotsArray); + for (const lot of lotsArray) { + const cras = await db.collection('cras').find({ $or: [ + { createdAt: { $lte: startDate } }, + { updatedAt: { $lte: startDate } } + ] }).limit(lot.limit).skip(lot.skip).toArray(); + const promises = []; + let count = 0; + await cras.forEach(cra => { + promises.push(new Promise(async resolve => { + const whitelist = [ + '_id', + 'cra', + 'conseiller', + 'structure', + 'permanence', + 'createdAt', + 'updatedAt' + ]; + + for (const property in cra) { + if (!whitelist.includes(property)) { + delete cra[property]; + } + } + + cra._id = encrypt(cra._id.toString()); + cra.conseillerId = encrypt(cra.conseiller.oid.toString()); + if (cra.permanence) { + cra.permanenceId = encrypt(cra.permanence.oid.toString()); + + } + if (cra.structure) { + cra.structureId = encrypt(cra.structure.oid.toString()); + + } + delete cra.conseiller; + delete cra.permanence; + delete cra.structure; + count++; + resolve(); + })); + }); + await Promise.all(promises); + await dbDatalake.collection('cras-insert').insertMany(cras); + logger.info(`Le lot ${lot.id + 1} / ${lotsArray.length} effectué (+ ${count} CRAS)`); + await sleep(1000); + } +}); From 2644dd3d01d7dafdb7d5226b18cb98364e0c3994 Mon Sep 17 00:00:00 2001 From: Ornella Ourfi Date: Wed, 2 Aug 2023 17:43:34 +0200 Subject: [PATCH 2/6] ajout logger.info / delete connsole.log --- 
src/tasks/scripts/replace-cras-datalake.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tasks/scripts/replace-cras-datalake.js b/src/tasks/scripts/replace-cras-datalake.js index c69e07d..db1408f 100644 --- a/src/tasks/scripts/replace-cras-datalake.js +++ b/src/tasks/scripts/replace-cras-datalake.js @@ -30,7 +30,9 @@ execute(__filename, async ({ logger, db, dbDatalake }) => { }; const lotsArray = await calculLots(); - console.log('lotsArray:', lotsArray); + + logger.info(`Début des ${lotsArray.length} lots...`); + for (const lot of lotsArray) { const cras = await db.collection('cras').find({ $or: [ { createdAt: { $lte: startDate } }, From ed4b91be76eaa5b27c34b5a1225e527e93cb0d6f Mon Sep 17 00:00:00 2001 From: morgan Date: Wed, 9 Aug 2023 15:26:16 +0200 Subject: [PATCH 3/6] =?UTF-8?q?Cr=C3=A9ation=20du=20script=20d'export=20de?= =?UTF-8?q?=20cras=20car=20metabase=20est=20limit=C3=A9=20=C3=A0=201M=20de?= =?UTF-8?q?=20ligne?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tasks/scripts/extract-cras-csv.js | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/tasks/scripts/extract-cras-csv.js diff --git a/src/tasks/scripts/extract-cras-csv.js b/src/tasks/scripts/extract-cras-csv.js new file mode 100644 index 0000000..8f6a24d --- /dev/null +++ b/src/tasks/scripts/extract-cras-csv.js @@ -0,0 +1,31 @@ +#!/usr/bin/env node +'use strict'; +const path = require('path'); +const fs = require('fs'); +const { program } = require('commander'); +const { execute } = require('../utils'); + +require('dotenv').config(); + +// Penser à réaliser la commande avec --max-old-space-size=8192 +// pour traiter la quantité de données sans problème de mémoire +execute(__filename, async ({ logger, dbDatalake }) => { + program.helpOption('-e', 'HELP command'); + program.parse(process.argv); + + const cras = await dbDatalake.collection('cras').find().toArray(); + const promises = []; + 
logger.info(`Generating CSV file...`); + let csvFile = path.join(__dirname, '../../../data/exports', `cras.csv`); + let file = fs.createWriteStream(csvFile, { flags: 'w' }); + + file.write(`ID,ConseillerId,Cra,CreatedAt,UpdatedAt,PermanenceId,StructureId\n`); + cras.forEach(cra => { + promises.push(new Promise(async resolve => { + file.write(`${cra._id},${cra.conseillerId},${cra.cra},${cra.createdAt},${cra.updatedAt},${cra.permanenceId},${cra.structureId},\n`); + resolve(); + })); + }); + await Promise.all(promises); + file.close(); +}); From 783699631f49b202b55001ac6ba653ad9edf001c Mon Sep 17 00:00:00 2001 From: morgan Date: Mon, 11 Sep 2023 11:36:05 +0200 Subject: [PATCH 4/6] modification pour cursor --- src/tasks/scripts/extract-cras-csv.js | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/tasks/scripts/extract-cras-csv.js b/src/tasks/scripts/extract-cras-csv.js index 8f6a24d..eef5521 100644 --- a/src/tasks/scripts/extract-cras-csv.js +++ b/src/tasks/scripts/extract-cras-csv.js @@ -7,25 +7,21 @@ const { execute } = require('../utils'); require('dotenv').config(); -// Penser à réaliser la commande avec --max-old-space-size=8192 -// pour traiter la quantité de données sans problème de mémoire execute(__filename, async ({ logger, dbDatalake }) => { program.helpOption('-e', 'HELP command'); program.parse(process.argv); - const cras = await dbDatalake.collection('cras').find().toArray(); - const promises = []; + const cursor = await dbDatalake.collection('cras').find(); + logger.info(`Generating CSV file...`); let csvFile = path.join(__dirname, '../../../data/exports', `cras.csv`); let file = fs.createWriteStream(csvFile, { flags: 'w' }); - file.write(`ID,ConseillerId,Cra,CreatedAt,UpdatedAt,PermanenceId,StructureId\n`); - cras.forEach(cra => { - promises.push(new Promise(async resolve => { - file.write(`${cra._id},${cra.conseillerId},${cra.cra},${cra.createdAt},${cra.updatedAt},${cra.permanenceId},${cra.structureId},\n`); - 
resolve(); - })); - }); - await Promise.all(promises); + file.write(`ID;ConseillerId;Cra;CreatedAt;UpdatedAt;PermanenceId;StructureId\n`); + while (await cursor.hasNext()) { + const cra = await cursor.next(); + file.write(`${cra._id};${cra.conseillerId};${JSON.stringify(cra.cra)};${cra.createdAt};${cra.updatedAt};${cra.permanenceId};${cra.structureId};\n`); + } + file.close(); }); From 9e04b389553fe321765db7c7f97144908b0ab186 Mon Sep 17 00:00:00 2001 From: morgan Date: Thu, 14 Sep 2023 17:44:43 +0200 Subject: [PATCH 5/6] script de modification des ids de permanences dans les cras --- .../correctionDoublonsPermanences.js | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/tasks/correction/correctionDoublonsPermanences.js diff --git a/src/tasks/correction/correctionDoublonsPermanences.js b/src/tasks/correction/correctionDoublonsPermanences.js new file mode 100644 index 0000000..b27087a --- /dev/null +++ b/src/tasks/correction/correctionDoublonsPermanences.js @@ -0,0 +1,41 @@ +#!/usr/bin/env node +'use strict'; +const csv = require('csv-parser'); +const fs = require('fs'); +const { encrypt } = require('../../utils/encrypt'); +const { execute } = require('../utils'); + +const updateCraWithIdPermanence = dbDatalake => async (id, ids) => await dbDatalake.collection('cras').updateMany( + { 'permanenceId': { '$in': ids } }, + { '$set': { 'permanenceId': id } } +); + +execute(__filename, async ({ logger, dbDatalake }) => { + + const permanences = []; + const promises = []; + let nbMaj = 0; + + logger.info('Modification des ids suite à la fusion des doublons de permanences'); + + await new Promise((resolve, reject) => fs.createReadStream('data/imports/permanences-doublons.csv') + .pipe(csv({ separator: ';' })) + .on('data', data => permanences.push(data)) + .on('error', reject) + .on('end', () => { + permanences.forEach(permanence => { + promises.push((async () => { + const id = encrypt(permanence.idPermanence.toString()); + const ids = []; + permanence.doublons.split(',').forEach(id => { + 
ids.push(encrypt(id.toString())); + }); + await updateCraWithIdPermanence(dbDatalake)(id, ids); + nbMaj++; + })()); + }); + Promise.all(promises).then(resolve, reject); + })); + + logger.info(`${nbMaj} CRAs mis à jour`); +}); From d7fd0f8ffa6857b4a385e942765a2135ab255cd4 Mon Sep 17 00:00:00 2001 From: morgan Date: Tue, 19 Sep 2023 12:04:46 +0200 Subject: [PATCH 6/6] =?UTF-8?q?ajout=20de=20l'ent=C3=AAte=20des=20colonnes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tasks/correction/correctionDoublonsPermanences.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tasks/correction/correctionDoublonsPermanences.js b/src/tasks/correction/correctionDoublonsPermanences.js index b27087a..b5aea9e 100644 --- a/src/tasks/correction/correctionDoublonsPermanences.js +++ b/src/tasks/correction/correctionDoublonsPermanences.js @@ -18,6 +18,8 @@ execute(__filename, async ({ logger, dbDatalake }) => { logger.info('Modification des ids suite à la fusion des doublons de permanences'); + //eslint-disable-next-line max-len + //idPermanence|estStructure|nomEnseigne|numeroTelephone|email|siteWeb|siret|adresse|location|horaires|typeAcces|conseillers|lieuPrincipalPour|conseillersItinerants|structure|updatedAt|updatedBy|doublons await new Promise((resolve, reject) => fs.createReadStream('data/imports/permanences-doublons.csv') .pipe(csv({ separator: ';' })) .on('data', data => permanences.push(data)) @@ -36,6 +38,6 @@ execute(__filename, async ({ logger, dbDatalake }) => { }); Promise.all(promises).then(resolve, reject); })); - + logger.info(`${nbMaj} CRAs mis à jour`); });