From abf65d4df59042c567fae3a10d9895729a35dea1 Mon Sep 17 00:00:00 2001 From: Nathan Randriamanana <37664429+TheAIWizard@users.noreply.github.com> Date: Sun, 8 Sep 2024 21:50:03 +0200 Subject: [PATCH] Update extract-db-otm.py --- utils/extract-db-otm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/extract-db-otm.py b/utils/extract-db-otm.py index 6f53b15..cfd113e 100644 --- a/utils/extract-db-otm.py +++ b/utils/extract-db-otm.py @@ -36,6 +36,9 @@ def sample_data(df_path: str, n_lines: str): .to_pandas() ) + # Convertir la colonne de dates en format datetime si ce n'est pas déjà fait + df["date_modification_dt"] = pd.to_datetime(df["date_modification"], unit='ms', origin='unix') + # Normaliser les colonnes 'libelle' des deux DataFrames (par exemple, en minuscules) df['libelle_normalized'] = df['libelle'].str.lower() df_s3['libelle_normalized'] = df_s3['libelle'].str.lower() @@ -46,8 +49,6 @@ def sample_data(df_path: str, n_lines: str): df = df.drop(columns=['libelle_normalized']) print("Number of lines after selection (remove already done): " + str(len(df))) - # Convertir la colonne de dates en format datetime si ce n'est pas déjà fait - df["date_modification_dt"] = pd.to_datetime(df["date_modification"], unit='ms', origin='unix') # Calculer la taille de l'échantillon pour chaque strate n = int(n_lines)