Skip to content

Commit

Permalink
Update extract-db-otm.py
Browse files: browse the repository at this point in the history
  • Loading branch information
TheAIWizard authored Sep 8, 2024
1 parent dcf30f9 commit abf65d4
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions utils/extract-db-otm.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,6 +36,9 @@ def sample_data(df_path: str, n_lines: str):
.to_pandas()
)

# Convertir la colonne de dates en format datetime si ce n'est pas déjà fait
df["date_modification_dt"] = pd.to_datetime(df["date_modification"], unit='ms', origin='unix')

# Normaliser les colonnes 'libelle' des deux DataFrames (par exemple, en minuscules)
df['libelle_normalized'] = df['libelle'].str.lower()
df_s3['libelle_normalized'] = df_s3['libelle'].str.lower()
Expand All @@ -46,8 +49,6 @@ def sample_data(df_path: str, n_lines: str):
df = df.drop(columns=['libelle_normalized'])
print("Number of lines after selection (remove already done): " + str(len(df)))

# Convertir la colonne de dates en format datetime si ce n'est pas déjà fait
df["date_modification_dt"] = pd.to_datetime(df["date_modification"], unit='ms', origin='unix')
# Calculer la taille de l'échantillon pour chaque strate
n = int(n_lines)

Expand Down

0 comments on commit abf65d4

Please sign in to comment.