-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Mise à jour de la base de données de communes (millésime 2023) (#3555)
* communes 2023 * improve import communes script * mix credo * more detailed logs * Apply suggestions from code review Co-authored-by: Antoine Augusti <[email protected]> * Remove useless variable attribution --------- Co-authored-by: Antoine Augusti <[email protected]>
- Loading branch information
1 parent
34e0bfe
commit 943307d
Showing
2 changed files
with
88 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,56 @@ | ||
defmodule Mix.Tasks.Transport.ImportCommunes do | ||
@moduledoc "Import the communes" | ||
@moduledoc """ | ||
Import or updates commune data (list, geometry) from official sources. Run with `mix transport.import_communes`. | ||
""" | ||
@shortdoc "Refreshes the database table `commune` with the latest data" | ||
use Mix.Task | ||
import Ecto.Query | ||
alias Ecto.Changeset | ||
alias DB.{Commune, Region, Repo} | ||
require Logger | ||
|
||
@communes_geojson_url "http://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2022/geojson/communes-100m.geojson" | ||
# List of communes with their geometry, but lacking additional information | ||
@communes_geojson_url "http://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2023/geojson/communes-100m.geojson" | ||
# List of official communes with additional information (population, arrondissement, etc.) | ||
# See https://github.com/etalab/decoupage-administratif | ||
@communes_url "https://unpkg.com/@etalab/[email protected]/data/communes.json" | ||
@communes_url "https://unpkg.com/@etalab/[email protected]/data/communes.json" | ||
|
||
@doc """
Reads every region from the database and returns a map whose keys are the region
INSEE codes and whose values are the matching database ids.
"""
def regions_by_insee do
  for region <- Repo.all(Region), into: %{} do
    {region.insee, region.id}
  end
end
|
||
@doc """
Downloads the communes GeoJSON file from the official source and returns a map keyed by
commune INSEE code, with the decoded GeoJSON geometry of each commune as value.

Raises if the request fails, if the HTTP status is not 200, if the body is not valid JSON
or if the decoded payload has no "features" key.
"""
def geojson_by_insee do
  request_options = [connect_options: [timeout: 15_000], receive_timeout: 15_000]
  %{status: 200, body: raw_body} = Req.get!(@communes_geojson_url, request_options)

  # Req does not decode a GeoJSON body automatically the way it does for JSON,
  # so the payload is decoded explicitly here.
  features = raw_body |> Jason.decode!() |> Map.fetch!("features")

  Map.new(features, fn feature -> {feature["properties"]["code"], feature["geometry"]} end)
end
|
||
@doc """
Fetches the official list of communes from the network and returns it as a list of maps.

Only the following entries are kept:
- communes whose "type" is "commune-actuelle" (i.e. current communes; deleted ones are dropped)
- communes whose "region" is one of the given `region_insees`

Raises if the request fails or if the HTTP status is not 200.
"""
def load_etalab_communes(region_insees) do
  request_options = [connect_options: [timeout: 15_000], receive_timeout: 15_000]
  %{status: 200, body: communes} = Req.get!(@communes_url, request_options)

  Enum.filter(communes, fn commune ->
    commune["type"] == "commune-actuelle" and commune["region"] in region_insees
  end)
end
|
||
@doc """ | ||
First creates the commune (without geometry) if it doesn’t exist. | ||
Then updates the commune with the new data through a changeset. | ||
Returns a list of keys of changed fields for statistics. | ||
""" | ||
def insert_or_update_commune( | ||
%{ | ||
"code" => insee, | ||
|
@@ -22,32 +62,28 @@ defmodule Mix.Tasks.Transport.ImportCommunes do | |
regions, | ||
geojsons | ||
) do | ||
insee | ||
|> get_or_create_commune() | ||
|> Changeset.change(%{ | ||
insee: insee, | ||
nom: nom, | ||
region_id: Map.fetch!(regions, region_insee), | ||
geom: build_geometry(geojsons, insee), | ||
population: population, | ||
siren: Map.get(params, "siren"), | ||
arrondissement_insee: Map.get(params, "arrondissement"), | ||
departement_insee: departement_insee | ||
}) | ||
|> Repo.insert_or_update!() | ||
end | ||
|
||
defp regions_by_insee do | ||
Region |> Repo.all() |> Enum.into(%{}, fn region -> {region.insee, region.id} end) | ||
end | ||
changeset = | ||
insee | ||
|> get_or_create_commune() | ||
|> Changeset.change(%{ | ||
insee: insee, | ||
nom: nom, | ||
region_id: Map.fetch!(regions, region_insee), | ||
geom: build_geometry(geojsons, insee), | ||
population: population, | ||
siren: Map.get(params, "siren"), | ||
arrondissement_insee: Map.get(params, "arrondissement"), | ||
departement_insee: departement_insee | ||
}) | ||
|
||
changeset_change_keys = changeset.changes |> Map.keys() | ||
|
||
unless Enum.empty?(changeset_change_keys -- [:geom, :population]) do | ||
Logger.info("Important changes for INSEE #{changeset.data.insee}. #{readable_changeset(changeset)}") | ||
end | ||
|
||
defp geojson_by_insee do | ||
@communes_geojson_url | ||
|> HTTPoison.get!(timeout: 15_000, recv_timeout: 15_000) | ||
|> Map.fetch!(:body) | ||
|> Jason.decode!() | ||
|> Map.fetch!("features") | ||
|> Enum.into(%{}, fn record -> {record["properties"]["code"], record["geometry"]} end) | ||
changeset |> Repo.insert_or_update!() | ||
changeset_change_keys | ||
end | ||
|
||
defp get_or_create_commune(insee) do | ||
|
@@ -69,40 +105,48 @@ defmodule Mix.Tasks.Transport.ImportCommunes do | |
%{geom | srid: 4326} | ||
end | ||
|
||
defp load_etalab_communes(region_insees) do | ||
@communes_url | ||
|> HTTPoison.get!(timeout: 15_000, recv_timeout: 15_000) | ||
|> Map.fetch!(:body) | ||
|> Jason.decode!() | ||
|> Enum.filter(&(&1["type"] == "commune-actuelle" and &1["region"] in region_insees)) | ||
# Renders the pending changes of a changeset as a single log-friendly line.
# Each changed field is written as "field: old value => new value" and the
# entries are joined with " ; ". The old value is read from the changeset data,
# the new value from the changeset changes.
defp readable_changeset(%Ecto.Changeset{changes: changes, data: data}) do
  Enum.map_join(changes, " ; ", fn {key, new_value} ->
    "#{key}: #{loggable_value(Map.get(data, key))} => #{loggable_value(new_value)}"
  end)
end

# Converts a single field value to text for the log line.
# Plain interpolation raises Protocol.UndefinedError for values without a
# String.Chars implementation (typically structs), so those values fall back
# to inspect/1. Values that already implement String.Chars (strings, numbers,
# atoms, nil) are rendered exactly as interpolation would render them.
defp loggable_value(value) do
  if String.Chars.impl_for(value), do: to_string(value), else: inspect(value)
end
|
||
# Entry point of the Mix task; the task arguments are ignored.
#
# Steps, in order:
# 1. starts the application through `Mix.Task.run("app.start")`
# 2. loads the regions from the database, the communes GeoJSON and the official communes list
#    (the latter filtered on the regions' INSEE codes)
# 3. computes, as set differences on INSEE codes, which communes are new (in the official list
#    but not in the database) and which are removed (in the database but not in the official list),
#    and logs both
# 4. deletes the removed communes from the database
# 5. calls `insert_or_update_commune/3` for every official commune and logs the result
# 6. calls `ensure_valid_geometries/0` then `enable_trigger/0`
#
# NOTE(review): `disable_trigger/0`, `ensure_valid_geometries/0` and `enable_trigger/0` are defined
# outside this view; their exact effect should be confirmed there. `enable_trigger/0` is only
# reached when every preceding step succeeds, as nothing here guards against a raised exception.
# NOTE(review): this span comes from a rendered diff and appears to contain both the removed and
# the added lines (e.g. `geojsons` and `nb_new` are assigned twice, some log calls are duplicated)
# — confirm against the actual file.
def run(_params) do | ||
Logger.info("Importing communes") | ||
|
||
Mix.Task.run("app.start") | ||
|
||
# Gets a list of tuples describing regions from the database | ||
regions = regions_by_insee() | ||
geojsons = geojson_by_insee() | ||
region_insees = regions |> Map.keys() | ||
|
||
# Gets a list of tuples describing communes GeoJSON from the network | ||
geojsons = geojson_by_insee() | ||
# Gets the official list of communes from the network and filter them to match database regions | ||
etalab_communes = load_etalab_communes(region_insees) | ||
etalab_insee = etalab_communes |> Enum.map(& &1["code"]) | ||
# Loads current communes INSEE list from the database | ||
communes_insee = Commune |> select([c], c.insee) |> Repo.all() | ||
|
||
nb_new = etalab_insee |> MapSet.new() |> MapSet.difference(MapSet.new(communes_insee)) |> Enum.count() | ||
new_communes = etalab_insee |> MapSet.new() |> MapSet.difference(MapSet.new(communes_insee)) | ||
nb_new = new_communes |> Enum.count() | ||
removed_communes = communes_insee |> MapSet.new() |> MapSet.difference(MapSet.new(etalab_insee)) |> Enum.into([]) | ||
nb_removed = removed_communes |> Enum.count() | ||
|
||
Logger.info("#{nb_new} new communes") | ||
Logger.info("#{nb_removed} communes should be removed") | ||
Logger.info("#{nb_new} new communes. INSEE codes: #{Enum.join(new_communes, ", ")}") | ||
Logger.info("#{nb_removed} communes should be removed. INSEE codes: #{Enum.join(removed_communes, ", ")}") | ||
|
||
Logger.info("Deleting removed communes…") | ||
Commune |> where([c], c.insee in ^removed_communes) |> Repo.delete_all() | ||
|
||
Logger.info("Updating communes (including potentially incorrect geometry)…") | ||
disable_trigger() | ||
etalab_communes |> Enum.each(&insert_or_update_commune(&1, regions, geojsons)) | ||
Logger.info("Finished. Enabling trigger and refreshing views.") | ||
# Inserts new communes, updates existing ones (mainly geometry, but also names…) | ||
changelist = etalab_communes |> Enum.map(&insert_or_update_commune(&1, regions, geojsons)) | ||
Logger.info("Finished. Count of changes: #{inspect(changelist |> List.flatten() |> Enum.frequencies())}") | ||
|
||
Logger.info("Ensure valid geometries and rectify if needed.") | ||
ensure_valid_geometries() | ||
Logger.info("Enabling trigger and refreshing views.") | ||
enable_trigger() | ||
end | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters