Skip to content

Commit

Permalink
Mise à jour de la base de données de communes (millésime 2023) (#3555)
Browse files Browse the repository at this point in the history
* communes 2023

* improve import communes script

* mix credo

* more detailed logs

* Apply suggestions from code review

Co-authored-by: Antoine Augusti <[email protected]>

* Remove useless variable attribution

---------

Co-authored-by: Antoine Augusti <[email protected]>
  • Loading branch information
vdegove and AntoineAugusti authored Oct 23, 2023
1 parent 34e0bfe commit 943307d
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 41 deletions.
126 changes: 85 additions & 41 deletions apps/transport/lib/mix/tasks/transport/import_communes.ex
Original file line number Diff line number Diff line change
@@ -1,16 +1,56 @@
defmodule Mix.Tasks.Transport.ImportCommunes do
@moduledoc "Import the communes"
@moduledoc """
Imports or updates commune data (list, geometry) from official sources. Run with `mix transport.import_communes`.
"""
@shortdoc "Refreshes the database table `commune` with the latest data"
use Mix.Task
import Ecto.Query
alias Ecto.Changeset
alias DB.{Commune, Region, Repo}
require Logger

@communes_geojson_url "http://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2022/geojson/communes-100m.geojson"
# List of communes with their geometry, but lacking additional information
@communes_geojson_url "http://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2023/geojson/communes-100m.geojson"
# List of official communes with additional information (population, arrondissement, etc.)
# See https://github.com/etalab/decoupage-administratif
@communes_url "https://unpkg.com/@etalab/[email protected]/data/communes.json"
@communes_url "https://unpkg.com/@etalab/[email protected]/data/communes.json"

@doc "Loads every region from the database and returns a map of region INSEE code => region id"
def regions_by_insee do
  for region <- Repo.all(Region), into: %{} do
    {region.insee, region.id}
  end
end

@doc "Downloads the communes GeoJSON from the official source and returns a map of INSEE code => geometry"
def geojson_by_insee do
  request_options = [connect_options: [timeout: 15_000], receive_timeout: 15_000]
  %{status: 200, body: raw_geojson} = Req.get!(@communes_geojson_url, request_options)

  # Req decodes JSON bodies automatically but not GeoJSON ones, so the payload is parsed here
  features =
    raw_geojson
    |> Jason.decode!()
    |> Map.fetch!("features")

  Map.new(features, fn feature -> {feature["properties"]["code"], feature["geometry"]} end)
end

@doc """
Downloads the official communes list from the network and returns the kept entries as a list of maps.

An entry is kept only when both conditions hold:
- its `"type"` is `"commune-actuelle"` (current communes; there may have been commune deletions)
- its `"region"` is one of the given `region_insees` (the regions we have in the database)
"""
def load_etalab_communes(region_insees) do
  request_options = [connect_options: [timeout: 15_000], receive_timeout: 15_000]
  %{status: 200, body: communes} = Req.get!(@communes_url, request_options)

  Enum.filter(communes, fn commune ->
    commune["type"] == "commune-actuelle" and commune["region"] in region_insees
  end)
end

@doc """
First creates the commune (without geometry) if it doesn’t exist.
Then updates the commune with the new data through a changeset.
Returns a list of keys of changed fields for statistics.
"""
def insert_or_update_commune(
%{
"code" => insee,
Expand All @@ -22,32 +62,28 @@ defmodule Mix.Tasks.Transport.ImportCommunes do
regions,
geojsons
) do
insee
|> get_or_create_commune()
|> Changeset.change(%{
insee: insee,
nom: nom,
region_id: Map.fetch!(regions, region_insee),
geom: build_geometry(geojsons, insee),
population: population,
siren: Map.get(params, "siren"),
arrondissement_insee: Map.get(params, "arrondissement"),
departement_insee: departement_insee
})
|> Repo.insert_or_update!()
end

defp regions_by_insee do
Region |> Repo.all() |> Enum.into(%{}, fn region -> {region.insee, region.id} end)
end
changeset =
insee
|> get_or_create_commune()
|> Changeset.change(%{
insee: insee,
nom: nom,
region_id: Map.fetch!(regions, region_insee),
geom: build_geometry(geojsons, insee),
population: population,
siren: Map.get(params, "siren"),
arrondissement_insee: Map.get(params, "arrondissement"),
departement_insee: departement_insee
})

changeset_change_keys = changeset.changes |> Map.keys()

unless Enum.empty?(changeset_change_keys -- [:geom, :population]) do
Logger.info("Important changes for INSEE #{changeset.data.insee}. #{readable_changeset(changeset)}")
end

defp geojson_by_insee do
@communes_geojson_url
|> HTTPoison.get!(timeout: 15_000, recv_timeout: 15_000)
|> Map.fetch!(:body)
|> Jason.decode!()
|> Map.fetch!("features")
|> Enum.into(%{}, fn record -> {record["properties"]["code"], record["geometry"]} end)
changeset |> Repo.insert_or_update!()
changeset_change_keys
end

defp get_or_create_commune(insee) do
Expand All @@ -69,40 +105,48 @@ defmodule Mix.Tasks.Transport.ImportCommunes do
%{geom | srid: 4326}
end

defp load_etalab_communes(region_insees) do
@communes_url
|> HTTPoison.get!(timeout: 15_000, recv_timeout: 15_000)
|> Map.fetch!(:body)
|> Jason.decode!()
|> Enum.filter(&(&1["type"] == "commune-actuelle" and &1["region"] in region_insees))
# Renders every changed field of a changeset as "field: old value => new value",
# with entries separated by " ; ", for use in log messages.
defp readable_changeset(%Ecto.Changeset{changes: changes, data: data}) do
  describe_change = fn field ->
    "#{field}: #{Map.get(data, field)} => #{Map.get(changes, field)}"
  end

  Enum.map_join(Map.keys(changes), " ; ", describe_change)
end

# Entry point of the Mix task. Compares the official communes list with the communes
# stored in the database, logs the differences, deletes the communes that are no longer
# listed, inserts or updates all the others, then runs the geometry check.
# The task arguments are ignored.
def run(_params) do
Logger.info("Importing communes")

# Starts the application before any database access
Mix.Task.run("app.start")

# Loads regions from the database as a map of region INSEE code => region id
regions = regions_by_insee()
geojsons = geojson_by_insee()
region_insees = regions |> Map.keys()

# Downloads the communes GeoJSON from the network as a map of commune INSEE code => geometry
geojsons = geojson_by_insee()
# Gets the official list of communes from the network and filters them to match database regions
etalab_communes = load_etalab_communes(region_insees)
etalab_insee = etalab_communes |> Enum.map(& &1["code"])
# Loads current communes INSEE list from the database
communes_insee = Commune |> select([c], c.insee) |> Repo.all()

# New communes: in the official list but not in the database.
# Removed communes: in the database but no longer in the official list.
nb_new = etalab_insee |> MapSet.new() |> MapSet.difference(MapSet.new(communes_insee)) |> Enum.count()
new_communes = etalab_insee |> MapSet.new() |> MapSet.difference(MapSet.new(communes_insee))
nb_new = new_communes |> Enum.count()
removed_communes = communes_insee |> MapSet.new() |> MapSet.difference(MapSet.new(etalab_insee)) |> Enum.into([])
nb_removed = removed_communes |> Enum.count()

Logger.info("#{nb_new} new communes")
Logger.info("#{nb_removed} communes should be removed")
Logger.info("#{nb_new} new communes. INSEE codes: #{Enum.join(new_communes, ", ")}")
Logger.info("#{nb_removed} communes should be removed. INSEE codes: #{Enum.join(removed_communes, ", ")}")

Logger.info("Deleting removed communes…")
Commune |> where([c], c.insee in ^removed_communes) |> Repo.delete_all()

Logger.info("Updating communes (including potentially incorrect geometry)…")
# NOTE(review): disable_trigger/0 and enable_trigger/0 are defined outside this view;
# presumably they suspend a database trigger during the bulk update — confirm.
disable_trigger()
etalab_communes |> Enum.each(&insert_or_update_commune(&1, regions, geojsons))
Logger.info("Finished. Enabling trigger and refreshing views.")
# Inserts new communes, updates existing ones (mainly geometry, but also names…).
# Each call returns the keys of the changed fields, which are counted per field in the log below.
changelist = etalab_communes |> Enum.map(&insert_or_update_commune(&1, regions, geojsons))
Logger.info("Finished. Count of changes: #{inspect(changelist |> List.flatten() |> Enum.frequencies())}")

Logger.info("Ensure valid geometries and rectify if needed.")
ensure_valid_geometries()
Logger.info("Enabling trigger and refreshing views.")
enable_trigger()
end

Expand Down
3 changes: 3 additions & 0 deletions config/dev.exs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ config :transport, :email_sender_impl, Transport.EmailSender.Dummy
# Add a dev.secret.exs to use a real email provider
config :transport, Transport.Mailer, adapter: Swoosh.Adapters.Local

# Uncomment if you want to disable all logs from the database
# config :transport, DB.Repo, log: false

extra_config_file = Path.join(__DIR__, "#{config_env()}.secret.exs")

if File.exists?(extra_config_file) do
Expand Down

0 comments on commit 943307d

Please sign in to comment.