Skip to content

Commit

Permalink
NewDatagouvDatasetsJob : recherche pour ZFE et gestion accents
Browse files Browse the repository at this point in the history
  • Loading branch information
AntoineAugusti committed Dec 10, 2024
1 parent 241ac29 commit 76f4442
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
31 changes: 21 additions & 10 deletions apps/transport/lib/jobs/new_datagouv_datasets_job.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
tags:
MapSet.new([
"bus",
"deplacements",
"déplacement",
"déplacements",
"horaires",
"mobilite",
"mobilité",
"temps-reel",
"temps-réel",
"transport",
"transports"
Expand Down Expand Up @@ -53,13 +51,13 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
"etalab/schema-comptage-mobilites-measure",
"etalab/schema-comptage-mobilites-site"
],
tags: MapSet.new(["cyclable", "parking", "stationnement", "velo", "vélo"]),
tags: MapSet.new(["cyclable", "parking", "stationnement", "vélo"]),
formats: MapSet.new([])
},
%{
category: "Covoiturage et ZFE",
schemas: ["etalab/schema-lieux-covoiturage", "etalab/schema-zfe"],
tags: MapSet.new(["covoiturage", "zfe"]),
tags: MapSet.new(["covoiturage", "zfe", "zfe-m", "zone à faible émission"]),
formats: MapSet.new([])
},
%{
Expand All @@ -71,8 +69,7 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
"borne de recharge",
"irve",
"sdirve",
"électrique",
"electrique"
"électrique"
]),
formats: MapSet.new([])
}
Expand Down Expand Up @@ -223,12 +220,12 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
# Tells whether `str` mentions one of the rule's formats or tags.
# Both the string and the search terms go through `normalize/1`, so the
# comparison ignores case and accents. A `nil` string never matches.
defp string_matches?(nil, _rule), do: false

defp string_matches?(str, %{formats: formats, tags: tags} = _rule) when is_binary(str) do
  searches = formats |> MapSet.union(tags) |> Enum.map(&normalize/1)
  str |> normalize() |> String.contains?(searches)
end

# A dataset is relevant for a rule when at least one of its tags matches the rule.
# Tags are passed as-is: `string_matches?/2` normalizes its input itself,
# so no prior downcasing is needed.
defp tags_is_relevant?(%{"tags" => tags} = _dataset, rule) do
  Enum.any?(tags, &string_matches?(&1, rule))
end

defp resource_is_relevant?(%{} = resource, rule) do
Expand All @@ -250,4 +247,18 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
end

defp resource_schema_is_relevant?(%{}, _rule), do: false

@doc """
Cleans up a string so it can be compared regardless of case and accents.

The value is decomposed (NFD) so that accented letters become a base letter
followed by combining marks, then every character that is not an ASCII letter
is dropped (combining marks, whitespace, digits and punctuation included) and
the result is lowercased.

## Examples

    iex> normalize("Paris")
    "paris"
    iex> normalize("vélo")
    "velo"
    iex> normalize("Châteauroux")
    "chateauroux"
    iex> normalize("Zone à Faible Émission")
    "zoneafaibleemission"
    iex> normalize("temps_réel")
    "tempsreel"
"""
@spec normalize(String.t()) :: String.t()
def normalize(value) do
  # The character class is spelled `A-Za-z` on purpose: the shorter `A-z` range
  # also covers the ASCII punctuation located between `Z` and `a`
  # (`[`, `\`, `]`, `^`, `_` and the backtick), which would then be kept.
  value
  |> String.normalize(:nfd)
  |> String.replace(~r/[^A-Za-z]/u, "")
  |> String.downcase()
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ defmodule Transport.Test.Transport.Jobs.NewDatagouvDatasetsJobTest do
assert %{category: "Transport en commun"} = relevant_fn.(%{base | "title" => "GTFS de Dijon"})
assert %{category: "Transport en commun"} = relevant_fn.(%{base | "description" => "GTFS de Dijon"})
assert %{category: "Transport en commun"} = relevant_fn.(%{base | "tags" => [Ecto.UUID.generate(), "gtfs"]})
assert %{category: "Covoiturage et ZFE"} = relevant_fn.(%{base | "description" => "Périmètre de la Zone à Faible Émission (ZFE) de Dijon Métropole."})

assert %{category: "Freefloating"} =
relevant_fn.(%{
Expand Down

0 comments on commit 76f4442

Please sign in to comment.