Skip to content

Commit

Permalink
fix: Elixir, use natural indexes and switch to duplicate_bag ETS table type
Browse files Browse the repository at this point in the history

~2x improvement in stop_times.txt load time; minor improvements in requests/sec and RAM usage
  • Loading branch information
vegris committed Oct 27, 2022
1 parent 5429009 commit f29ab22
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 74 deletions.
41 changes: 10 additions & 31 deletions trexit/lib/trexit/gtfs.ex
Original file line number Diff line number Diff line change
@@ -1,35 +1,14 @@
defmodule Trexit.GTFS do
alias Trexit.GTFS.StopTime
alias Trexit.GTFS.Trip

def schedules_for_route(route_id) do
case :ets.lookup(:trips_ix_by_route, route_id) do
[{^route_id, trip_ixs}] ->
Enum.map(trip_ixs, fn trip_ix ->
[{^trip_ix, %Trip{trip_id: trip_id, route_id: ^route_id, service_id: service_id}}] =
:ets.lookup(:trips, trip_ix)

[{^trip_id, st_ixs}] = :ets.lookup(:stop_times_ix_by_trip, trip_id)

%{
"trip_id" => trip_id,
"service_id" => service_id,
"route_id" => route_id,
"schedules" =>
Enum.map(st_ixs, fn st_ix ->
[{^st_ix, %StopTime{} = stop_time}] = :ets.lookup(:stop_times, st_ix)

%{
"stop_id" => stop_time.stop_id,
"arrival_time" => stop_time.arrival,
"departure_time" => stop_time.departure
}
end)
}
end)

_ ->
[]
end
:trips
|> :ets.lookup(route_id)
|> Enum.map(fn {_key, %{trip_id: trip_id} = route} ->
schedules =
:stop_times
|> :ets.lookup(trip_id)
|> Enum.map(fn {_key, schedule} -> schedule end)

Map.merge(route, %{route_id: route_id, schedules: schedules})
end)
end
end
44 changes: 7 additions & 37 deletions trexit/lib/trexit/gtfs/loader.ex
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
defmodule Trexit.GTFS.Loader do
use GenServer
require Logger

alias Trexit.GTFS.StopTime
alias Trexit.GTFS.Trip
require Logger

def start_link(_) do
GenServer.start_link(__MODULE__, [])
Expand All @@ -22,10 +20,8 @@ defmodule Trexit.GTFS.Loader do
end

def load() do
:ets.new(:stop_times, [:named_table, {:read_concurrency, true}])
:ets.new(:stop_times_ix_by_trip, [:named_table, {:read_concurrency, true}])
:ets.new(:trips, [:named_table, {:read_concurrency, true}])
:ets.new(:trips_ix_by_route, [:named_table, {:read_concurrency, true}])
:ets.new(:stop_times, [:named_table, :duplicate_bag, read_concurrency: true])
:ets.new(:trips, [:named_table, :duplicate_bag, read_concurrency: true])

{time, _} =
:timer.tc(fn ->
Expand Down Expand Up @@ -53,22 +49,10 @@ defmodule Trexit.GTFS.Loader do

stream
|> Stream.drop(1)
|> Stream.with_index()
|> Enum.each(fn {[trip_id, arrival_time, departure_time, stop_id] ++ _, index} ->
case :ets.lookup(:stop_times_ix_by_trip, trip_id) do
[] -> :ets.insert(:stop_times_ix_by_trip, {trip_id, [index]})
[{_, sts}] -> :ets.insert(:stop_times_ix_by_trip, {trip_id, [index | sts]})
end

|> Enum.each(fn [trip_id, arrival_time, departure_time, stop_id] ++ _ ->
:ets.insert(
:stop_times,
{index,
%StopTime{
trip_id: trip_id,
stop_id: stop_id,
arrival: arrival_time,
departure: departure_time
}}
{trip_id, %{arrival_time: arrival_time, departure_time: departure_time, stop_id: stop_id}}
)
end)
end
Expand All @@ -84,22 +68,8 @@ defmodule Trexit.GTFS.Loader do

stream
|> Stream.drop(1)
|> Stream.with_index()
|> Enum.each(fn {[route_id, service_id, trip_id] ++ _, index} ->
case :ets.lookup(:trips_ix_by_route, route_id) do
[] -> :ets.insert(:trips_ix_by_route, {route_id, [index]})
[{_, trips}] -> :ets.insert(:trips_ix_by_route, {route_id, [index | trips]})
end

:ets.insert(
:trips,
{index,
%Trip{
trip_id: trip_id,
route_id: route_id,
service_id: service_id
}}
)
|> Enum.each(fn [route_id, service_id, trip_id] ++ _ ->
:ets.insert(:trips, {route_id, %{service_id: service_id, trip_id: trip_id}})
end)
end
end
3 changes: 0 additions & 3 deletions trexit/lib/trexit/gtfs/stop_time.ex

This file was deleted.

3 changes: 0 additions & 3 deletions trexit/lib/trexit/gtfs/trip.ex

This file was deleted.

0 comments on commit f29ab22

Please sign in to comment.