Skip to content

Commit

Permalink
Clean up TraCI and Pybullet exceptions (#2128)
Browse files Browse the repository at this point in the history
* Fix pybullet __del__ improper exception.

* Clarify SUMO and TraCI incompatibility error.

* Add traci_retries to the engine configuration.

* Clarify TraCI connection issues.
  • Loading branch information
Gamenot authored Dec 30, 2023
1 parent e883442 commit 514a9e7
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 46 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@ Copy and pasting the git commit messages is __NOT__ enough.
### Changed
- `VehicleIndex.build_agent_vehicle()` no longer has `filename` and `surface_patches` parameters.
- The following modules have been renamed: `envision.types` -> `envision.etypes`, `smarts.core.utils.logging` -> `smarts.core.utils.core_logging`, `smarts.core.utils.math` -> `smarts.core.utils.core_math`, `smarts.sstudio.types` -> `smarts.sstudio.sstypes`. For compatibility reasons they can still be imported by their original module name.
- Exposed `traffic:traci_retries`/`SMARTS_TRAFFIC_TRACI_RETRIES` to control how many times the `SumoTrafficSimulation` will try to restart when using default configuration.

### Deprecated
### Fixed
- `SumoTrafficSimulation` gives clearer reasons as to why it failed to connect to the TraCI server.
- Suppressed an issue where `pybullet_utils.pybullet.BulletClient` would cause an error because it was catching a non `BaseException` type.
- Fixed an issue where `AgentInterface.vehicle_type` would not affect agent vehicles when attempting to take over an existing vehicle.
- Fixed a case where newly created agent vehicles would have a constant `"sedan"` size instead of the size of `AgentInterface.vehicle_type`.
- Fixed a case where if vehicles are replaced they would not respect controller and vehicle parameters.
Expand Down
42 changes: 24 additions & 18 deletions smarts/core/sumo_traffic_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from shapely.geometry import Polygon
from shapely.geometry import box as shapely_box

from smarts.core import gen_id
from smarts.core import config, gen_id
from smarts.core.actor import ActorRole, ActorState
from smarts.core.colors import SceneColors
from smarts.core.coordinates import Dimensions, Heading, Pose, RefLinePoint
Expand Down Expand Up @@ -74,6 +74,8 @@ class SumoTrafficSimulation(TrafficProvider):
remove_agents_only_mode:
Remove only agent vehicles used by SMARTS and not delete other SUMO
vehicles when the traffic simulation calls to tear-down
traci_retries:
The number of times to retry acquisition of a TraCI server before erroring.
"""

_HAS_DYNAMIC_ATTRIBUTES = True
Expand All @@ -86,8 +88,9 @@ def __init__(
sumo_port: Optional[int] = None,
auto_start: bool = True,
allow_reload: bool = True,
debug: bool = True,
debug: bool = False,
remove_agents_only_mode: bool = False,
traci_retries: Optional[int] = None,
):
self._remove_agents_only_mode = remove_agents_only_mode
self._log = logging.getLogger(self.__class__.__name__)
Expand Down Expand Up @@ -121,6 +124,7 @@ def __init__(
self._last_vehicle_subscriptions = dict()
self._sim = None
self._handling_error = False
self._traci_retries = traci_retries

# start with the default recovery flags...
self._recovery_flags = super().recovery_flags
Expand Down Expand Up @@ -203,15 +207,15 @@ def _initialize_traci_conn(self, num_retries=5):
base_params=self._base_sumo_load_params(),
sumo_binary=sumo_binary,
)
# Ensure there has been enough time for sumo to start
time.sleep(0.05)

try:
while self._traci_conn.viable and not self._traci_conn.connected:
try:
self._traci_conn.connect(
timeout=5,
minimum_traci_version=20,
minimum_sumo_version=(1, 10, 0),
debug=self._debug,
)
except traci.exceptions.FatalTraCIError:
# Could not connect in time just retry connection
Expand All @@ -224,30 +228,28 @@ def _initialize_traci_conn(self, num_retries=5):
self._traci_conn.close_traci_and_pipes()
continue
except ConnectionRefusedError:
# Some other process owns the port... sumo did not die just retry
self._traci_conn.close_traci_and_pipes()
# Some other process somehow owns the port... sumo needs to be restarted.
continue
except OSError:
# TraCI or SUMO version are not at the minimum required version.
raise
except KeyboardInterrupt:
self._log.debug("Keyboard interrupted TraCI connection.")
self._traci_conn.close_traci_and_pipes()
raise
break
else:
exception = traci.exceptions.FatalTraCIError(
f"Unable to connect to TraCI server after `{num_retries=}`."
)
self._handle_traci_exception(exception, actors_relinquishable=False)
raise exception

try:
assert self._traci_conn is not None
# It is mandatory to set order when using multiple clients.
self._traci_conn.setOrder(0)
self._traci_conn.getVersion()
except (traci.exceptions.FatalTraCIError, AssertionError) as err:
logging.error(
"""Failed to initialize SUMO
Your scenario might not be configured correctly or
you were trying to initialize many SUMO instances at
once and we were not able to assign unique port
numbers to all SUMO processes.
Check %s for hints""",
self._log_file,
)
except (traci.exceptions.FatalTraCIError, TypeError) as err:
self._handle_traci_exception(err, actors_relinquishable=False)
self.teardown()
raise
Expand Down Expand Up @@ -321,7 +323,11 @@ def setup(self, scenario) -> ProviderState:

if restart_sumo:
try:
self._initialize_traci_conn()
engine_config = config()
traci_retries = self._traci_retries or engine_config(
"sumo", "traci_retries", default=5, cast=int
)
self._initialize_traci_conn(num_retries=traci_retries)
except traci.exceptions.FatalTraCIError:
return ProviderState()
elif self._allow_reload:
Expand Down
7 changes: 6 additions & 1 deletion smarts/core/utils/pybullet.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ def __init__(self, connection_mode=None):

def __del__(self):
"""Clean up connection if not already done."""
super().__del__()
try:
super().__del__()
except TypeError as error:
# Pybullet 3.2.6 currently attempts to catch an error type that does not exist.
if not error.args[0].contains("BaseException"):
raise

def __getattr__(self, name):
"""Inject the client id into Bullet functions."""
Expand Down
54 changes: 28 additions & 26 deletions smarts/core/utils/sumo.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"""Importing this module "redirects" the import to the "real" sumolib. This is available
for convenience and to reduce code duplication as sumolib lives under SUMO_HOME.
"""
from __future__ import annotations

import functools
import inspect
Expand All @@ -28,7 +29,7 @@
import os
import subprocess
import sys
from typing import Any, List, Optional
from typing import Any, List, Optional, Tuple

from smarts.core.utils import networking
from smarts.core.utils.core_logging import suppress_output
Expand Down Expand Up @@ -86,7 +87,7 @@ def __init__(
self._sumo_proc = None
self._traci_conn = None
self._sumo_port = None
self._sumo_version = ()
self._sumo_version: Tuple[int, ...] = tuple()

if sumo_port is None:
sumo_port = networking.find_free_port()
Expand Down Expand Up @@ -115,18 +116,15 @@ def __del__(self) -> None:

def connect(
self,
timeout: float = 5,
minimum_traci_version=20,
minimum_sumo_version=(
1,
10,
0,
),
timeout: float,
minimum_traci_version: int,
minimum_sumo_version: Tuple[int, ...],
debug: bool = False,
):
"""Attempt a connection with the SUMO process."""
traci_conn = None
try:
with suppress_output(stdout=False):
with suppress_output(stderr=not debug, stdout=False):
traci_conn = traci.connect(
self._sumo_port,
numRetries=max(0, int(20 * timeout)),
Expand All @@ -142,41 +140,46 @@ def connect(
raise
except ConnectionRefusedError:
logging.error(
"Connection refused. Tried to connect to unpaired TraCI client."
"Connection refused. Tried to connect to an unpaired TraCI client."
)
raise

try:
vers, vers_str = traci_conn.getVersion()
assert (
vers >= minimum_traci_version
), f"TraCI API version must be >= {minimum_traci_version}. Got version ({vers})"
if vers < minimum_traci_version:
raise OSError(
f"TraCI API version must be >= {minimum_traci_version}. Got version ({vers})"
)
self._sumo_version = tuple(
int(v) for v in vers_str.partition(" ")[2].split(".")
) # e.g. "SUMO 1.11.0" -> (1, 11, 0)
assert (
self._sumo_version >= minimum_sumo_version
), f"SUMO version must be >= SUMO {minimum_sumo_version}"
if self._sumo_version < minimum_sumo_version:
raise OSError(f"SUMO version must be >= SUMO {minimum_sumo_version}")
except traci.exceptions.FatalTraCIError as err:
logging.debug("TraCI could not connect in time.")
logging.debug("TraCI disconnected, process may have died.")
# XXX: the error type is changed to TraCIException to make it consistent with the
# process died case of `traci.connect`.
raise traci.exceptions.TraCIException(err)
except AssertionError:
except OSError:
self.close_traci_and_pipes()
raise
self._traci_conn = traci_conn

@property
def connected(self):
def connected(self) -> bool:
"""Check if the connection is still valid."""
return self._sumo_proc is not None and self._traci_conn is not None

@property
def viable(self):
def viable(self) -> bool:
"""If making a connection to the sumo process is still viable."""
return self._sumo_proc is not None and self._sumo_proc.poll() is None

@property
def sumo_version(self) -> Tuple[int, ...]:
"""Get the current SUMO version as a tuple."""
return self._sumo_version

def __getattr__(self, name: str) -> Any:
if not self.connected:
return None
Expand Down Expand Up @@ -210,7 +213,7 @@ def __safe_close(conn):
# TraCI connection is already dead.
pass
except AttributeError:
# Socket was destroyed internally by a fatal error somehow.
# Socket was destroyed internally, likely due to an error.
pass

if self._traci_conn:
Expand All @@ -231,14 +234,13 @@ def teardown(self):


def _wrap_traci_method(*args, method, sumo_process: TraciConn, **kwargs):
# Argument order must be `*args` first so keyword arguments are required for `method` and `sumo_process`.
# Argument order must be `*args` first so `method` and `sumo_process` are keyword only arguments.
try:
return method(*args, **kwargs)
except traci.exceptions.FatalTraCIError:
# Traci cannot continue
# TraCI cannot continue
sumo_process.close_traci_and_pipes()
raise
except traci.exceptions.TraCIException:
# Case where SUMO can continue
# TAI: consider closing the process even with a non fatal error
# Case where TraCI/SUMO can theoretically continue
raise
5 changes: 4 additions & 1 deletion smarts/engine.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; For syntax see https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
[benchmark]
[core]
debug = False
Expand All @@ -11,4 +12,6 @@ max_pybullet_freq = 240
[resources]
default_agent_vehicle = passenger
[ray]
log_to_driver=False
log_to_driver=False
[traffic]
traci_retries=5

0 comments on commit 514a9e7

Please sign in to comment.