diff --git a/CHANGELOG.md b/CHANGELOG.md index cfed5df5b3..bb4a6aa098 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,8 +13,12 @@ Copy and pasting the git commit messages is __NOT__ enough. ### Changed - `VehicleIndex.build_agent_vehicle()` no longer has `filename` and `surface_patches` parameters. - The following modules have been renamed: `envision.types` -> `envision.etypes`, `smarts.core.utils.logging` -> `smarts.core.utils.core_logging`, `smarts.core.utils.math` -> `smarts.core.utils.core_math`, `smarts.sstudio.types` -> `smarts.sstudio.sstypes`. For compatibility reasons they can still be imported by their original module name. +- Exposed `traffic:traci_retries`/`SMARTS_TRAFFIC_TRACI_RETRIES` to control how many times the `SumoTrafficSimulation` will try to restart when using default configuration. + ### Deprecated ### Fixed +- `SumoTrafficSimulation` gives clearer reasons as to why it failed to connect to the TraCI server. +- Suppressed an issue where `pybullet_utils.pybullet.BulletClient` would cause an error because it was catching a non `BaseException` type. - Fixed an issue where `AgentInterface.vehicle_type` would not affect agent vehicles when attempting to take over an existing vehicle. - Fixed a case where newly created agent vehicles would have a constant `"sedan"` size instead of the size of `AgentInterface.vehicle_type`. - Fixed a case where if vehicles are replaced they would not respect controller and vehicle parameters. diff --git a/smarts/core/sumo_traffic_simulation.py b/smarts/core/sumo_traffic_simulation.py index 89806e9538..43d8faf87b 100644 --- a/smarts/core/sumo_traffic_simulation.py +++ b/smarts/core/sumo_traffic_simulation.py @@ -30,7 +30,7 @@ from shapely.geometry import Polygon from shapely.geometry import box as shapely_box -from smarts.core import gen_id +from smarts.core import config, gen_id from smarts.core.actor import ActorRole, ActorState from smarts.core.colors import SceneColors from smarts.core.coordinates import Dimensions, Heading, Pose, RefLinePoint @@ -74,6 +74,8 @@ class SumoTrafficSimulation(TrafficProvider): remove_agents_only_mode: Remove only agent vehicles used by SMARTS and not delete other SUMO vehicles when the traffic simulation calls to tear-down + traci_retries: + The number of times to retry acquisition of a TraCI server before erroring. """ _HAS_DYNAMIC_ATTRIBUTES = True @@ -86,8 +88,9 @@ def __init__( sumo_port: Optional[int] = None, auto_start: bool = True, allow_reload: bool = True, - debug: bool = True, + debug: bool = False, remove_agents_only_mode: bool = False, + traci_retries: Optional[int] = None, ): self._remove_agents_only_mode = remove_agents_only_mode self._log = logging.getLogger(self.__class__.__name__) @@ -121,6 +124,7 @@ def __init__( self._last_vehicle_subscriptions = dict() self._sim = None self._handling_error = False + self._traci_retries = traci_retries # start with the default recovery flags... self._recovery_flags = super().recovery_flags @@ -203,8 +207,7 @@ def _initialize_traci_conn(self, num_retries=5): base_params=self._base_sumo_load_params(), sumo_binary=sumo_binary, ) - # Ensure there has been enough time for sumo to start - time.sleep(0.05) + try: while self._traci_conn.viable and not self._traci_conn.connected: try: @@ -212,6 +215,7 @@ def _initialize_traci_conn(self, num_retries=5): timeout=5, minimum_traci_version=20, minimum_sumo_version=(1, 10, 0), + debug=self._debug, ) except traci.exceptions.FatalTraCIError: # Could not connect in time just retry connection @@ -224,30 +228,28 @@ def _initialize_traci_conn(self, num_retries=5): self._traci_conn.close_traci_and_pipes() continue except ConnectionRefusedError: - # Some other process owns the port... sumo did not die just retry - self._traci_conn.close_traci_and_pipes() + # Some other process somehow owns the port... sumo needs to be restarted. continue + except OSError: + # TraCI or SUMO version are not at the minimum required version. + raise except KeyboardInterrupt: self._log.debug("Keyboard interrupted TraCI connection.") self._traci_conn.close_traci_and_pipes() raise break + else: + exception = traci.exceptions.FatalTraCIError( + f"Unable to connect to TraCI server after `{num_retries=}`." + ) + self._handle_traci_exception(exception, actors_relinquishable=False) + raise exception try: - assert self._traci_conn is not None # It is mandatory to set order when using multiple clients. self._traci_conn.setOrder(0) self._traci_conn.getVersion() - except (traci.exceptions.FatalTraCIError, AssertionError) as err: - logging.error( - """Failed to initialize SUMO - Your scenario might not be configured correctly or - you were trying to initialize many SUMO instances at - once and we were not able to assign unique port - numbers to all SUMO processes. - Check %s for hints""", - self._log_file, - ) + except (traci.exceptions.FatalTraCIError, TypeError) as err: self._handle_traci_exception(err, actors_relinquishable=False) self.teardown() raise @@ -321,7 +323,11 @@ def setup(self, scenario) -> ProviderState: if restart_sumo: try: - self._initialize_traci_conn() + engine_config = config() + traci_retries = self._traci_retries or engine_config( + "sumo", "traci_retries", default=5, cast=int + ) + self._initialize_traci_conn(num_retries=traci_retries) except traci.exceptions.FatalTraCIError: return ProviderState() elif self._allow_reload: diff --git a/smarts/core/utils/pybullet.py b/smarts/core/utils/pybullet.py index a903925809..b527709f6c 100644 --- a/smarts/core/utils/pybullet.py +++ b/smarts/core/utils/pybullet.py @@ -49,7 +49,12 @@ def __init__(self, connection_mode=None): def __del__(self): """Clean up connection if not already done.""" - super().__del__() + try: + super().__del__() + except TypeError as error: + # Pybullet 3.2.6 currently attempts to catch an error type that does not exist. + if not error.args[0].contains("BaseException"): + raise def __getattr__(self, name): """Inject the client id into Bullet functions.""" diff --git a/smarts/core/utils/sumo.py b/smarts/core/utils/sumo.py index 1abf31ab17..9ad042af59 100644 --- a/smarts/core/utils/sumo.py +++ b/smarts/core/utils/sumo.py @@ -20,6 +20,7 @@ """Importing this module "redirects" the import to the "real" sumolib. This is available for convenience and to reduce code duplication as sumolib lives under SUMO_HOME. """ +from __future__ import annotations import functools import inspect @@ -28,7 +29,7 @@ import os import subprocess import sys -from typing import Any, List, Optional +from typing import Any, List, Optional, Tuple from smarts.core.utils import networking from smarts.core.utils.core_logging import suppress_output @@ -86,7 +87,7 @@ def __init__( self._sumo_proc = None self._traci_conn = None self._sumo_port = None - self._sumo_version = () + self._sumo_version: Tuple[int, ...] = tuple() if sumo_port is None: sumo_port = networking.find_free_port() @@ -115,18 +116,15 @@ def __del__(self) -> None: def connect( self, - timeout: float = 5, - minimum_traci_version=20, - minimum_sumo_version=( - 1, - 10, - 0, - ), + timeout: float, + minimum_traci_version: int, + minimum_sumo_version: Tuple[int, ...], + debug: bool = False, ): """Attempt a connection with the SUMO process.""" traci_conn = None try: - with suppress_output(stdout=False): + with suppress_output(stderr=not debug, stdout=False): traci_conn = traci.connect( self._sumo_port, numRetries=max(0, int(20 * timeout)), @@ -142,41 +140,46 @@ def connect( raise except ConnectionRefusedError: logging.error( - "Connection refused. Tried to connect to unpaired TraCI client." + "Connection refused. Tried to connect to an unpaired TraCI client." ) raise try: vers, vers_str = traci_conn.getVersion() - assert ( - vers >= minimum_traci_version - ), f"TraCI API version must be >= {minimum_traci_version}. Got version ({vers})" + if vers < minimum_traci_version: + raise OSError( + f"TraCI API version must be >= {minimum_traci_version}. Got version ({vers})" + ) self._sumo_version = tuple( int(v) for v in vers_str.partition(" ")[2].split(".") ) # e.g. "SUMO 1.11.0" -> (1, 11, 0) - assert ( - self._sumo_version >= minimum_sumo_version - ), f"SUMO version must be >= SUMO {minimum_sumo_version}" + if self._sumo_version < minimum_sumo_version: + raise OSError(f"SUMO version must be >= SUMO {minimum_sumo_version}") except traci.exceptions.FatalTraCIError as err: - logging.debug("TraCI could not connect in time.") + logging.debug("TraCI disconnected, process may have died.") # XXX: the error type is changed to TraCIException to make it consistent with the # process died case of `traci.connect`. raise traci.exceptions.TraCIException(err) - except AssertionError: + except OSError: self.close_traci_and_pipes() raise self._traci_conn = traci_conn @property - def connected(self): + def connected(self) -> bool: """Check if the connection is still valid.""" return self._sumo_proc is not None and self._traci_conn is not None @property - def viable(self): + def viable(self) -> bool: """If making a connection to the sumo process is still viable.""" return self._sumo_proc is not None and self._sumo_proc.poll() is None + @property + def sumo_version(self) -> Tuple[int, ...]: + """Get the current SUMO version as a tuple.""" + return self._sumo_version + def __getattr__(self, name: str) -> Any: if not self.connected: return None @@ -210,7 +213,7 @@ def __safe_close(conn): # TraCI connection is already dead. pass except AttributeError: - # Socket was destroyed internally by a fatal error somehow. + # Socket was destroyed internally, likely due to an error. pass if self._traci_conn: @@ -231,14 +234,13 @@ def teardown(self): def _wrap_traci_method(*args, method, sumo_process: TraciConn, **kwargs): - # Argument order must be `*args` first so keyword arguments are required for `method` and `sumo_process`. + # Argument order must be `*args` first so `method` and `sumo_process` are keyword only arguments. try: return method(*args, **kwargs) except traci.exceptions.FatalTraCIError: - # Traci cannot continue + # TraCI cannot continue sumo_process.close_traci_and_pipes() raise except traci.exceptions.TraCIException: - # Case where SUMO can continue - # TAI: consider closing the process even with a non fatal error + # Case where TraCI/SUMO can theoretically continue raise diff --git a/smarts/engine.ini b/smarts/engine.ini index 9f3100c7e8..67e2cbed1f 100644 --- a/smarts/engine.ini +++ b/smarts/engine.ini @@ -1,3 +1,4 @@ +; For syntax see https://docs.python.org/3/library/configparser.html#supported-ini-file-structure [benchmark] [core] debug = False @@ -11,4 +12,6 @@ max_pybullet_freq = 240 [resources] default_agent_vehicle = passenger [ray] -log_to_driver=False \ No newline at end of file +log_to_driver=False +[traffic] +traci_retries=5 \ No newline at end of file