Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance DPU Tools with Better Minicom Handling, Connectivity Checks, and Runtime Access Improvements #26

Merged
merged 5 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dpu-tools/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM quay.io/centos/centos:stream9
RUN dnf install -y \
procps-ng openssh-clients minicom python39 python3-pexpect python3-requests pciutils lshw && \
procps-ng openssh-clients minicom python39 python3-pexpect python3-requests pciutils lshw iputils && \
dnf clean all && \
rm -rf /var/cache/* && \
ln -s /usr/bin/pip3.9 /usr/bin/pip && \
Expand Down
38 changes: 37 additions & 1 deletion dpu-tools/common_ipu.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import threading
import re
import pexpect
import time
from minicom import configure_minicom, pexpect_child_wait, minicom_cmd


Expand Down Expand Up @@ -178,7 +179,7 @@ def minicom_get_version(logger: logging.Logger) -> str:
# Wait for the expected response (adjust the timeout as needed)

try:
pexpect_child_wait(child, ".*IPU IMC MEV-HW-B1-ci-ts.release.*", 120)
pexpect_child_wait(child, ".*IPU IMC.*", 120)
except Exception as e:
raise e

Expand All @@ -203,3 +204,38 @@ def minicom_get_version(logger: logging.Logger) -> str:
# Ensure Picocom closes properly
child.expect(pexpect.EOF)
return version


def check_connectivity(
    address: str,
    capture_output: bool = False,
    dry_run: bool = False,
    retries: int = 3,
    delay: int = 2,
) -> bool:
    """
    Report whether a host answers a ping, retrying a limited number of times.

    Args:
        address: Target host, either bare (``hostname``) or prefixed with a
            user (``user@hostname``); only the part after the last ``@`` is
            pinged.
        capture_output: Forwarded unchanged to ``run``.
        dry_run: Forwarded unchanged to ``run``.
        retries: Maximum number of ping attempts.
        delay: Seconds to sleep between two consecutive failed attempts.

    Returns:
        True as soon as one ping command exits with return code 0, otherwise
        False once every attempt has failed.
    """
    # Everything after the final "@" is the host; an address without "@"
    # comes back unchanged.
    host = address.rsplit("@", 1)[-1]
    ping_cmd = f"ping -c 1 -W 1 {host}"

    for attempt in range(1, retries + 1):
        outcome = run(ping_cmd, capture_output=capture_output, dry_run=dry_run)

        if outcome.returncode == 0:
            logger.debug(f"{host} is reachable.")
            return True

        logger.debug(f"Attempt {attempt} to reach {host} failed.")

        # Only pause when another attempt is still to come.
        is_last_attempt = attempt >= retries
        if not is_last_attempt:
            logger.debug(f"Retrying in {delay} seconds...")
            time.sleep(delay)

    logger.debug(f"Failed to reach {host} after {retries} attempts.")
    return False
53 changes: 31 additions & 22 deletions dpu-tools/fwutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pexpect
from minicom import minicom_cmd, pexpect_child_wait, configure_minicom
from common_ipu import (
check_connectivity,
extract_tar_gz,
run,
download_file,
Expand Down Expand Up @@ -143,29 +144,37 @@ def ipu_runtime_access(self) -> None:
self.logger.debug("[DRY RUN] Send Ctrl-A and 'x' to exit minicom")
self.logger.debug("[DRY RUN] Expect EOF")
else:
run("pkill -9 minicom")
self.logger.debug("Configuring minicom")
configure_minicom()
self.logger.debug("spawn minicom")
child = pexpect.spawn(minicom_cmd("imc"))
child.maxread = 10000
pexpect_child_wait(
child, ".*Press CTRL-A Z for help on special keys.*", 120
self.logger.debug(
f"Checking that ipu runtime access is up by sshing into {self.imc_address}"
)
self.logger.debug("Ready to enter command")
child.sendline("/etc/ipu/ipu_runtime_access")
# Wait for the expected response (adjust the timeout as needed)
pexpect_child_wait(child, ".*Enabling network and sshd.*", 120)

# Capture and self.logger.debug the output
assert child.before is not None
self.logger.debug(child.before.decode("utf-8"))
self.logger.debug(child.after.decode("utf-8"))
# Gracefully close Picocom (equivalent to pressing Ctrl-A and Ctrl-X)
child.sendcontrol("a")
child.sendline("x")
# Ensure Picocom closes properly
child.expect(pexpect.EOF)
connected = check_connectivity(self.imc_address)
if not connected:
self.logger.debug(
f"Couldn't ssh into {self.imc_address}, enabling runtime access through minicom"
)
run("pkill -9 minicom")
self.logger.debug("Configuring minicom")
with configure_minicom():
self.logger.debug("spawn minicom")
child = pexpect.spawn(minicom_cmd("imc"))
child.maxread = 10000
pexpect_child_wait(
child, ".*Press CTRL-A Z for help on special keys.*", 120
)
self.logger.debug("Ready to enter command")
child.sendline("/etc/ipu/ipu_runtime_access")
# Wait for the expected response (adjust the timeout as needed)
pexpect_child_wait(child, ".*Enabling network and sshd.*", 120)

# Capture and log the output
assert child.before is not None
self.logger.debug(child.before.decode("utf-8"))
self.logger.debug(child.after.decode("utf-8"))
# Gracefully close minicom (equivalent to pressing Ctrl-A followed by 'x')
child.sendcontrol("a")
child.sendline("x")
# Ensure minicom closes properly
child.expect(pexpect.EOF)

def clean_up_imc(self) -> None:
self.logger.info("Cleaning up IMC via SSH")
Expand Down
30 changes: 22 additions & 8 deletions dpu-tools/minicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import shutil
import tempfile
import logging

from contextlib import contextmanager
from typing import Generator

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -40,18 +41,31 @@ def pexpect_child_wait(child: pexpect.spawn, pattern: str, timeout: float) -> fl
return round(time.time() - start_time, 2)


@contextmanager
def configure_minicom(
    minirc_path: str = "/root/.minirc.dfl",
) -> Generator[None, None, None]:
    """
    Temporarily replace the minicom defaults file for the duration of a block.

    On entry, any existing file at ``minirc_path`` is moved to a temporary
    backup and a new file containing the single setting ``pu rtscts No`` is
    written in its place. On exit (normal or via an exception) the previous
    state is put back: the backup is moved back over ``minirc_path``, or, if
    there was no file to begin with, the generated file is removed.

    Args:
        minirc_path: Location of the minicom defaults file to override.
            Defaults to root's ``.minirc.dfl``.

    Yields:
        None. The replacement configuration is in effect while the ``with``
        body runs.
    """
    backup_path = ""
    if os.path.exists(minirc_path):
        # Reserve a unique temp name, close the handle, then move the
        # original onto it so it can be restored later.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            backup_path = temp_file.name
        shutil.move(minirc_path, backup_path)

    try:
        # The write lives inside the try so that a failure here still
        # restores the backed-up original instead of stranding it in the
        # temp directory.
        with open(minirc_path, "w") as new_file:
            new_file.write("pu rtscts No\n")
        yield
    finally:
        if backup_path:
            shutil.move(backup_path, minirc_path)
        else:
            # There was no original file: remove the one we generated so the
            # caller's environment ends up as it started.
            try:
                os.remove(minirc_path)
            except FileNotFoundError:
                pass
Loading