truenas · aiden3c · Sep 18, 2024 · Aug 29, 2024 · Aug 29, 2024 · Sep 3, 2024
diff --git a/src/middlewared/middlewared/etc_files/smartd.py b/src/middlewared/middlewared/etc_files/smartd.py
@@ -2,6 +2,7 @@
 import re
 import shlex
 import subprocess
+import json
 
 from middlewared.common.smart.smartctl import get_smartctl_args, smartctl, SMARTCTX
 from middlewared.plugins.smart_.schedule import SMARTD_SCHEDULE_PIECES, smartd_schedule_piece
@@ -23,15 +24,23 @@ async def ensure_smart_enabled(args):
     if any(arg.startswith("/dev/nvme") for arg in args):
         return True
 
-    p = await smartctl(args + ["-i"], stderr=subprocess.STDOUT, check=False, encoding="utf8", errors="ignore")
-    if not re.search("SMART.*abled", p.stdout):
+    p = await smartctl(
+        args + ["-i", "--json=c"], check=False, stderr=subprocess.STDOUT, encoding="utf8", errors="ignore"
+    )
+    try:
+        smart_support = json.loads(p.stdout)["smart_support"]
+    except (KeyError, TypeError, ValueError):
+        # stderr is merged into stdout above, so the text may not be valid JSON, and nothing
+        # guarantees a "smart_support" section. Treat both cases as "not supported".
+        smart_support = {}
+    if not smart_support.get("available"):
         logger.debug("SMART is not supported on %r", args)
         return False
 
-    if re.search("SMART.*Enabled", p.stdout):
+    if smart_support.get("enabled"):
         return True
 
-    p = await smartctl(args + ["-s", "on"], stderr=subprocess.STDOUT, check=False)
+    p = await smartctl(args + ["-s", "on"], check=False, stderr=subprocess.STDOUT)
     if p.returncode == 0:
         return True
     else:

diff --git a/src/middlewared/middlewared/plugins/disk.py b/src/middlewared/middlewared/plugins/disk.py
@@ -143,9 +143,17 @@ async def disk_extend(self, disk, context):
             if await self.middleware.call('truenas.is_ix_hardware'):
                 disk['supports_smart'] = True
             else:
-                disk['supports_smart'] = disk['name'].startswith('nvme') or bool(RE_SMART_AVAILABLE.search(
-                    await self.middleware.call('disk.smartctl', disk['name'], ['-a'], {'silent': True}) or ''
-                ))
+                disk['supports_smart'] = disk['name'].startswith('nvme')
+                if not disk['supports_smart']:
+                    # Local import keeps this change self-contained; hoist it to the module imports if preferred.
+                    import json
+                    # Await the call before indexing (a coroutine is not subscriptable), decode the
+                    # JSON text it returns, and tolerate a None result from the silent call.
+                    output = await self.middleware.call(
+                        'disk.smartctl', disk['name'], ['-a', '--json=c'], {'silent': True}
+                    )
+                    smart_support = json.loads(output).get('smart_support', {}) if output else {}
+                    disk['supports_smart'] = bool(smart_support.get('available'))
 
         if disk['name'] in context['boot_pool_disks']:
             disk['pool'] = context['boot_pool_name']

diff --git a/src/middlewared/middlewared/plugins/disk_/smart_attributes.py b/src/middlewared/middlewared/plugins/disk_/smart_attributes.py
@@ -38,7 +38,7 @@ async def smart_attributes(self, name):
         """
         Returns S.M.A.R.T. attributes values for specified disk name.
         """
-        output = json.loads(await self.middleware.call('disk.smartctl', name, ['-A', '-j']))
+        output = json.loads(await self.middleware.call('disk.smartctl', name, ['-A', '--json=c']))
 
         if 'ata_smart_attributes' in output:
             return output['ata_smart_attributes']['table']

diff --git a/src/middlewared/middlewared/plugins/disk_/temperature.py b/src/middlewared/middlewared/plugins/disk_/temperature.py
@@ -1,6 +1,7 @@
 import asyncio
 import datetime
 import time
+import json
 
 import async_timeout
 
@@ -61,9 +62,16 @@ async def temperature(self, name, options):
 
     @private
     async def temperature_uncached(self, name, powermode):
-        output = await self.middleware.call('disk.smartctl', name, ['-a', '-n', powermode.lower()], {'silent': True})
-        if output is not None:
-            return parse_smartctl_for_temperature_output(output)
+        """
+        Run smartctl in JSON mode for disk `name` and hand the decoded document to
+        `parse_smartctl_for_temperature_output`; returns `None` when the call yields no output.
+        """
+        smartctl_args = ['-a', '-n', powermode.lower(), '--json=c']
+        output = await self.middleware.call('disk.smartctl', name, smartctl_args, {'silent': True})
+        if not output:
+            return None
+
+        return parse_smartctl_for_temperature_output(json.loads(output))
 
     @private
     async def reset_temperature_cache(self):

diff --git a/src/middlewared/middlewared/plugins/smart.py b/src/middlewared/middlewared/plugins/smart.py
@@ -3,6 +3,7 @@
 import functools
 import re
 import time
+import json
 
 from humanize import ordinal
 
@@ -21,159 +22,142 @@
 RE_TIME = re.compile(r'test will complete after ([a-z]{3} [a-z]{3} [0-9 ]+ \d\d:\d\d:\d\d \d{4})', re.IGNORECASE)
 RE_TIME_SCSIPRINT_EXTENDED = re.compile(r'Please wait (\d+) minutes for test to complete')
 
-RE_OF_TEST_REMAINING = re.compile(r'([0-9]+)% of test remaining')
-RE_SELF_TEST_STATUS = re.compile(r'self-test in progress \(([0-9]+)% completed\)')
-
 
 async def annotate_disk_smart_tests(middleware, tests_filter, disk):
     if disk["disk"] is None:
         return
 
-    output = await middleware.call("disk.smartctl", disk["disk"], ["-a"], {"silent": True})
+    output = await middleware.call("disk.smartctl", disk["disk"], ["-a", "--json=c"], {"silent": True})
     if output is None:
         return
+    data = json.loads(output)  # decode once; both parsers below take the decoded document
 
-    tests = parse_smart_selftest_results(output) or []
-    current_test = parse_current_smart_selftest(output)
+    tests = parse_smart_selftest_results(data) or []  # the parser returns None when it finds no self-test log
+    current_test = parse_current_smart_selftest(data)  # None unless a running test is detected
     return dict(tests=filter_list(tests, tests_filter), current_test=current_test, **disk)
 
 
-def parse_smart_selftest_results(stdout):
+def parse_smart_selftest_results(data):
+    """
+    Build self-test result dicts from parsed smartctl JSON (ATA, NVMe or SCSI log); `None` if it has no such log.
+    """
     tests = []
 
     # ataprint.cpp
-    if "LBA_of_first_error" in stdout:
-        for line in stdout.split("\n"):
-            if not line.startswith("#"):
-                continue
-
-            if line[58] == "%":
-                remaining = line[55:58]
-                lifetime = line[61:69]
-            else:
-                remaining = line[55:57]
-                lifetime = line[60:68]
+    if "ata_smart_self_test_log" in data:
+        if "table" in data["ata_smart_self_test_log"]["standard"]:  # If there are no tests, there is no table
+            for index, entry in enumerate(data["ata_smart_self_test_log"]["standard"]["table"]):
 
-            test = {
-                "num": int(line[1:3].strip()),
-                "description": line[5:24].strip(),
-                "status_verbose": line[25:54].strip(),
-                "remaining": int(remaining.strip()) / 100,
-                "lifetime": int(lifetime.strip()),
-                "lba_of_first_error": line[77:].strip(),
-            }
+                # remaining_percent is only in the dict while the test is in progress, so default to 0
+                remaining = entry["status"].get("remaining_percent", 0)
 
-            if test["status_verbose"] == "Completed without error":
-                test["status"] = "SUCCESS"
-            elif test["status_verbose"] == "Self-test routine in progress":
-                test["status"] = "RUNNING"
-            elif test["status_verbose"] in ["Aborted by host", "Interrupted (host reset)"]:
-                test["status"] = "ABORTED"
-            else:
-                test["status"] = "FAILED"
+                test = {
+                    "num": index,
+                    "description": entry["type"]["string"],
+                    "status_verbose": entry["status"]["string"],
+                    "remaining": remaining,
+                    "lifetime": entry["lifetime_hours"],
+                    "lba_of_first_error": entry.get("lba"),  # only included if there is an error
+                }
 
-            if test["lba_of_first_error"] == "-":
-                test["lba_of_first_error"] = None
+                if test["status_verbose"] == "Completed without error":
+                    test["status"] = "SUCCESS"
+                elif test["status_verbose"] == "Self-test routine in progress":
+                    test["status"] = "RUNNING"
+                elif test["status_verbose"] in ["Aborted by host", "Interrupted (host reset)"]:
+                    test["status"] = "ABORTED"
+                else:
+                    test["status"] = "FAILED"
 
-            tests.append(test)
+                tests.append(test)
 
         return tests
 
     # nvmeprint.cpp
-    if "Failing_LBA" in stdout:
-        got_header = False
-        for line in stdout.split("\n"):
-            if "Failing_LBA" in line:
-                got_header = True
-                continue
-
-            if not got_header:
-                continue
-
-            try:
-                status_verbose = line[23:56].strip()
-                if status_verbose == "Completed without error":
-                    status = "SUCCESS"
-                elif status_verbose.startswith("Aborted:"):
-                    status = "ABORTED"
-                else:
-                    status = "FAILED"
+    if "nvme_self_test_log" in data:
+        if "table" in data["nvme_self_test_log"]:
+            for index, entry in enumerate(data["nvme_self_test_log"]["table"]):
 
-                failing_lba = line[67:79].strip()
-                nsid = line[80:85].strip()
-                seg = line[86:89].strip()
-                sct = line[90:93]
-                code = line[94:98]
+                if lba := entry.get("lba"):
+                    lba = entry["lba"]["value"]
 
                 test = {
-                    "num": int(line[0:2].strip()),
-                    "description": line[5:22].strip(),
-                    "status": status,
-                    "status_verbose": status_verbose,
-                    "power_on_hours": int(line[57:66].strip()),
-                    "failing_lba": None if failing_lba == "-" else int(failing_lba),
-                    "nsid": None if nsid == "-" else nsid,
-                    "seg": None if seg == "-" else int(seg),
-                    "sct": sct,
-                    "code": code,
+                    "num": index,
+                    "description": entry["self_test_code"]["string"],
+                    "status_verbose": entry["self_test_result"]["string"],
+                    "power_on_hours": entry["power_on_hours"],
+                    "failing_lba": lba,
+                    "nsid": entry.get("nsid"),
+                    "seg": entry.get("segment"),
+                    "sct": entry.get("status_code_type") or 0x0,
+                    "code": entry.get("status_code") or 0x0,
                 }
-            except ValueError:
-                break
 
-            tests.append(test)
+                if test["status_verbose"] == "Completed without error":
+                    test["status"] = "SUCCESS"
+                elif test["status_verbose"].startswith("Aborted:"):
+                    test["status"] = "ABORTED"
+                else:
+                    test["status"] = "FAILED"
+
+                tests.append(test)
 
         return tests
 
     # scsiprint.cpp
-    if "LBA_first_err" in stdout:
-        for line in stdout.split("\n"):
-            if not line.startswith("#"):
-                continue
+    # these results are numbered top-level keys (scsi_self_test_0, scsi_self_test_1, ...) rather than a table
+    if "scsi_self_test_0" in data:  # 0 is most recent test
+        for index in range(20):  # only 20 tests can ever return
+            test_key = f"scsi_self_test_{index}"
+            if test_key not in data:
+                break
+            entry = data[test_key]
+
+            if segment := entry.get("failed_segment"):
+                segment = entry["failed_segment"]["value"]
+
+            if lba := entry.get("lba_first_failure"):
+                lba = entry["lba_first_failure"]["value"]
+
+            lifetime = 0 if entry.get("self_test_in_progress") else entry["power_on_time"]["hours"]
 
             test = {
-                "num": int(line[1:3].strip()),
-                "description": line[5:22].strip(),
-                "status_verbose": line[23:48].strip(),
-                "segment_number": line[49:52].strip(),
-                "lifetime": line[55:60].strip(),
-                "lba_of_first_error": line[60:78].strip(),
+                "num": index,
+                "description": entry["code"]["string"],
+                "status_verbose": entry["result"]["string"],
+                "segment_number": segment,
+                "lifetime": lifetime,
+                "lba_of_first_error": lba,
             }
 
             if test["status_verbose"] == "Completed":
                 test["status"] = "SUCCESS"
             elif test["status_verbose"] == "Self test in progress ...":
                 test["status"] = "RUNNING"
-            elif test["status_verbose"] in ["Aborted (by user command)", "Aborted (device reset ?)"]:
+            elif test["status_verbose"].startswith("Aborted"):
                 test["status"] = "ABORTED"
             else:
                 test["status"] = "FAILED"
 
-            if test["segment_number"] == "-":
-                test["segment_number"] = None
-            else:
-                test["segment_number"] = int(test["segment_number"])
-
-            if test["lifetime"] == "NOW":
-                test["lifetime"] = None
-            else:
-                test["lifetime"] = int(test["lifetime"])
-
-            if test["lba_of_first_error"] == "-":
-                test["lba_of_first_error"] = None
-
             tests.append(test)
 
         return tests
 
 
-def parse_current_smart_selftest(stdout):
-    if remaining := RE_OF_TEST_REMAINING.search(stdout):
-        return {"progress": 100 - int(remaining.group(1))}
+def parse_current_smart_selftest(data):
+    """Return `{"progress": percent_done}` for a running self-test found in parsed smartctl JSON, else `None`."""
+    # ata: look for "remaining_percent" on the first entry of the standard self-test log table
+    if tests := data.get("ata_smart_self_test_log", {}).get("standard", {}).get("table"):
+        if remaining := tests[0]["status"].get("remaining_percent"):
+            return {"progress": 100 - remaining}
 
-    if remaining := RE_SELF_TEST_STATUS.search(stdout):
-        return {"progress": int(remaining.group(1))}
+    # nvme: 0 is a meaningful completion percentage, so compare against None instead of relying on truthiness
+    completed = data.get("nvme_self_test_log", {}).get("current_self_test_completion_percent")
+    if completed is not None:
+        return {"progress": completed}
 
-    if "Self test in progress ..." in stdout:
+    # scsi gives no progress
+    if "self_test_in_progress" in data:
         return {"progress": 0}
 
 

diff --git a/src/middlewared/middlewared/pytest/unit/etc_files/test_smartd.py b/src/middlewared/middlewared/pytest/unit/etc_files/test_smartd.py
@@ -12,7 +12,7 @@
 @pytest.mark.asyncio
 async def test__ensure_smart_enabled__smart_error():
     with patch("middlewared.etc_files.smartd.smartctl") as run:
-        run.return_value = Mock(stdout="S.M.A.R.T. Error")
+        run.return_value = Mock(stdout='{"smart_support": {"enabled": false, "available": false}}')
 
         assert await ensure_smart_enabled(["/dev/ada0"]) is False
 
@@ -22,7 +22,7 @@ async def test__ensure_smart_enabled__smart_error():
 @pytest.mark.asyncio
 async def test__ensure_smart_enabled__smart_enabled():
     with patch("middlewared.etc_files.smartd.smartctl") as run:
-        run.return_value = Mock(stdout="SMART   Enabled")
+        run.return_value = Mock(stdout='{"smart_support": {"enabled": true, "available": true}}')
 
         assert await ensure_smart_enabled(["/dev/ada0"])
 
@@ -32,12 +32,12 @@ async def test__ensure_smart_enabled__smart_enabled():
 @pytest.mark.asyncio
 async def test__ensure_smart_enabled__smart_was_disabled():
     with patch("middlewared.etc_files.smartd.smartctl") as run:
-        run.return_value = Mock(stdout="SMART   Disabled", returncode=0)
+        run.return_value = Mock(stdout='{"smart_support": {"enabled": false, "available": true}}', returncode=0)
 
         assert await ensure_smart_enabled(["/dev/ada0"])
 
         assert run.call_args_list == [
-            call(["/dev/ada0", "-i"], check=False, stderr=subprocess.STDOUT,
+            call(["/dev/ada0", "-i", "--json=c"], check=False, stderr=subprocess.STDOUT,
                  encoding="utf8", errors="ignore"),
             call(["/dev/ada0", "-s", "on"], check=False, stderr=subprocess.STDOUT),
         ]
@@ -46,20 +46,20 @@ async def test__ensure_smart_enabled__smart_was_disabled():
 @pytest.mark.asyncio
 async def test__ensure_smart_enabled__enabling_smart_failed():
     with patch("middlewared.etc_files.smartd.smartctl") as run:
-        run.return_value = Mock(stdout="SMART   Disabled", returncode=1)
+        run.return_value = Mock(stdout='{"smart_support": {"enabled": false, "available": true}}', returncode=1)
 
         assert await ensure_smart_enabled(["/dev/ada0"]) is False
 
 
 @pytest.mark.asyncio
 async def test__ensure_smart_enabled__handled_args_properly():
     with patch("middlewared.etc_files.smartd.smartctl") as run:
-        run.return_value = Mock(stdout="SMART   Enabled")
+        run.return_value = Mock(stdout='{"smart_support": {"enabled": true, "available": true}}')
 
         assert await ensure_smart_enabled(["/dev/ada0", "-d", "sat"])
 
         run.assert_called_once_with(
-            ["/dev/ada0", "-d", "sat", "-i"], check=False, stderr=subprocess.STDOUT,
+            ["/dev/ada0", "-d", "sat", "-i", "--json=c"], check=False, stderr=subprocess.STDOUT,
             encoding="utf8", errors="ignore",
         )