From 992e96b8454c57e9a6589f1c9458e13ab4748826 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Tue, 11 Sep 2018 05:39:11 +0900 Subject: [PATCH 01/20] feat: add type field and change some conditions --- controller.py | 9 ++++----- isolating_controller/workload.py | 7 ++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/controller.py b/controller.py index 929bff7..ba5b308 100755 --- a/controller.py +++ b/controller.py @@ -55,10 +55,10 @@ def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicP logger = logging.getLogger('monitoring.workload_creation') logger.debug(f'{arr} is received from workload_creation queue') - if len(arr) != 4: + if len(arr) != 5: return - wl_name, pid, perf_pid, perf_interval = arr + wl_name, wl_type, pid, perf_pid, perf_interval = arr pid = int(pid) perf_pid = int(perf_pid) perf_interval = int(perf_interval) @@ -66,10 +66,9 @@ def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicP if not psutil.pid_exists(pid): return - workload = Workload(wl_name, pid, perf_pid, perf_interval) + workload = Workload(wl_name, wl_type, pid, perf_pid, perf_interval) - # FIXME: hard coded - if wl_name == 'SP': + if wl_type == 'bg': self._pending_wl.add_bg(workload) else: self._pending_wl.add_fg(workload) diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index edcec88..872ebed 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -20,8 +20,9 @@ class Workload: ControlThread schedules the groups of `Workload' instances to enforce their scheduling decisions """ - def __init__(self, name: str, pid: int, perf_pid: int, perf_interval: int) -> None: + def __init__(self, name: str, wl_type: str, pid: int, perf_pid: int, perf_interval: int) -> None: self._name = name + self._wl_type = wl_type self._pid = pid self._metrics: Deque[BasicMetric] = deque() self._perf_pid = perf_pid @@ -40,6 +41,10 @@ def name(self) -> str: def pid(self) -> int: 
return self._pid + @property + def wl_type(self) -> str: + return self._wl_type + @property def metrics(self) -> Deque[BasicMetric]: return self._metrics From 3479f4845774174d99896ef1a62317954e3bab57 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Tue, 18 Sep 2018 04:09:08 +0900 Subject: [PATCH 02/20] Feat: Modify the pending_queue to treat more than two workloads --- .../isolation/policies/base_policy.py | 3 +- isolating_controller/utils/numa_topology.py | 54 +++++++++++++ isolating_controller/workload.py | 13 ++- pending_queue.py | 79 +++++++++++++------ 4 files changed, 125 insertions(+), 24 deletions(-) create mode 100644 isolating_controller/utils/numa_topology.py diff --git a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index f72fa9c..42336ee 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -17,9 +17,10 @@ class ResourceType(IntEnum): class IsolationPolicy(metaclass=ABCMeta): _IDLE_ISOLATOR: IdleIsolator = IdleIsolator() - def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: + def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: self._fg_wl = fg_wl self._bg_wl = bg_wl + self._skt_id = skt_id self._isolator_map: Mapping[Type[Isolator], Isolator] = dict() self._cur_isolator: Isolator = IsolationPolicy._IDLE_ISOLATOR diff --git a/isolating_controller/utils/numa_topology.py b/isolating_controller/utils/numa_topology.py new file mode 100644 index 0000000..901738d --- /dev/null +++ b/isolating_controller/utils/numa_topology.py @@ -0,0 +1,54 @@ +# coding: UTF-8 + +from pathlib import Path +from typing import Dict, Set, Tuple + +import aiofiles + +from .hyphen import convert_to_set + + +class NumaTopology: + BASE_PATH: Path = Path('/sys/devices/system/node') + + @staticmethod + async def get_node_topo() -> Set[int]: + online_path: Path = NumaTopology.BASE_PATH / 'online' + + async with 
aiofiles.open(online_path) as fp: + line: str = await fp.readline() + node_list = convert_to_set(line) + + return node_list + + @staticmethod + async def get_cpu_topo(node_list: Set[int]) -> Dict[int, Set[int]]: + cpu_topo: Dict[int, Set[int]] = dict() + + for num in node_list: + cpulist_path: Path = NumaTopology.BASE_PATH / f'node{num}/cpulist' + + async with aiofiles.open(cpulist_path) as fp: + line: str = await fp.readline() + cpu_topo[num] = convert_to_set(line) + + return cpu_topo + + @staticmethod + async def get_mem_topo() -> Set[int]: + has_memory_path = NumaTopology.BASE_PATH / 'has_memory' + + async with aiofiles.open(has_memory_path) as fp: + line: str = await fp.readline() + mem_topo = convert_to_set(line) + + # TODO: get_mem_topo can be enhanced by using real numa memory access latency + + return mem_topo + + @staticmethod + async def get_numa_info() -> Tuple[Dict[int, Set[int]], Set[int]]: + node_list = await NumaTopology.get_node_topo() + cpu_topo = await NumaTopology.get_cpu_topo(node_list) + mem_topo = await NumaTopology.get_mem_topo() + return cpu_topo, mem_topo diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index 872ebed..7f58087 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -2,14 +2,16 @@ from collections import deque from itertools import chain -from typing import Deque, Tuple +from typing import Deque, Tuple, Dict, Set import cpuinfo import psutil +from .utils.numa_topology import NumaTopology from .metric_container.basic_metric import BasicMetric, MetricDiff from .solorun_data.datas import data_map + L3_SIZE = int(cpuinfo.get_cpu_info()['l3_cache_size'].split()[0]) * 1024 @@ -79,3 +81,12 @@ def all_child_tid(self) -> Tuple[int, ...]: )) except psutil.NoSuchProcess: return tuple() + + def get_socket_id(self): + cpuset = self.cpuset + cpu_topo, _ = await NumaTopology.get_numa_info() + + # FIXME: Hardcode for assumption (one workload to one socket) + for socket_id, 
skt_cpus in cpu_topo.items(): + if cpuset in skt_cpus: + return socket_id diff --git a/pending_queue.py b/pending_queue.py index e5dbdcf..b126ee3 100644 --- a/pending_queue.py +++ b/pending_queue.py @@ -5,14 +5,16 @@ from isolating_controller.isolation.policies import IsolationPolicy from isolating_controller.workload import Workload - +from .isolating_controller.utils.numa_topology import NumaTopology class PendingQueue(Sized): - def __init__(self, policy_type: Type[IsolationPolicy]) -> None: + def __init__(self, policy_type: Type[IsolationPolicy], max_pending: int) -> None: self._policy_type: Type[IsolationPolicy] = policy_type + self._max_pending: int = max_pending - self._bg_q: Dict[Tuple[int, ...], Workload] = dict() - self._fg_q: Dict[Tuple[int, ...], Workload] = dict() + self._cur_pending: int = 0 + self._bg_q: Dict[int, Workload] = dict() + self._fg_q: Dict[int, Workload] = dict() self._pending_list: List[IsolationPolicy] = list() def __len__(self) -> int: @@ -24,33 +26,66 @@ def add_bg(self, workload: Workload) -> None: logger = logging.getLogger(__name__) logger.info(f'{workload} is ready for active as Background') - # FIXME: hard coded - other_cpuset = tuple(map(lambda x: x - 8, workload.cpuset)) - - if other_cpuset in self._fg_q: - new_group = self._policy_type(self._fg_q[other_cpuset], workload) - self._pending_list.append(new_group) - del self._fg_q[other_cpuset] - + if self._cur_pending < self._max_pending: + self._bg_q[workload.pid] = workload + self._cur_pending += 1 else: - self._bg_q[workload.cpuset] = workload + self.dump_to_pending_list() def add_fg(self, workload: Workload) -> None: logger = logging.getLogger(__name__) logger.info(f'{workload} is ready for active as Foreground') - # FIXME: hard coded - other_cpuset = tuple(map(lambda x: x + 8, workload.cpuset)) - - if other_cpuset in self._bg_q: - new_group = self._policy_type(self._bg_q[other_cpuset], workload) - self._pending_list.append(new_group) - del self._bg_q[other_cpuset] - + if 
self._cur_pending < self._max_pending: + self._fg_q[workload.pid] = workload + self._cur_pending += 1 else: - self._fg_q[workload.cpuset] = workload + self.dump_to_pending_list() def pop(self) -> IsolationPolicy: if len(self) is 0: raise IndexError(f'{self} is empty') return self._pending_list.pop() + + def dump_to_pending_list(self) -> None: + fg_pids = list(self._fg_q.keys()) + bg_pids = list(self._bg_q.keys()) + all_pids = list() + for i in range(len(self._fg_q)): + all_pids.append(fg_pids[i]) + for i in range(len(self._bg_q)): + all_pids.append(bg_pids[i]) + + node_list = await NumaTopology.get_node_topo() + group_pids = dict() # Dict. for grouping the fg and bg + for node in node_list: + group_pids[node] = set() + + for pid in all_pids: + if pid in fg_pids: + skt_id = self._fg_q[pid].get_socket_id() + group_pids[skt_id].add(pid) + elif pid in bg_pids: + skt_id = self._bg_q[pid].get_socket_id() + group_pids[skt_id].add(pid) + + # Grouping pids based on their types and skt_id + for node in node_list: + node_pidset = group_pids[node] + pid = node_pidset.pop() + if pid in fg_pids: + bg_pid = node_pidset.pop() + new_group = self._policy_type(pid, bg_pid, node) + self._pending_list.append(new_group) + del self._fg_q[pid] + del self._bg_q[bg_pid] + elif pid in bg_pids: + fg_pid = node_pidset.pop() + new_group = self._policy_type(fg_pid, pid, node) + self._pending_list.append(new_group) + del self._fg_q[fg_pid] + del self._bg_q[pid] + return + + def update_max_pending(self, new_max_pending: int): + self._max_pending = new_max_pending From 702507fba711a59f3d7bc1be9d6621a4f3b548ad Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Tue, 18 Sep 2018 14:39:15 +0900 Subject: [PATCH 03/20] fix: fix NumaTopology from async to sync --- isolating_controller/utils/numa_topology.py | 26 ++++++++++----------- pending_queue.py | 4 ++-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/isolating_controller/utils/numa_topology.py 
b/isolating_controller/utils/numa_topology.py index 901738d..18c998f 100644 --- a/isolating_controller/utils/numa_topology.py +++ b/isolating_controller/utils/numa_topology.py @@ -12,34 +12,34 @@ class NumaTopology: BASE_PATH: Path = Path('/sys/devices/system/node') @staticmethod - async def get_node_topo() -> Set[int]: + def get_node_topo() -> Set[int]: online_path: Path = NumaTopology.BASE_PATH / 'online' - async with aiofiles.open(online_path) as fp: - line: str = await fp.readline() + with open(online_path) as fp: + line: str = fp.readline() node_list = convert_to_set(line) return node_list @staticmethod - async def get_cpu_topo(node_list: Set[int]) -> Dict[int, Set[int]]: + def get_cpu_topo(node_list: Set[int]) -> Dict[int, Set[int]]: cpu_topo: Dict[int, Set[int]] = dict() for num in node_list: cpulist_path: Path = NumaTopology.BASE_PATH / f'node{num}/cpulist' - async with aiofiles.open(cpulist_path) as fp: - line: str = await fp.readline() + with open(cpulist_path) as fp: + line: str = fp.readline() cpu_topo[num] = convert_to_set(line) return cpu_topo @staticmethod - async def get_mem_topo() -> Set[int]: + def get_mem_topo() -> Set[int]: has_memory_path = NumaTopology.BASE_PATH / 'has_memory' - async with aiofiles.open(has_memory_path) as fp: - line: str = await fp.readline() + with open(has_memory_path) as fp: + line: str = fp.readline() mem_topo = convert_to_set(line) # TODO: get_mem_topo can be enhanced by using real numa memory access latency @@ -47,8 +47,8 @@ async def get_mem_topo() -> Set[int]: return mem_topo @staticmethod - async def get_numa_info() -> Tuple[Dict[int, Set[int]], Set[int]]: - node_list = await NumaTopology.get_node_topo() - cpu_topo = await NumaTopology.get_cpu_topo(node_list) - mem_topo = await NumaTopology.get_mem_topo() + def get_numa_info() -> Tuple[Dict[int, Set[int]], Set[int]]: + node_list = NumaTopology.get_node_topo() + cpu_topo = NumaTopology.get_cpu_topo(node_list) + mem_topo = NumaTopology.get_mem_topo() return cpu_topo, 
mem_topo diff --git a/pending_queue.py b/pending_queue.py index b126ee3..404381b 100644 --- a/pending_queue.py +++ b/pending_queue.py @@ -56,8 +56,8 @@ def dump_to_pending_list(self) -> None: for i in range(len(self._bg_q)): all_pids.append(bg_pids[i]) - node_list = await NumaTopology.get_node_topo() - group_pids = dict() # Dict. for grouping the fg and bg + node_list = NumaTopology.get_node_topo() + group_pids = dict() # Dict. for grouping the fg and bg for node in node_list: group_pids[node] = set() From ee6a735c333bb3c2f5dd580a13858b675b271645 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Wed, 19 Sep 2018 15:25:34 +0900 Subject: [PATCH 04/20] fix: Fix grouping code in pending_queue.py --- controller.py | 8 +++-- .../isolation/isolators/base_isolator.py | 8 ++--- .../isolation/policies/diff_policy.py | 4 +-- .../policies/diff_with_violation_policy.py | 4 +-- .../isolation/policies/greedy_diff_policy.py | 4 +-- .../greedy_diff_with_violation_policy.py | 4 +-- isolating_controller/utils/hyphen.py | 22 ++++++++++++ isolating_controller/utils/numa_topology.py | 8 ++--- isolating_controller/workload.py | 16 +++++---- pending_queue.py | 35 ++++++++++++------- 10 files changed, 76 insertions(+), 37 deletions(-) create mode 100644 isolating_controller/utils/hyphen.py diff --git a/controller.py b/controller.py index 6ca9969..ca39610 100755 --- a/controller.py +++ b/controller.py @@ -24,6 +24,7 @@ from isolating_controller.metric_container.basic_metric import BasicMetric from isolating_controller.workload import Workload from pending_queue import PendingQueue +from threading import RLock MIN_PYTHON = (3, 6) @@ -44,8 +45,10 @@ def __init__(self, metric_buf_size: int) -> None: self._rmq_host = 'localhost' self._rmq_creation_queue = 'workload_creation' - self._pending_wl = PendingQueue(DiffPolicy) + ## FIXME : Hard coded - PendingQueue can have four workloads at most (second argument) + self._pending_wl = PendingQueue(DiffPolicy, 4) self._control_thread = 
ControlThread(self._pending_wl) + self._lock = RLock() def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicProperties, body: bytes) -> None: ch.basic_ack(method.delivery_tag) @@ -67,10 +70,11 @@ def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicP return workload = Workload(wl_name, wl_type, pid, perf_pid, perf_interval) - if wl_type == 'bg': + logger.info(f'{workload} is background process') self._pending_wl.add_bg(workload) else: + logger.info(f'{workload} is foreground process') self._pending_wl.add_fg(workload) logger.info(f'{workload} is created') diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py index 51a3129..264696c 100644 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ b/isolating_controller/isolation/isolators/base_isolator.py @@ -14,7 +14,7 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: self._foreground_wl = foreground_wl self._background_wl = background_wl - self._is_fist_decision: bool = True + self._is_first_decision: bool = True @abstractmethod def strengthen(self) -> 'Isolator': @@ -63,7 +63,7 @@ def yield_isolation(self) -> None: Declare to stop the configuration search for the current isolator. Must be called when the current isolator yields the initiative. 
""" - self._is_fist_decision = True + self._is_first_decision = True @abstractmethod def _first_decision(self) -> NextStep: @@ -74,8 +74,8 @@ def _monitoring_result(self) -> NextStep: pass def decide_next_step(self) -> NextStep: - if self._is_fist_decision: - self._is_fist_decision = False + if self._is_first_decision: + self._is_first_decision = False return self._first_decision() else: diff --git a/isolating_controller/isolation/policies/diff_policy.py b/isolating_controller/isolation/policies/diff_policy.py index 56975d9..5757a48 100644 --- a/isolating_controller/isolation/policies/diff_policy.py +++ b/isolating_controller/isolation/policies/diff_policy.py @@ -8,8 +8,8 @@ class DiffPolicy(IsolationPolicy): - def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: - super().__init__(fg_wl, bg_wl) + def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: + super().__init__(fg_wl, bg_wl, skt_id) self._is_llc_isolated = False self._is_mem_isolated = False diff --git a/isolating_controller/isolation/policies/diff_with_violation_policy.py b/isolating_controller/isolation/policies/diff_with_violation_policy.py index 6b457a4..db58386 100644 --- a/isolating_controller/isolation/policies/diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/diff_with_violation_policy.py @@ -11,8 +11,8 @@ class DiffWViolationPolicy(DiffPolicy): VIOLATION_THRESHOLD = 3 - def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: - super().__init__(fg_wl, bg_wl) + def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: + super().__init__(fg_wl, bg_wl, skt_id) self._violation_count: int = 0 diff --git a/isolating_controller/isolation/policies/greedy_diff_policy.py b/isolating_controller/isolation/policies/greedy_diff_policy.py index 4cd1fad..d2b0fc9 100644 --- a/isolating_controller/isolation/policies/greedy_diff_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_policy.py @@ -8,8 +8,8 @@ class 
GreedyDiffPolicy(IsolationPolicy): - def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: - super().__init__(fg_wl, bg_wl) + def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: + super().__init__(fg_wl, bg_wl, skt_id) self._is_mem_isolated = False diff --git a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py b/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py index a10ef8b..84d41ff 100644 --- a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py @@ -11,8 +11,8 @@ class GreedyDiffWViolationPolicy(GreedyDiffPolicy): VIOLATION_THRESHOLD = 3 - def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: - super().__init__(fg_wl, bg_wl) + def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: + super().__init__(fg_wl, bg_wl, skt_id) self._violation_count: int = 0 diff --git a/isolating_controller/utils/hyphen.py b/isolating_controller/utils/hyphen.py new file mode 100644 index 0000000..0ac117e --- /dev/null +++ b/isolating_controller/utils/hyphen.py @@ -0,0 +1,22 @@ +# coding: UTF-8 + +from typing import Iterable, Set + + +def convert_to_set(hyphen_str: str) -> Set[int]: + ret = set() + + for elem in hyphen_str.split(','): + group = tuple(map(int, elem.split('-'))) + + if len(group) is 1: + ret.add(group[0]) + elif len(group) is 2: + ret.update(range(group[0], group[1] + 1)) + + return ret + + +def convert_to_hyphen(core_ids: Iterable[int]) -> str: + # TODO + return ','.join(map(str, set(core_ids))) diff --git a/isolating_controller/utils/numa_topology.py b/isolating_controller/utils/numa_topology.py index 18c998f..cf78890 100644 --- a/isolating_controller/utils/numa_topology.py +++ b/isolating_controller/utils/numa_topology.py @@ -3,8 +3,6 @@ from pathlib import Path from typing import Dict, Set, Tuple -import aiofiles - from .hyphen import convert_to_set 
@@ -15,7 +13,7 @@ class NumaTopology: def get_node_topo() -> Set[int]: online_path: Path = NumaTopology.BASE_PATH / 'online' - with open(online_path) as fp: + with open(online_path, "r") as fp: line: str = fp.readline() node_list = convert_to_set(line) @@ -28,7 +26,7 @@ def get_cpu_topo(node_list: Set[int]) -> Dict[int, Set[int]]: for num in node_list: cpulist_path: Path = NumaTopology.BASE_PATH / f'node{num}/cpulist' - with open(cpulist_path) as fp: + with open(cpulist_path, "r") as fp: line: str = fp.readline() cpu_topo[num] = convert_to_set(line) @@ -38,7 +36,7 @@ def get_cpu_topo(node_list: Set[int]) -> Dict[int, Set[int]]: def get_mem_topo() -> Set[int]: has_memory_path = NumaTopology.BASE_PATH / 'has_memory' - with open(has_memory_path) as fp: + with open(has_memory_path, "r") as fp: line: str = fp.readline() mem_topo = convert_to_set(line) diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index 7f58087..f333a28 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -2,7 +2,7 @@ from collections import deque from itertools import chain -from typing import Deque, Tuple, Dict, Set +from typing import Deque, Tuple, Set import cpuinfo import psutil @@ -82,11 +82,15 @@ def all_child_tid(self) -> Tuple[int, ...]: except psutil.NoSuchProcess: return tuple() - def get_socket_id(self): - cpuset = self.cpuset - cpu_topo, _ = await NumaTopology.get_numa_info() + def get_socket_id(self) -> int: + cpuset: Set[int] = self.cpuset + cpu_topo, _ = NumaTopology.get_numa_info() + ret = None # FIXME: Hardcode for assumption (one workload to one socket) for socket_id, skt_cpus in cpu_topo.items(): - if cpuset in skt_cpus: - return socket_id + print(f'cpuset: {cpuset}, socket_id: {socket_id}, skt_cpus: {skt_cpus}') + for cpu_id in cpuset: + if cpu_id in skt_cpus: + ret = socket_id + return ret diff --git a/pending_queue.py b/pending_queue.py index 404381b..877d975 100644 --- a/pending_queue.py +++ b/pending_queue.py 
@@ -1,11 +1,13 @@ # coding: UTF-8 import logging -from typing import Dict, List, Sized, Tuple, Type +from threading import RLock + +from typing import Dict, List, Sized, Type from isolating_controller.isolation.policies import IsolationPolicy from isolating_controller.workload import Workload -from .isolating_controller.utils.numa_topology import NumaTopology +from isolating_controller.utils.numa_topology import NumaTopology class PendingQueue(Sized): def __init__(self, policy_type: Type[IsolationPolicy], max_pending: int) -> None: @@ -25,21 +27,22 @@ def __len__(self) -> int: def add_bg(self, workload: Workload) -> None: logger = logging.getLogger(__name__) logger.info(f'{workload} is ready for active as Background') + logger.info(f'self._cur_pending: {self._cur_pending}') - if self._cur_pending < self._max_pending: - self._bg_q[workload.pid] = workload - self._cur_pending += 1 - else: + self._bg_q[workload.pid] = workload + self._cur_pending += 1 + if self._cur_pending == self._max_pending: self.dump_to_pending_list() + def add_fg(self, workload: Workload) -> None: logger = logging.getLogger(__name__) logger.info(f'{workload} is ready for active as Foreground') + logger.info(f'self._cur_pending: {self._cur_pending}') - if self._cur_pending < self._max_pending: - self._fg_q[workload.pid] = workload - self._cur_pending += 1 - else: + self._fg_q[workload.pid] = workload + self._cur_pending += 1 + if self._cur_pending == self._max_pending: self.dump_to_pending_list() def pop(self) -> IsolationPolicy: @@ -48,6 +51,9 @@ def pop(self) -> IsolationPolicy: return self._pending_list.pop() def dump_to_pending_list(self) -> None: + logger = logging.getLogger(__name__) + logger.info('Dumping workloads to pending list!') + fg_pids = list(self._fg_q.keys()) bg_pids = list(self._bg_q.keys()) all_pids = list() @@ -69,19 +75,24 @@ def dump_to_pending_list(self) -> None: skt_id = self._bg_q[pid].get_socket_id() group_pids[skt_id].add(pid) + logger.info('Trying to create new 
groups!') + # # Grouping pids based on their types and skt_id for node in node_list: node_pidset = group_pids[node] pid = node_pidset.pop() + print(f'Pop {pid}!') if pid in fg_pids: bg_pid = node_pidset.pop() - new_group = self._policy_type(pid, bg_pid, node) + print(f'Pop {bg_pid}!') + new_group = self._policy_type(self._fg_q[pid], self._bg_q[bg_pid], node) self._pending_list.append(new_group) del self._fg_q[pid] del self._bg_q[bg_pid] elif pid in bg_pids: fg_pid = node_pidset.pop() - new_group = self._policy_type(fg_pid, pid, node) + print(f'Pop {fg_pid}!') + new_group = self._policy_type(self._fg_q[fg_pid], self._bg_q[pid], node) self._pending_list.append(new_group) del self._fg_q[fg_pid] del self._bg_q[pid] From 75f8bfb26e8199b896506be9e3ec5ff5d87be23d Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Wed, 19 Sep 2018 20:56:12 +0900 Subject: [PATCH 05/20] feat: Add SwapIsolator skeleton code --- .../isolation/isolators/swap.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 isolating_controller/isolation/isolators/swap.py diff --git a/isolating_controller/isolation/isolators/swap.py b/isolating_controller/isolation/isolators/swap.py new file mode 100644 index 0000000..3be9aca --- /dev/null +++ b/isolating_controller/isolation/isolators/swap.py @@ -0,0 +1,88 @@ +# coding: UTF-8 + +import logging + +from typing import Dict, Set + +from .base_isolator import Isolator +from .. 
import NextStep +from ...workload import Workload +from ..policies import IsolationPolicy + +class SwapIsolator(Isolator): + _THRESHOLD = 0.005 + + def __init__(self, foreground_wl: Workload, background_wl: Workload, + isolation_groups: Dict[IsolationPolicy, int]) -> None: + super().__init__(foreground_wl, background_wl) + + self._all_groups = isolation_groups + self._swap_candidates: Set[Workload] = None + + def __del__(self): + logger = logging.getLogger(__name__) + if self._foreground_wl.is_running: + logger.debug(f'reset swap configuration of {self._foreground_wl}') + + if self._background_wl.is_running: + logger.debug(f'reset swap configuration of {self._background_wl}') + + + def strengthen(self) -> 'Isolator': + """ + Choosing which contentious workloads to swap out to other socket + :return: + """ + # FIXME: hard coded (two sockets) + ## Estimating the socket contention + ## + + return + + @property + def is_max_level(self) -> bool: + """ + Searching configuration space to the max level + e.g., There is no searchable candidate to strengthen the degree of isolation + :return: + """ + # FIXME: + + return False + + @property + def is_min_level(self) -> bool: + """ + Searching configuration space to the min level + e.g., There is no searchable candidate to weaken the degree of isolation + :return: + """ + # FIXME: + + return False + + def weaken(self) -> 'Isolator': + """ + Choosing which contentious workloads to swap in from other socket + :return: + """ + # FIXME: hard coded (two sockets) + pass + + def _enforce(self) -> None: + """ + Enforcing the pre-configured swap isolation + :return: + """ + pass + +# def enforce(self) -> None: +# self._prev_metric_diff: MetricDiff = self._foreground_wl.calc_metric_diff() +# +# self._enforce() + + def _first_decision(self) -> NextStep: + pass + + def _monitoring_result(self) -> NextStep: + pass From a09f1d4f58e121a9a9ffd1b843fe4473a8f3cbfa Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Thu, 20 Sep 2018 21:35:30 +0900 
Subject: [PATCH 06/20] feat: Add cgroup cycle throttle and create cgroup.py based on cgroup_cpuset.py --- isolating_controller/utils/cgroup.py | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 isolating_controller/utils/cgroup.py diff --git a/isolating_controller/utils/cgroup.py b/isolating_controller/utils/cgroup.py new file mode 100644 index 0000000..2f690fd --- /dev/null +++ b/isolating_controller/utils/cgroup.py @@ -0,0 +1,61 @@ +# coding: UTF-8 + + +import subprocess +import getpass +import grp +import os + +from typing import Iterable, Set, Optional +from .hyphen import convert_to_set + + +class Cgroup: + CPUSET_MOUNT_POINT = '/sys/fs/cgroup/cpuset' + CPU_MOUNT_POINT = '/sys/fs/cgroup/cpu' + + def __init__(self, group_name: str, controllers: str) -> None: + self._group_name: str = group_name + self._controllers: str = controllers + self._group_path: str = f'{controllers}:{group_name}' + + def create_group(self) -> None: + uname: str = getpass.getuser() + gid: int = os.getegid() + gname: str = grp.getgrgid(gid).gr_name + + subprocess.check_call(args=( + 'sudo', 'cgcreate', '-a', f'{uname}:{gname}', '-d', '700', '-f', + '600', '-t', f'{uname}:{gname}', '-s', '600', '-g', self._group_path)) + + def assign_cpus(self, core_set: Set[int]) -> None: + core_ids = ','.join(map(str, core_set)) + subprocess.check_call(args=('cgset', '-r', f'cpuset.cpus={core_ids}', self._group_name)) + + def assign_mems(self, socket_set: Set[int]) -> None: + mem_ids = ','.join(map(str, socket_set)) + subprocess.check_call(args=('cgset', '-r', f'cpuset.mems={mem_ids}', self._group_name)) + + def _get_cpu_affinity_from_group(self) -> Set[int]: + with open(f'{Cgroup.CPUSET_MOUNT_POINT}/{self._group_name}/cpuset.cpus', "r") as fp: + line: str = fp.readline() + core_set: Set[int] = convert_to_set(line) + return core_set + + def limit_cpu_quota(self, limit_percentage: float, period: Optional[int]=None) -> None: + if period is None: + with 
open(f'{Cgroup.CPU_MOUNT_POINT}/cpu.cfs_period_us', "r") as fp: + line: str = fp.readline() + period = int(line) + + cpu_cores = self._get_cpu_affinity_from_group() + quota = int(period * limit_percentage/100 * len(cpu_cores)) + subprocess.check_call(args=('cgset', '-r', f'cpu.cfs_quota_us={quota}', self._group_name)) + + subprocess.check_call(args=('cgset', '-r', f'cpu.cfs_period_us={period}', self._group_name)) + + def add_tasks(self, pids: Iterable[int]) -> None: + subprocess.check_call(args=('cgclassify', '-g', self._group_path, '--sticky', *map(str, pids))) + + def delete(self) -> None: + subprocess.check_call(args=('sudo', 'cgdelete', '-r', '-g', self._group_path)) From 063e25b02cc9c193f38b4c60e7fecdc2cdb8d1fb Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Thu, 20 Sep 2018 21:36:50 +0900 Subject: [PATCH 07/20] feat: Fix SchedIsolator (strengthen, weaken, properties) --- .../isolation/isolators/schedule.py | 89 ++++++++++++++----- isolating_controller/utils/__init__.py | 2 + isolating_controller/workload.py | 8 +- 3 files changed, 77 insertions(+), 22 deletions(-) diff --git a/isolating_controller/isolation/isolators/schedule.py b/isolating_controller/isolation/isolators/schedule.py index 78ae378..8fbe218 100644 --- a/isolating_controller/isolation/isolators/schedule.py +++ b/isolating_controller/isolation/isolators/schedule.py @@ -2,11 +2,15 @@ import logging +from typing import Tuple, Set, Dict + from .base_isolator import Isolator from .. 
import NextStep -from ...utils import CgroupCpuset +#from ...utils import CgroupCpuset from ...workload import Workload - +from ...utils import Cgroup +from ...utils import NumaTopology +from ...utils import hyphen class SchedIsolator(Isolator): _DOD_THRESHOLD = 0.005 @@ -15,46 +19,87 @@ class SchedIsolator(Isolator): def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: super().__init__(foreground_wl, background_wl) - # FIXME: hard coded - self._cur_step = 24 + self._fg_cpuset: Tuple[int] = foreground_wl.cpuset + self._bg_cpuset: Tuple[int] = background_wl.cpuset + self._cur_bg_step: int = min(self._bg_cpuset) + self._cur_fg_step: int = max(self._fg_cpuset) - self._bg_grp_name = f'{background_wl.name}_{background_wl.pid}' - self._prev_bg_affinity = background_wl.cpuset + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE - CgroupCpuset.create_group(self._bg_grp_name) - CgroupCpuset.add_task(self._bg_grp_name, background_wl.pid) - # FIXME: hard coded - CgroupCpuset.assign(self._bg_grp_name, set(range(self._cur_step, 32))) + self._bg_grp_name: str = f'{background_wl.name}_{background_wl.pid}' + self._prev_bg_affinity: Tuple[int] = background_wl.cpuset + self._cgroup = Cgroup(self._bg_grp_name, 'cpuset,cpu') + + cpu_topo, mem_topo = NumaTopology.get_numa_info() + self._cpu_topo: Dict[int, Set[int]] = cpu_topo + self._mem_topo: Set[int] = mem_topo def __del__(self) -> None: if self._background_wl.is_running: - CgroupCpuset.assign(self._bg_grp_name, set(self._prev_bg_affinity)) + self._cgroup.assign_cpus(set(self._prev_bg_affinity)) def strengthen(self) -> 'SchedIsolator': - self._cur_step += 1 + """ + Strengthen reduces the number of CPUs assigned to BG workloads and increase that of FG workload + TODO: Changing Step Size if needed + :return: + """ + # NOTE: Caller is assumed that BG workload + if self._bg_next_step == NextStep.STRENGTHEN: + self._cur_bg_step += 1 + bg_cpuset = set(self._bg_cpuset) + 
bg_cpuset.remove(self._cur_bg_step) + self._bg_cpuset = tuple(bg_cpuset) + if self._fg_next_step == NextStep.WEAKEN: + self._cur_fg_step += 1 + fg_cpuset = set(self._fg_cpuset) + fg_cpuset.add(self._cur_fg_step) + self._fg_cpuset = tuple(fg_cpuset) return self def weaken(self) -> 'SchedIsolator': - self._cur_step -= 1 + """ + Weaken increase the number of CPUs assigned to BG workloads and decrease that of FG workload + TODO: Changing Step Size if needed + :return: + """ + # NOTE: Caller is assumed that BG workload + if self._bg_next_step == NextStep.WEAKEN: + self._cur_bg_step -= 1 + bg_cpuset = set(self._bg_cpuset) + bg_cpuset.add(self._cur_bg_step) + self._bg_cpuset = tuple(bg_cpuset) + if self._fg_next_step == NextStep.STRENGTHEN: + self._cur_fg_step -= 1 + fg_cpuset = set(self._fg_cpuset) + fg_cpuset.remove(self._cur_fg_step) + self._fg_cpuset = tuple(fg_cpuset) return self @property def is_max_level(self) -> bool: - # FIXME: hard coded - return self._cur_step == 31 + # FIXME: How about first condition is true but the other is false? + if self._bg_next_step == NextStep.STRENGTHEN: + return self._cur_bg_step == max(self._cpu_topo[self._background_wl.socket_id]) + if self._fg_next_step == NextStep.WEAKEN: + return self._cur_fg_step == self._cur_bg_step-1 @property def is_min_level(self) -> bool: - # FIXME: hard coded - return self._cur_step == 24 + # FIXME: How about first condition is true but the other is false? 
+ if self._bg_next_step == NextStep.WEAKEN: + return self._cur_bg_step == self._cur_fg_step+1 + if self._fg_next_step == NextStep.STRENGTHEN: + return self._cur_fg_step == min(self._cpu_topo[self._foreground_wl.socket_id]) def _enforce(self) -> None: logger = logging.getLogger(__name__) - # FIXME: hard coded - logger.info(f'affinity of background is {self._cur_step}-31') + logger.info(f'affinity of background is {hyphen.convert_to_hyphen(self._bg_cpuset)}') - # FIXME: hard coded - CgroupCpuset.assign(self._bg_grp_name, set(range(self._cur_step, 32))) + # FIXME: Only changing the number of CPUs of BG process + self._cgroup.assign_cpus(set(self._bg_cpuset)) + self._cgroup.assign_cpus(set(self._fg_cpuset)) def _first_decision(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() @@ -94,7 +139,9 @@ def _monitoring_result(self) -> NextStep: return NextStep.STOP elif curr_diff > 0: + self._bg_next_step = NextStep.WEAKEN return NextStep.WEAKEN else: + self._bg_next_step = NextStep.STRENGTHEN return NextStep.STRENGTHEN diff --git a/isolating_controller/utils/__init__.py b/isolating_controller/utils/__init__.py index 37479ca..ce410ef 100644 --- a/isolating_controller/utils/__init__.py +++ b/isolating_controller/utils/__init__.py @@ -2,4 +2,6 @@ from .cat import CAT from .cgroup_cpuset import CgroupCpuset +from .cgroup import Cgroup from .dvfs import DVFS +from .numa_topology import NumaTopology diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index f333a28..46f614c 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -31,6 +31,7 @@ def __init__(self, name: str, wl_type: str, pid: int, perf_pid: int, perf_interv self._perf_interval = perf_interval self._proc_info = psutil.Process(pid) + self._socket_id = None def __repr__(self) -> str: return f'{self._name} (pid: {self._pid})' @@ -51,6 +52,11 @@ def wl_type(self) -> str: def metrics(self) -> Deque[BasicMetric]: return self._metrics + 
@property + def socket_id(self) -> int: + self._socket_id = self.get_socket_id() + return self._socket_id + @property def cpuset(self) -> Tuple[int, ...]: return tuple(self._proc_info.cpu_affinity()) @@ -85,7 +91,6 @@ def all_child_tid(self) -> Tuple[int, ...]: def get_socket_id(self) -> int: cpuset: Set[int] = self.cpuset cpu_topo, _ = NumaTopology.get_numa_info() - ret = None # FIXME: Hardcode for assumption (one workload to one socket) for socket_id, skt_cpus in cpu_topo.items(): @@ -93,4 +98,5 @@ def get_socket_id(self) -> int: for cpu_id in cpuset: if cpu_id in skt_cpus: ret = socket_id + self._socket_id = ret return ret From 9bef9471d4025d32c6fc1386d12b5f1a8e9f0277 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Fri, 21 Sep 2018 11:00:18 +0900 Subject: [PATCH 08/20] fix: Rename to CoreIsolator and add fg_next_step code --- .../isolation/isolators/__init__.py | 2 +- .../isolators/{schedule.py => core.py} | 45 ++++++++++++++----- .../isolation/policies/base_policy.py | 4 +- .../isolation/policies/diff_policy.py | 4 +- .../policies/diff_with_violation_policy.py | 4 +- .../isolation/policies/greedy_diff_policy.py | 4 +- .../greedy_diff_with_violation_policy.py | 4 +- 7 files changed, 44 insertions(+), 23 deletions(-) rename isolating_controller/isolation/isolators/{schedule.py => core.py} (74%) diff --git a/isolating_controller/isolation/isolators/__init__.py b/isolating_controller/isolation/isolators/__init__.py index b0084b8..634a419 100644 --- a/isolating_controller/isolation/isolators/__init__.py +++ b/isolating_controller/isolation/isolators/__init__.py @@ -5,4 +5,4 @@ from .cache import CacheIsolator from .idle import IdleIsolator from .memory import MemoryIsolator -from .schedule import SchedIsolator +from .core import CoreIsolator diff --git a/isolating_controller/isolation/isolators/schedule.py b/isolating_controller/isolation/isolators/core.py similarity index 74% rename from isolating_controller/isolation/isolators/schedule.py rename to 
isolating_controller/isolation/isolators/core.py index 8fbe218..034a04a 100644 --- a/isolating_controller/isolation/isolators/schedule.py +++ b/isolating_controller/isolation/isolators/core.py @@ -6,13 +6,12 @@ from .base_isolator import Isolator from .. import NextStep -#from ...utils import CgroupCpuset from ...workload import Workload from ...utils import Cgroup from ...utils import NumaTopology from ...utils import hyphen -class SchedIsolator(Isolator): +class CoreIsolator(Isolator): _DOD_THRESHOLD = 0.005 _FORCE_THRESHOLD = 0.1 @@ -39,10 +38,10 @@ def __del__(self) -> None: if self._background_wl.is_running: self._cgroup.assign_cpus(set(self._prev_bg_affinity)) - def strengthen(self) -> 'SchedIsolator': + def strengthen(self) -> 'CoreIsolator': """ Strengthen reduces the number of CPUs assigned to BG workloads and increase that of FG workload - TODO: Changing Step Size if needed + TODO: Changing step size, if needed :return: """ # NOTE: Caller is assumed that BG workload @@ -58,10 +57,10 @@ def strengthen(self) -> 'SchedIsolator': self._fg_cpuset = tuple(fg_cpuset) return self - def weaken(self) -> 'SchedIsolator': + def weaken(self) -> 'CoreIsolator': """ Weaken increase the number of CPUs assigned to BG workloads and decrease that of FG workload - TODO: Changing Step Size if needed + TODO: Changing step size, if needed :return: """ # NOTE: Caller is assumed that BG workload @@ -96,8 +95,8 @@ def is_min_level(self) -> bool: def _enforce(self) -> None: logger = logging.getLogger(__name__) logger.info(f'affinity of background is {hyphen.convert_to_hyphen(self._bg_cpuset)}') + logger.info(f'affinity of foreground is {hyphen.convert_to_hyphen(self._fg_cpuset)}') - # FIXME: Only changing the number of CPUs of BG process self._cgroup.assign_cpus(set(self._bg_cpuset)) self._cgroup.assign_cpus(set(self._fg_cpuset)) @@ -108,17 +107,29 @@ def _first_decision(self) -> NextStep: logger = logging.getLogger(__name__) logger.debug(f'current diff: {curr_diff:>7.4f}') + ## 
FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) + fg_strengthen_cond = None + fg_weaken_cond = None if curr_diff < 0: if self.is_max_level: + self._bg_next_step = NextStep.STOP return NextStep.STOP else: + self._bg_next_step = NextStep.STRENGTHEN + if fg_weaken_cond: + self._fg_next_step = NextStep.WEAKEN return NextStep.STRENGTHEN - elif curr_diff <= SchedIsolator._FORCE_THRESHOLD: + elif curr_diff <= CoreIsolator._FORCE_THRESHOLD: + self._bg_next_step = NextStep.STOP return NextStep.STOP else: if self.is_min_level: + self._bg_next_step = NextStep.STOP return NextStep.STOP else: + self._bg_next_step = NextStep.WEAKEN + if fg_strengthen_cond: + self._fg_next_step = NextStep.STRENGTHEN return NextStep.WEAKEN def _monitoring_result(self) -> NextStep: @@ -132,16 +143,26 @@ def _monitoring_result(self) -> NextStep: logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') - # FIXME: hard coded - if not (24 < self._cur_step < 31) \ - or abs(diff_of_diff) <= SchedIsolator._DOD_THRESHOLD \ - or abs(curr_diff) <= SchedIsolator._DOD_THRESHOLD: + # FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) + fg_strengthen_cond = None + fg_weaken_cond = None + max_bg_cpuid = max(self._cpu_topo[self._background_wl.socket_id]) + min_bg_cpuid = min(self._cpu_topo[self._background_wl.socket_id]) + if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid) \ + or abs(diff_of_diff) <= CoreIsolator._DOD_THRESHOLD \ + or abs(curr_diff) <= CoreIsolator._DOD_THRESHOLD: + self._bg_next_step = NextStep.STOP + self._fg_next_step = NextStep.STOP return NextStep.STOP elif curr_diff > 0: self._bg_next_step = NextStep.WEAKEN + if fg_strengthen_cond: + self._fg_next_step = NextStep.STRENGTHEN return NextStep.WEAKEN else: self._bg_next_step = NextStep.STRENGTHEN + if fg_weaken_cond: + self._fg_next_step = NextStep.WEAKEN return NextStep.STRENGTHEN diff --git 
a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index 42336ee..75793dd 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -5,7 +5,7 @@ from typing import Mapping, Type from isolating_controller.metric_container.basic_metric import MetricDiff -from ..isolators import CacheIsolator, IdleIsolator, Isolator, MemoryIsolator, SchedIsolator +from ..isolators import CacheIsolator, IdleIsolator, Isolator, MemoryIsolator, CoreIsolator from ...workload import Workload @@ -35,7 +35,7 @@ def init_isolators(self) -> None: self._isolator_map = dict(( (CacheIsolator, CacheIsolator(self._fg_wl, self._bg_wl)), (MemoryIsolator, MemoryIsolator(self._fg_wl, self._bg_wl)), - (SchedIsolator, SchedIsolator(self._fg_wl, self._bg_wl)) + (CoreIsolator, CoreIsolator(self._fg_wl, self._bg_wl)) )) @property diff --git a/isolating_controller/isolation/policies/diff_policy.py b/isolating_controller/isolation/policies/diff_policy.py index 5757a48..f36c22e 100644 --- a/isolating_controller/isolation/policies/diff_policy.py +++ b/isolating_controller/isolation/policies/diff_policy.py @@ -3,7 +3,7 @@ import logging from .base_policy import IsolationPolicy, ResourceType -from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload @@ -47,7 +47,7 @@ def choose_next_isolator(self) -> bool: return True elif not self._is_sched_isolated and resource is ResourceType.MEMORY: - self._cur_isolator = self._isolator_map[SchedIsolator] + self._cur_isolator = self._isolator_map[CoreIsolator] self._is_sched_isolated = True logger.info(f'Cpuset Isolation for {self._fg_wl} is started') return True diff --git a/isolating_controller/isolation/policies/diff_with_violation_policy.py 
b/isolating_controller/isolation/policies/diff_with_violation_policy.py index db58386..8df9003 100644 --- a/isolating_controller/isolation/policies/diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/diff_with_violation_policy.py @@ -4,7 +4,7 @@ from .base_policy import ResourceType from .diff_policy import DiffPolicy -from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload @@ -22,7 +22,7 @@ def _check_violation(self) -> bool: return \ resource is ResourceType.CACHE and not isinstance(self._cur_isolator, CacheIsolator) \ or resource is ResourceType.MEMORY and (not isinstance(self._cur_isolator, MemoryIsolator) - and not isinstance(self._cur_isolator, SchedIsolator)) + and not isinstance(self._cur_isolator, CoreIsolator)) @property def new_isolator_needed(self) -> bool: diff --git a/isolating_controller/isolation/policies/greedy_diff_policy.py b/isolating_controller/isolation/policies/greedy_diff_policy.py index d2b0fc9..a0a4d43 100644 --- a/isolating_controller/isolation/policies/greedy_diff_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_policy.py @@ -3,7 +3,7 @@ import logging from .base_policy import IsolationPolicy, ResourceType -from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload @@ -35,7 +35,7 @@ def choose_next_isolator(self) -> bool: return True elif resource is ResourceType.MEMORY: - self._cur_isolator = self._isolator_map[SchedIsolator] + self._cur_isolator = self._isolator_map[CoreIsolator] self._is_mem_isolated = False logger.info(f'Cpuset Isolation for {self._fg_wl} is started') return True diff --git a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py 
b/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py index 84d41ff..980d178 100644 --- a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py @@ -4,7 +4,7 @@ from .base_policy import ResourceType from .greedy_diff_policy import GreedyDiffPolicy -from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload @@ -22,7 +22,7 @@ def _check_violation(self) -> bool: return \ resource is ResourceType.CACHE and not isinstance(self._cur_isolator, CacheIsolator) \ or resource is ResourceType.MEMORY and (not isinstance(self._cur_isolator, MemoryIsolator) - and not isinstance(self._cur_isolator, SchedIsolator)) + and not isinstance(self._cur_isolator, CoreIsolator)) @property def new_isolator_needed(self) -> bool: From 46029a90bb181ed9993d58ca154def4ab5b87a57 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Fri, 21 Sep 2018 17:41:22 +0900 Subject: [PATCH 09/20] fix: Fix hard coded parts and add resctrl.py to utils --- .../isolation/isolators/cache.py | 72 +++++++++++-------- .../isolation/isolators/core.py | 15 ++-- .../isolation/policies/base_policy.py | 1 - isolating_controller/utils/__init__.py | 1 + isolating_controller/utils/resctrl.py | 67 +++++++++++++++++ 5 files changed, 123 insertions(+), 33 deletions(-) create mode 100644 isolating_controller/utils/resctrl.py diff --git a/isolating_controller/isolation/isolators/cache.py b/isolating_controller/isolation/isolators/cache.py index 5def5d2..e7f2002 100644 --- a/isolating_controller/isolation/isolators/cache.py +++ b/isolating_controller/isolation/isolators/cache.py @@ -1,11 +1,12 @@ # coding: UTF-8 import logging -from typing import Optional +from typing import Optional, Dict, Set from .base_isolator import Isolator from .. 
import NextStep -from ...utils import CAT +from ...utils import ResCtrl +from ...utils import NumaTopology from ...workload import Workload @@ -20,33 +21,32 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: self._cur_step: Optional[int] = None self._fg_grp_name = f'{foreground_wl.name}_{foreground_wl.pid}' - CAT.create_group(self._fg_grp_name) - for tid in foreground_wl.all_child_tid(): - CAT.add_task(self._fg_grp_name, tid) - self._bg_grp_name = f'{background_wl.name}_{background_wl.pid}' - CAT.create_group(self._bg_grp_name) - for tid in background_wl.all_child_tid(): - CAT.add_task(self._bg_grp_name, tid) + + self._fg_resctrl = ResCtrl(self._fg_grp_name) + self._bg_resctrl = ResCtrl(self._bg_grp_name) def __del__(self) -> None: logger = logging.getLogger(__name__) + max_bits = ResCtrl.MAX_BITS + max_mask = ResCtrl.gen_mask(0, max_bits) + if self._foreground_wl.is_running: logger.debug(f'reset resctrl configuration of {self._foreground_wl}') - # FIXME: hard coded - CAT.assign(self._fg_grp_name, '1', CAT.gen_mask(0, CAT.MAX)) + # FIXME: The number of socket is two at most + ResCtrl.assign_llc(self._fg_resctrl, max_mask, max_mask) if self._background_wl.is_running: logger.debug(f'reset resctrl configuration of {self._background_wl}') - # FIXME: hard coded - CAT.assign(self._bg_grp_name, '1', CAT.gen_mask(0, CAT.MAX)) + # FIXME: The number of socket is two at most + ResCtrl.assign_llc(self._bg_resctrl, max_mask, max_mask) def strengthen(self) -> 'CacheIsolator': self._prev_step = self._cur_step if self._cur_step is None: - self._cur_step = CAT.MAX // 2 + self._cur_step = ResCtrl.MAX_BITS // 2 else: self._cur_step += 1 @@ -66,34 +66,50 @@ def weaken(self) -> 'CacheIsolator': @property def is_max_level(self) -> bool: # FIXME: hard coded - return self._cur_step is not None and self._cur_step + CAT.STEP >= CAT.MAX + return self._cur_step is not None and self._cur_step + ResCtrl.STEP >= ResCtrl.MAX_BITS @property def is_min_level(self) -> 
bool: # FIXME: hard coded - return self._cur_step is None or self._cur_step - CAT.STEP <= CAT.MIN + return self._cur_step is None or self._cur_step - ResCtrl.STEP <= ResCtrl.MIN_BITS def _enforce(self) -> None: logger = logging.getLogger(__name__) + bg_socket_id = self._background_wl.socket_id + fg_socket_id = self._foreground_wl.socket_id + if self._cur_step is None: logger.info('CAT off') - # FIXME: hard coded - mask = CAT.gen_mask(0, CAT.MAX) - CAT.assign(self._fg_grp_name, '1', mask) - CAT.assign(self._bg_grp_name, '1', mask) + # FIXME: The number of socket is two at most + mask = ResCtrl.gen_mask(0, ResCtrl.MAX_BITS) + if bg_socket_id == 0: + ResCtrl.assign_llc(self._bg_resctrl, mask, '1') + if bg_socket_id == 1: + ResCtrl.assign_llc(self._bg_resctrl, '1', mask) + if fg_socket_id == 0: + ResCtrl.assign_llc(self._fg_resctrl, mask, '1') + if fg_socket_id == 1: + ResCtrl.assign_llc(self._fg_resctrl, '1', mask) else: - logger.info(f'foreground : background = {self._cur_step} : {CAT.MAX - self._cur_step}') + logger.info(f'foreground : background = {self._cur_step} : {ResCtrl.MAX_BITS - self._cur_step}') + + # FIXME: The number of socket is two at most + fg_mask = ResCtrl.gen_mask(0, self._cur_step) + if fg_socket_id == 0: + ResCtrl.assign_llc(self._fg_resctrl, fg_mask, '1') + if fg_socket_id == 1: + ResCtrl.assign_llc(self._fg_resctrl, '1', fg_mask) - # FIXME: hard coded - fg_mask = CAT.gen_mask(0, self._cur_step) - CAT.assign(self._fg_grp_name, '1', fg_mask) + # FIXME: The number of socket is two at most + bg_mask = ResCtrl.gen_mask(self._cur_step) + if bg_socket_id == 0: + ResCtrl.assign_llc(self._bg_resctrl, bg_mask, '1') + if bg_socket_id == 1: + ResCtrl.assign_llc(self._bg_resctrl, '1', bg_mask) - # FIXME: hard coded - bg_mask = CAT.gen_mask(self._cur_step) - CAT.assign(self._bg_grp_name, '1', bg_mask) def _first_decision(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() @@ -128,7 +144,7 @@ def _monitoring_result(self) -> NextStep: 
logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') if self._cur_step is not None \ - and not (CAT.MIN < self._cur_step < CAT.MAX) \ + and not (ResCtrl.MIN_BITS < self._cur_step < ResCtrl.MAX_BITS) \ or abs(diff_of_diff) <= CacheIsolator._DOD_THRESHOLD \ or abs(curr_diff) <= CacheIsolator._DOD_THRESHOLD: return NextStep.STOP diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index 034a04a..add83ad 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -26,9 +26,14 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: self._fg_next_step = NextStep.IDLE self._bg_next_step = NextStep.IDLE + self._fg_grp_name: str = f'{foreground_wl.name}_{foreground_wl.pid}' self._bg_grp_name: str = f'{background_wl.name}_{background_wl.pid}' + + self._prev_fg_affinity: Tuple[int] = foreground_wl.cpuset self._prev_bg_affinity: Tuple[int] = background_wl.cpuset - self._cgroup = Cgroup(self._bg_grp_name, 'cpuset,cpu') + + self._fg_cgroup = Cgroup(self._fg_grp_name, 'cpuset,cpu') + self._bg_cgroup = Cgroup(self._bg_grp_name, 'cpuset,cpu') cpu_topo, mem_topo = NumaTopology.get_numa_info() self._cpu_topo: Dict[int, Set[int]] = cpu_topo @@ -36,7 +41,9 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: def __del__(self) -> None: if self._background_wl.is_running: - self._cgroup.assign_cpus(set(self._prev_bg_affinity)) + self._bg_cgroup.assign_cpus(set(self._prev_bg_affinity)) + if self._foreground_wl.is_running: + self._fg_cgroup.assign_cpus(set(self._prev_fg_affinity)) def strengthen(self) -> 'CoreIsolator': """ @@ -97,8 +104,8 @@ def _enforce(self) -> None: logger.info(f'affinity of background is {hyphen.convert_to_hyphen(self._bg_cpuset)}') logger.info(f'affinity of foreground is {hyphen.convert_to_hyphen(self._fg_cpuset)}') - self._cgroup.assign_cpus(set(self._bg_cpuset)) - 
self._cgroup.assign_cpus(set(self._fg_cpuset)) + self._bg_cgroup.assign_cpus(set(self._bg_cpuset)) + self._fg_cgroup.assign_cpus(set(self._fg_cpuset)) def _first_decision(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() diff --git a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index 75793dd..10644af 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -13,7 +13,6 @@ class ResourceType(IntEnum): CACHE = 0 MEMORY = 1 - class IsolationPolicy(metaclass=ABCMeta): _IDLE_ISOLATOR: IdleIsolator = IdleIsolator() diff --git a/isolating_controller/utils/__init__.py b/isolating_controller/utils/__init__.py index ce410ef..fad398f 100644 --- a/isolating_controller/utils/__init__.py +++ b/isolating_controller/utils/__init__.py @@ -1,6 +1,7 @@ # coding: UTF-8 from .cat import CAT +from .resctrl import ResCtrl from .cgroup_cpuset import CgroupCpuset from .cgroup import Cgroup from .dvfs import DVFS diff --git a/isolating_controller/utils/resctrl.py b/isolating_controller/utils/resctrl.py new file mode 100644 index 0000000..d7f13ed --- /dev/null +++ b/isolating_controller/utils/resctrl.py @@ -0,0 +1,67 @@ +# coding: UTF-8 + +import subprocess +import asyncio +from pathlib import Path +from typing import Dict, Iterable, List, Tuple + +#import aiofiles +#from aiofiles.base import AiofilesContextManager + + +def len_of_mask(mask: str) -> int: + cnt = 0 + num = int(mask, 16) + while num is not 0: + cnt += 1 + num >>= 1 + return cnt + + +def bits_to_mask(bits: int) -> str: + return f'{bits:x}' + + +class ResCtrl: + MOUNT_POINT: Path = Path('/sys/fs/resctrl') + MAX_MASK: str = Path('/sys/fs/resctrl/info/L3/cbm_mask').read_text(encoding='ASCII').strip() + MAX_BITS: int = len_of_mask((MOUNT_POINT / 'info' / 'L3' / 'cbm_mask').read_text()) + MIN_BITS: int = int((MOUNT_POINT / 'info' / 'L3' / 'min_cbm_bits').read_text()) + 
MIN_MASK: str = bits_to_mask(MIN_BITS) + STEP = 1 + + def __init__(self, group_name: str) -> None: + self._group_name: str = group_name + self._group_path: Path = ResCtrl.MOUNT_POINT/f'{group_name}' + + @property + def group_name(self): + return self._group_name + + @group_name.setter + def group_name(self, new_name): + self._group_name = new_name + self._group_path: Path = ResCtrl.MOUNT_POINT / new_name + + def add_task(self, pid: int) -> None: + subprocess.run(args=('sudo', 'tee', str(self._group_path / 'tasks')), + input=f'{pid}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) + + def assign_llc(self, *masks: str) -> None: + masks = (f'{i}={mask}' for i, mask in enumerate(masks)) + mask = ';'.join(masks) + subprocess.run(args=('sudo', 'tee', str(ResCtrl.MOUNT_POINT / self._group_name / 'schemata')), + input=f'L3:{mask}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) + + @staticmethod + def gen_mask(start: int, end: int = None) -> str: + if end is None or end > ResCtrl.MAX_BITS: + end = ResCtrl.MAX_BITS + + if start < 0: + raise ValueError('start must be greater than 0') + + return format(((1 << (end - start)) - 1) << (ResCtrl.MAX_BITS - end), 'x') + + def remove_group(self) -> None: + subprocess.check_call(args=('sudo', 'rmdir', str(ResCtrl.MOUNT_POINT / self._group_name))) From 40b29ebf976b7f9d42dd3e5898e1c3892020bbbb Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Fri, 21 Sep 2018 17:42:05 +0900 Subject: [PATCH 10/20] feat: Add Swap related code --- controller.py | 2 ++ .../isolation/isolators/swap.py | 31 +++++++++++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/controller.py b/controller.py index ca39610..78528e6 100755 --- a/controller.py +++ b/controller.py @@ -143,6 +143,8 @@ def __init__(self, pending_queue: PendingQueue) -> None: def _isolate_workloads(self) -> None: logger = logging.getLogger(__name__) + ##TODO: Swapper may come here + for group, iteration_num in self._isolation_groups.items(): 
logger.info('') logger.info(f'***************isolation of {group.name} #{iteration_num}***************') diff --git a/isolating_controller/isolation/isolators/swap.py b/isolating_controller/isolation/isolators/swap.py index 3be9aca..2bcdbcd 100644 --- a/isolating_controller/isolation/isolators/swap.py +++ b/isolating_controller/isolation/isolators/swap.py @@ -18,6 +18,8 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload, self._all_groups = isolation_groups self._swap_candidates: Set[Workload] = None + self._most_contentious_group = None + self._most_contentious_workload = None def __del__(self): logger = logging.getLogger(__name__) @@ -28,16 +30,16 @@ def __del__(self): logger.debug(f'reset swap configuration of {self._background_wl}') - def strengthen(self) -> 'Isolator': + def strengthen(self) -> 'SwapIsolator': """ Choosing which contentious workloads to swap out to other socket :return: """ # FIXME: hard coded (two sockets) - ## Estimating the socket contention - ## + ## 1.Estimating and selecting the most contentious workloads from the socket of cur_group + ## 2. 
- return + return self @property def is_max_level(self) -> bool: @@ -46,9 +48,9 @@ def is_max_level(self) -> bool: e.g., There is no searchable candidate to strengthen the degree of isolation :return: """ - # FIXME: + # FIXME: hard coded + return self._swap_candidates == None - return False @property def is_min_level(self) -> bool: @@ -57,17 +59,18 @@ def is_min_level(self) -> bool: e.g., There is no searchable candidate to weaken the degree of isolation :return: """ - # FIXME: + # FIXME: hard coded + return self._swap_candidates == None - return False - def weaken(self) -> 'Isolator': + def weaken(self) -> 'SwapIsolator': """ Choosing which contentious workloads to swap in from other socket :return: """ # FIXME: hard coded (two sockets) - pass + ## 1.Estimating and selecting the most contentious workloads from the socket of other_group + return self def _enforce(self) -> None: """ @@ -82,7 +85,15 @@ def _enforce(self) -> None: # self._enforce() def _first_decision(self) -> NextStep: + """ + How to choose the first candidate? + :return: + """ pass def _monitoring_result(self) -> NextStep: + """ + If the effect of swapping is getting worse, then rollback?? 
+ :return: + """ pass From 43473089610a67093a74a11fb0bed369fde2dd74 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Sun, 23 Sep 2018 22:46:11 +0900 Subject: [PATCH 11/20] feat: Separate fg and bg in CoreIsolator --- controller.py | 10 +- .../isolation/isolators/base_isolator.py | 3 + .../isolation/isolators/core.py | 99 +++++++++++++++---- .../isolation/isolators/idle.py | 4 + .../metric_container/basic_metric.py | 7 +- isolating_controller/workload.py | 2 +- pending_queue.py | 33 ++++--- 7 files changed, 115 insertions(+), 43 deletions(-) diff --git a/controller.py b/controller.py index 78528e6..cf90303 100755 --- a/controller.py +++ b/controller.py @@ -20,7 +20,7 @@ import isolating_controller from isolating_controller.isolation import NextStep from isolating_controller.isolation.isolators import Isolator -from isolating_controller.isolation.policies import DiffPolicy, IsolationPolicy +from isolating_controller.isolation.policies import GreedyDiffWViolationPolicy, IsolationPolicy from isolating_controller.metric_container.basic_metric import BasicMetric from isolating_controller.workload import Workload from pending_queue import PendingQueue @@ -46,7 +46,7 @@ def __init__(self, metric_buf_size: int) -> None: self._rmq_creation_queue = 'workload_creation' ## FIXME : Hard coded - PendingQueue can have four workloads at most (second argument) - self._pending_wl = PendingQueue(DiffPolicy, 4) + self._pending_wl = PendingQueue(GreedyDiffWViolationPolicy, 2) self._control_thread = ControlThread(self._pending_wl) self._lock = RLock() @@ -61,10 +61,12 @@ def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicP if len(arr) != 5: return - wl_name, wl_type, pid, perf_pid, perf_interval = arr + wl_identifier, wl_type, pid, perf_pid, perf_interval = arr pid = int(pid) perf_pid = int(perf_pid) perf_interval = int(perf_interval) + item = wl_identifier.split('_') + wl_name = item[0] if not psutil.pid_exists(pid): return @@ -137,7 +139,7 @@ def 
__init__(self, pending_queue: PendingQueue) -> None: self._pending_queue: PendingQueue = pending_queue - self._interval: int = 2 # Scheduling interval + self._interval: float = 0.2 # Scheduling interval self._isolation_groups: Dict[IsolationPolicy, int] = dict() def _isolate_workloads(self) -> None: diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py index 264696c..93a9ed7 100644 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ b/isolating_controller/isolation/isolators/base_isolator.py @@ -14,6 +14,9 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: self._foreground_wl = foreground_wl self._background_wl = background_wl + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE + self._is_first_decision: bool = True @abstractmethod diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index add83ad..325210c 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -23,8 +23,8 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: self._cur_bg_step: int = min(self._bg_cpuset) self._cur_fg_step: int = max(self._fg_cpuset) - self._fg_next_step = NextStep.IDLE - self._bg_next_step = NextStep.IDLE + #self._fg_next_step = NextStep.IDLE + #self._bg_next_step = NextStep.IDLE self._fg_grp_name: str = f'{foreground_wl.name}_{foreground_wl.pid}' self._bg_grp_name: str = f'{background_wl.name}_{background_wl.pid}' @@ -52,14 +52,20 @@ def strengthen(self) -> 'CoreIsolator': :return: """ # NOTE: Caller is assumed that BG workload + logger = logging.getLogger(__name__) + logger.info(f'self._cur_bg_step: {self._cur_bg_step}') + logger.info(f'self._cur_fg_step: {self._cur_fg_step}') + logger.info(f'self._bg_next_step: {self._bg_next_step.name}') + logger.info(f'self._fg_next_step: 
{self._fg_next_step.name}') + if self._bg_next_step == NextStep.STRENGTHEN: - self._cur_bg_step += 1 bg_cpuset = set(self._bg_cpuset) bg_cpuset.remove(self._cur_bg_step) self._bg_cpuset = tuple(bg_cpuset) + self._cur_bg_step += 1 if self._fg_next_step == NextStep.WEAKEN: - self._cur_fg_step += 1 fg_cpuset = set(self._fg_cpuset) + self._cur_fg_step += 1 fg_cpuset.add(self._cur_fg_step) self._fg_cpuset = tuple(fg_cpuset) return self @@ -71,33 +77,56 @@ def weaken(self) -> 'CoreIsolator': :return: """ # NOTE: Caller is assumed that BG workload + logger = logging.getLogger(__name__) + logger.info(f'self._cur_bg_step: {self._cur_bg_step}') + logger.info(f'self._cur_fg_step: {self._cur_fg_step}') + logger.info(f'self._bg_next_step: {self._bg_next_step.name}') + logger.info(f'self._fg_next_step: {self._fg_next_step.name}') + if self._bg_next_step == NextStep.WEAKEN: - self._cur_bg_step -= 1 bg_cpuset = set(self._bg_cpuset) + self._cur_bg_step -= 1 bg_cpuset.add(self._cur_bg_step) self._bg_cpuset = tuple(bg_cpuset) if self._fg_next_step == NextStep.STRENGTHEN: - self._cur_fg_step -= 1 fg_cpuset = set(self._fg_cpuset) fg_cpuset.remove(self._cur_fg_step) self._fg_cpuset = tuple(fg_cpuset) + self._cur_fg_step -= 1 return self @property def is_max_level(self) -> bool: + logger = logging.getLogger(__name__) + logger.info(f'bg max cpuset: {max(self._cpu_topo[self._background_wl.socket_id])}') + logger.info(f'self._cur_bg_step: {self._cur_bg_step}') + logger.info(f'self._cur_fg_step: {self._cur_fg_step}') + logger.info(f'self._bg_next_step: {self._bg_next_step.name}') + logger.info(f'self._fg_next_step: {self._fg_next_step.name}') # FIXME: How about first condition is true but the other is false? 
- if self._bg_next_step == NextStep.STRENGTHEN: - return self._cur_bg_step == max(self._cpu_topo[self._background_wl.socket_id]) - if self._fg_next_step == NextStep.WEAKEN: - return self._cur_fg_step == self._cur_bg_step-1 + if self._cur_bg_step == max(self._cpu_topo[self._background_wl.socket_id]): + self._bg_next_step = NextStep.STOP + return True + #if self._cur_fg_step == self._cur_bg_step-1: + # self._fg_next_step = NextStep.STOP + else: + return False @property def is_min_level(self) -> bool: + logger = logging.getLogger(__name__) + logger.info(f'self._cur_bg_step: {self._cur_bg_step}') + logger.info(f'self._cur_fg_step: {self._cur_fg_step}') + logger.info(f'self._bg_next_step: {self._bg_next_step.name}') + logger.info(f'self._fg_next_step: {self._fg_next_step.name}') + # FIXME: How about first condition is true but the other is false? - if self._bg_next_step == NextStep.WEAKEN: - return self._cur_bg_step == self._cur_fg_step+1 - if self._fg_next_step == NextStep.STRENGTHEN: - return self._cur_fg_step == min(self._cpu_topo[self._foreground_wl.socket_id]) + if self._cur_bg_step == self._cur_fg_step+1: + return True + #if self._cur_fg_step == min(self._cpu_topo[self._foreground_wl.socket_id]): + # return True + else: + return False def _enforce(self) -> None: logger = logging.getLogger(__name__) @@ -151,25 +180,53 @@ def _monitoring_result(self) -> NextStep: logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') # FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) - fg_strengthen_cond = None - fg_weaken_cond = None + fg_strengthen_cond = self.fg_strengthen_cond(metric_diff.ipc) + fg_weaken_cond = self.fg_weaken_cond(metric_diff.ipc) + + logger = logging.getLogger(__name__) + logger.info(f'metric_diff.ipc: {metric_diff.ipc}') + logger.info(f'self.fg_strengthen_cond: {fg_strengthen_cond}') + logger.info(f'self.fg_weaken_cond: {fg_weaken_cond}') + + # FIXME: Assumption about fg's cpuset IDs are smaller than 
bg's ones. (kind of hard coded) max_bg_cpuid = max(self._cpu_topo[self._background_wl.socket_id]) - min_bg_cpuid = min(self._cpu_topo[self._background_wl.socket_id]) - if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid) \ - or abs(diff_of_diff) <= CoreIsolator._DOD_THRESHOLD \ - or abs(curr_diff) <= CoreIsolator._DOD_THRESHOLD: + min_bg_cpuid = max(self._fg_cpuset)+1 + + # Case1 : diff is too small to perform isolation + if abs(diff_of_diff) <= CoreIsolator._DOD_THRESHOLD \ + or abs(curr_diff) <= CoreIsolator._DOD_THRESHOLD: self._bg_next_step = NextStep.STOP - self._fg_next_step = NextStep.STOP + self._fg_next_step = NextStep.STOP # This line depends on bg status return NextStep.STOP + # Case2 : FG shows lower contention than solo-run -> Slower FG or Faster BG elif curr_diff > 0: self._bg_next_step = NextStep.WEAKEN + if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid): + self._bg_next_step = NextStep.STOP if fg_strengthen_cond: self._fg_next_step = NextStep.STRENGTHEN return NextStep.WEAKEN + # Case3 : FG shows higher contention than solo-run else: self._bg_next_step = NextStep.STRENGTHEN + if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid): + self._bg_next_step = NextStep.STOP if fg_weaken_cond: self._fg_next_step = NextStep.WEAKEN return NextStep.STRENGTHEN + + @staticmethod + def fg_strengthen_cond(fg_ipc_diff) -> bool: + if fg_ipc_diff > 0: + return True + else: + return False + + @staticmethod + def fg_weaken_cond(fg_ipc_diff) -> bool: + if fg_ipc_diff <= 0: + return True + else: + return False diff --git a/isolating_controller/isolation/isolators/idle.py b/isolating_controller/isolation/isolators/idle.py index d720a59..9a2ec15 100644 --- a/isolating_controller/isolation/isolators/idle.py +++ b/isolating_controller/isolation/isolators/idle.py @@ -26,10 +26,14 @@ def _enforce(self) -> None: pass def _first_decision(self) -> NextStep: + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE return NextStep.IDLE def 
decide_next_step(self) -> NextStep: return self._monitoring_result() def _monitoring_result(self) -> NextStep: + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE return NextStep.IDLE diff --git a/isolating_controller/metric_container/basic_metric.py b/isolating_controller/metric_container/basic_metric.py index fcb79d4..eb889c7 100644 --- a/isolating_controller/metric_container/basic_metric.py +++ b/isolating_controller/metric_container/basic_metric.py @@ -71,7 +71,7 @@ def req_date(self): return self._req_date @property - def ipc(self): + def ipc(self) -> float: return self._instructions / self._cycles @property @@ -113,6 +113,7 @@ def __init__(self, curr: BasicMetric, prev: BasicMetric) -> None: self._l3_hit_ratio = curr.l3hit_ratio - prev.l3hit_ratio self._local_mem_ps = curr.local_mem_ps() / prev.local_mem_ps() - 1 self._remote_mem_ps = curr.remote_mem_ps() / prev.remote_mem_ps() - 1 + self._ipc = curr.ipc - prev.ipc @property def l3_hit_ratio(self): @@ -126,5 +127,9 @@ def local_mem_util_ps(self): def remote_mem_ps(self): return self._remote_mem_ps + @property + def ipc(self): + return self._ipc + def __repr__(self) -> str: return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, Local Memory access diff: {self._local_mem_ps:>6.03f}' diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index 46f614c..805f2a1 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -94,7 +94,7 @@ def get_socket_id(self) -> int: # FIXME: Hardcode for assumption (one workload to one socket) for socket_id, skt_cpus in cpu_topo.items(): - print(f'cpuset: {cpuset}, socket_id: {socket_id}, skt_cpus: {skt_cpus}') + #print(f'cpuset: {cpuset}, socket_id: {socket_id}, skt_cpus: {skt_cpus}') for cpu_id in cpuset: if cpu_id in skt_cpus: ret = socket_id diff --git a/pending_queue.py b/pending_queue.py index 877d975..88d2fe9 100644 --- a/pending_queue.py +++ b/pending_queue.py @@ -80,22 +80,23 @@ def 
dump_to_pending_list(self) -> None: # Grouping pids based on their types and skt_id for node in node_list: node_pidset = group_pids[node] - pid = node_pidset.pop() - print(f'Pop {pid}!') - if pid in fg_pids: - bg_pid = node_pidset.pop() - print(f'Pop {bg_pid}!') - new_group = self._policy_type(self._fg_q[pid], self._bg_q[bg_pid], node) - self._pending_list.append(new_group) - del self._fg_q[pid] - del self._bg_q[bg_pid] - elif pid in bg_pids: - fg_pid = node_pidset.pop() - print(f'Pop {fg_pid}!') - new_group = self._policy_type(self._fg_q[fg_pid], self._bg_q[pid], node) - self._pending_list.append(new_group) - del self._fg_q[fg_pid] - del self._bg_q[pid] + if len(node_pidset) > 0: + pid = node_pidset.pop() + print(f'Pop {pid}!') + if pid in fg_pids: + bg_pid = node_pidset.pop() + print(f'Pop {bg_pid}!') + new_group = self._policy_type(self._fg_q[pid], self._bg_q[bg_pid], node) + self._pending_list.append(new_group) + del self._fg_q[pid] + del self._bg_q[bg_pid] + elif pid in bg_pids: + fg_pid = node_pidset.pop() + print(f'Pop {fg_pid}!') + new_group = self._policy_type(self._fg_q[fg_pid], self._bg_q[pid], node) + self._pending_list.append(new_group) + del self._fg_q[fg_pid] + del self._bg_q[pid] return def update_max_pending(self, new_max_pending: int): From d559567cc19103e352399c3c8ce9107253340ac0 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Mon, 24 Sep 2018 15:32:37 +0900 Subject: [PATCH 12/20] feat: Add ResourceType.CPU and related logics --- controller.py | 6 +- .../isolation/isolators/base_isolator.py | 6 +- .../isolation/isolators/cache.py | 2 +- .../isolation/isolators/core.py | 7 +-- .../isolation/isolators/swap.py | 2 +- .../isolation/policies/__init__.py | 1 + .../isolation/policies/base_policy.py | 11 +++- .../isolation/policies/diff_policy.py | 6 +- .../isolation/policies/diff_policy_cpu.py | 61 +++++++++++++++++++ .../isolation/policies/greedy_diff_policy.py | 15 +++-- 10 files changed, 95 insertions(+), 22 deletions(-) create mode 100644 
isolating_controller/isolation/policies/diff_policy_cpu.py diff --git a/controller.py b/controller.py index cf90303..20b2020 100755 --- a/controller.py +++ b/controller.py @@ -20,7 +20,7 @@ import isolating_controller from isolating_controller.isolation import NextStep from isolating_controller.isolation.isolators import Isolator -from isolating_controller.isolation.policies import GreedyDiffWViolationPolicy, IsolationPolicy +from isolating_controller.isolation.policies import GreedyDiffWViolationPolicy, DiffCPUPolicy, IsolationPolicy from isolating_controller.metric_container.basic_metric import BasicMetric from isolating_controller.workload import Workload from pending_queue import PendingQueue @@ -46,7 +46,7 @@ def __init__(self, metric_buf_size: int) -> None: self._rmq_creation_queue = 'workload_creation' ## FIXME : Hard coded - PendingQueue can have four workloads at most (second argument) - self._pending_wl = PendingQueue(GreedyDiffWViolationPolicy, 2) + self._pending_wl = PendingQueue(DiffCPUPolicy, 2) self._control_thread = ControlThread(self._pending_wl) self._lock = RLock() @@ -145,7 +145,7 @@ def __init__(self, pending_queue: PendingQueue) -> None: def _isolate_workloads(self) -> None: logger = logging.getLogger(__name__) - ##TODO: Swapper may come here + ## TODO: Swapper may come here for group, iteration_num in self._isolation_groups.items(): logger.info('') diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py index 93a9ed7..af129bd 100644 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ b/isolating_controller/isolation/isolators/base_isolator.py @@ -2,13 +2,16 @@ from abc import ABCMeta, abstractmethod +from typing import Optional + from .. 
import NextStep from ...metric_container.basic_metric import MetricDiff from ...workload import Workload +from ..policies.base_policy import IsolationPolicy, ResourceType class Isolator(metaclass=ABCMeta): - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: + def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resource: Optional[ResourceType]) -> None: self._prev_metric_diff: MetricDiff = foreground_wl.calc_metric_diff() self._foreground_wl = foreground_wl @@ -18,6 +21,7 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: self._bg_next_step = NextStep.IDLE self._is_first_decision: bool = True + self._contentious_resource: Optional[ResourceType] = cont_resource @abstractmethod def strengthen(self) -> 'Isolator': diff --git a/isolating_controller/isolation/isolators/cache.py b/isolating_controller/isolation/isolators/cache.py index e7f2002..99d0bb3 100644 --- a/isolating_controller/isolation/isolators/cache.py +++ b/isolating_controller/isolation/isolators/cache.py @@ -15,7 +15,7 @@ class CacheIsolator(Isolator): _FORCE_THRESHOLD = 0.1 def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: - super().__init__(foreground_wl, background_wl) + super().__init__(foreground_wl, background_wl, None) self._prev_step: Optional[int] = None self._cur_step: Optional[int] = None diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index 325210c..b1a14f4 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -16,16 +16,13 @@ class CoreIsolator(Isolator): _FORCE_THRESHOLD = 0.1 def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: - super().__init__(foreground_wl, background_wl) + super().__init__(foreground_wl, background_wl, None) self._fg_cpuset: Tuple[int] = foreground_wl.cpuset self._bg_cpuset: Tuple[int] = background_wl.cpuset 
self._cur_bg_step: int = min(self._bg_cpuset) self._cur_fg_step: int = max(self._fg_cpuset) - #self._fg_next_step = NextStep.IDLE - #self._bg_next_step = NextStep.IDLE - self._fg_grp_name: str = f'{foreground_wl.name}_{foreground_wl.pid}' self._bg_grp_name: str = f'{background_wl.name}_{background_wl.pid}' @@ -196,7 +193,7 @@ def _monitoring_result(self) -> NextStep: if abs(diff_of_diff) <= CoreIsolator._DOD_THRESHOLD \ or abs(curr_diff) <= CoreIsolator._DOD_THRESHOLD: self._bg_next_step = NextStep.STOP - self._fg_next_step = NextStep.STOP # This line depends on bg status + #self._fg_next_step = NextStep.STOP # This line depends on bg status return NextStep.STOP # Case2 : FG shows lower contention than solo-run -> Slower FG or Faster BG diff --git a/isolating_controller/isolation/isolators/swap.py b/isolating_controller/isolation/isolators/swap.py index 2bcdbcd..bedc04e 100644 --- a/isolating_controller/isolation/isolators/swap.py +++ b/isolating_controller/isolation/isolators/swap.py @@ -14,7 +14,7 @@ class SwapIsolator(Isolator): def __init__(self, foreground_wl: Workload, background_wl: Workload, isolation_groups: Dict[IsolationPolicy, int]) -> None: - super().__init__(foreground_wl, background_wl) + super().__init__(foreground_wl, background_wl, None) self._all_groups = isolation_groups self._swap_candidates: Set[Workload] = None diff --git a/isolating_controller/isolation/policies/__init__.py b/isolating_controller/isolation/policies/__init__.py index 6f9c6ee..bde7236 100644 --- a/isolating_controller/isolation/policies/__init__.py +++ b/isolating_controller/isolation/policies/__init__.py @@ -2,6 +2,7 @@ from .base_policy import IsolationPolicy, ResourceType from .diff_policy import DiffPolicy +from .diff_policy_cpu import DiffCPUPolicy from .diff_with_violation_policy import DiffWViolationPolicy from .greedy_diff_policy import GreedyDiffPolicy from .greedy_diff_with_violation_policy import GreedyDiffWViolationPolicy diff --git 
a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index 10644af..1259ded 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -10,11 +10,15 @@ class ResourceType(IntEnum): - CACHE = 0 - MEMORY = 1 + CPU = 0 + CACHE = 1 + MEMORY = 2 + class IsolationPolicy(metaclass=ABCMeta): _IDLE_ISOLATOR: IdleIsolator = IdleIsolator() + # FIXME : _CPU_THRESHOLD needs test + _CPU_THRESHOLD = 0.01 def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: self._fg_wl = fg_wl @@ -51,6 +55,9 @@ def contentious_resource(self) -> ResourceType: logger = logging.getLogger(__name__) logger.info(repr(metric_diff)) + if abs(metric_diff.local_mem_util_ps) < IsolationPolicy._CPU_THRESHOLD \ + and abs(metric_diff.l3_hit_ratio) < IsolationPolicy._CPU_THRESHOLD: + return ResourceType.CPU if metric_diff.local_mem_util_ps > 0 and metric_diff.l3_hit_ratio > 0: if metric_diff.l3_hit_ratio > metric_diff.local_mem_util_ps: diff --git a/isolating_controller/isolation/policies/diff_policy.py b/isolating_controller/isolation/policies/diff_policy.py index f36c22e..00413ff 100644 --- a/isolating_controller/isolation/policies/diff_policy.py +++ b/isolating_controller/isolation/policies/diff_policy.py @@ -30,10 +30,6 @@ def choose_next_isolator(self) -> bool: resource: ResourceType = self.contentious_resource() - if self._is_sched_isolated and self._is_mem_isolated and self._is_llc_isolated: - self._clear_flags() - logger.debug('****All isolators are applicable for now!****') - if not self._is_llc_isolated and resource is ResourceType.CACHE: self._cur_isolator = self._isolator_map[CacheIsolator] self._is_llc_isolated = True @@ -49,7 +45,7 @@ def choose_next_isolator(self) -> bool: elif not self._is_sched_isolated and resource is ResourceType.MEMORY: self._cur_isolator = self._isolator_map[CoreIsolator] self._is_sched_isolated = True - 
logger.info(f'Cpuset Isolation for {self._fg_wl} is started') + logger.info(f'Core Isolation for {self._fg_wl} is started') return True else: diff --git a/isolating_controller/isolation/policies/diff_policy_cpu.py b/isolating_controller/isolation/policies/diff_policy_cpu.py new file mode 100644 index 0000000..8289703 --- /dev/null +++ b/isolating_controller/isolation/policies/diff_policy_cpu.py @@ -0,0 +1,61 @@ +# coding: UTF-8 + +import logging + +from .base_policy import IsolationPolicy, ResourceType +from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator +from ...workload import Workload + + +class DiffCPUPolicy(IsolationPolicy): + def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: + super().__init__(fg_wl, bg_wl, skt_id) + + self._is_llc_isolated = False + self._is_mem_isolated = False + self._is_core_isolated = False + + @property + def new_isolator_needed(self) -> bool: + return isinstance(self._cur_isolator, IdleIsolator) + + def _clear_flags(self) -> None: + self._is_llc_isolated = False + self._is_mem_isolated = False + self._is_core_isolated = False + + def choose_next_isolator(self) -> bool: + logger = logging.getLogger(__name__) + logger.debug('looking for new isolation...') + + resource: ResourceType = self.contentious_resource() + + if not self._is_core_isolated and resource is ResourceType.CPU: + self._cur_isolator = self._isolator_map[CoreIsolator] + self._cur_isolator._contentious_resource = ResourceType.CPU + logger.info(f'Core Isolation for {self._fg_wl} is started to isolate {ResourceType.CPU.name}s') + return True + + elif not self._is_llc_isolated and resource is ResourceType.CACHE: + self._cur_isolator = self._isolator_map[CacheIsolator] + self._is_llc_isolated = True + logger.info(f'Cache Isolation for {self._fg_wl} is started to isolate {ResourceType.CACHE.name}s') + return True + + elif not self._is_mem_isolated and resource is ResourceType.MEMORY: + self._cur_isolator = 
self._isolator_map[MemoryIsolator] + self._is_mem_isolated = True + logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started ' + f'to isolate {ResourceType.MEMORY.name} BW') + return True + + elif not self._is_core_isolated and resource is ResourceType.MEMORY: + self._cur_isolator = self._isolator_map[CoreIsolator] + self._is_core_isolated = True + self._cur_isolator._contentious_resource = ResourceType.MEMORY + logger.info(f'Core Isolation for {self._fg_wl} is started to isolate {ResourceType.MEMORY.name} BW ') + return True + + else: + logger.debug('A new Isolator has not been selected.') + return False diff --git a/isolating_controller/isolation/policies/greedy_diff_policy.py b/isolating_controller/isolation/policies/greedy_diff_policy.py index a0a4d43..f6f801c 100644 --- a/isolating_controller/isolation/policies/greedy_diff_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_policy.py @@ -23,21 +23,28 @@ def choose_next_isolator(self) -> bool: resource: ResourceType = self.contentious_resource() - if resource is ResourceType.CACHE: + if resource is ResourceType.CPU: + self._cur_isolator = self._isolator_map[CoreIsolator] + self._cur_isolator._contentious_resource = ResourceType.CPU + logger.info(f'Core Isolation for {self._fg_wl} is started to isolate {ResourceType.CPU.name}s') + return True + + elif resource is ResourceType.CACHE: self._cur_isolator = self._isolator_map[CacheIsolator] - logger.info(f'Cache Isolation for {self._fg_wl} is started') + logger.info(f'Cache Isolation for {self._fg_wl} is started to isolate {ResourceType.CACHE.name}s') return True elif not self._is_mem_isolated and resource is ResourceType.MEMORY: self._cur_isolator = self._isolator_map[MemoryIsolator] self._is_mem_isolated = True - logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started') + logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started ' + f'to isolate {ResourceType.MEMORY.name} BW') return True elif resource is 
ResourceType.MEMORY: self._cur_isolator = self._isolator_map[CoreIsolator] self._is_mem_isolated = False - logger.info(f'Cpuset Isolation for {self._fg_wl} is started') + logger.info(f'Cpuset Isolation for {self._fg_wl} is started to isolate {ResourceType.MEMORY.name} BW') return True else: From 62858e9bc105a73f886464f5628802e704e71dca Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Mon, 24 Sep 2018 18:26:56 +0900 Subject: [PATCH 13/20] fix: Change the diff policies for ResourceType.CPU --- .../isolation/isolators/base_isolator.py | 2 +- .../isolation/isolators/core.py | 20 +++++--- .../isolation/policies/diff_policy.py | 8 +-- .../isolation/policies/diff_policy_cpu.py | 2 +- .../metric_container/basic_metric.py | 3 +- isolating_controller/utils/__init__.py | 1 - isolating_controller/utils/cgroup_cpuset.py | 50 ------------------- 7 files changed, 22 insertions(+), 64 deletions(-) delete mode 100644 isolating_controller/utils/cgroup_cpuset.py diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py index af129bd..2c119d4 100644 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ b/isolating_controller/isolation/isolators/base_isolator.py @@ -7,7 +7,7 @@ from .. 
import NextStep from ...metric_container.basic_metric import MetricDiff from ...workload import Workload -from ..policies.base_policy import IsolationPolicy, ResourceType +from ..policies.base_policy import ResourceType class Isolator(metaclass=ABCMeta): diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index b1a14f4..33fa15d 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -10,6 +10,7 @@ from ...utils import Cgroup from ...utils import NumaTopology from ...utils import hyphen +from ..policies.base_policy import ResourceType class CoreIsolator(Isolator): _DOD_THRESHOLD = 0.005 @@ -141,8 +142,8 @@ def _first_decision(self) -> NextStep: logger.debug(f'current diff: {curr_diff:>7.4f}') ## FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) - fg_strengthen_cond = None - fg_weaken_cond = None + fg_strengthen_cond = self.fg_strengthen_cond(metric_diff.ipc) + fg_weaken_cond = self.fg_weaken_cond(metric_diff.ipc) if curr_diff < 0: if self.is_max_level: self._bg_next_step = NextStep.STOP @@ -167,10 +168,17 @@ def _first_decision(self) -> NextStep: def _monitoring_result(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() - - curr_diff = metric_diff.local_mem_util_ps - prev_diff = self._prev_metric_diff.local_mem_util_ps - diff_of_diff = curr_diff - prev_diff + curr_diff = None + diff_of_diff = None + + if self._contentious_resource == ResourceType.MEMORY: + curr_diff = metric_diff.local_mem_util_ps + prev_diff = self._prev_metric_diff.local_mem_util_ps + diff_of_diff = curr_diff - prev_diff + elif self._contentious_resource == ResourceType.CPU: + curr_diff = metric_diff.ipc + prev_diff = self._prev_metric_diff.ipc + diff_of_diff = curr_diff - prev_diff logger = logging.getLogger(__name__) logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') diff --git 
a/isolating_controller/isolation/policies/diff_policy.py b/isolating_controller/isolation/policies/diff_policy.py index 00413ff..84ae81f 100644 --- a/isolating_controller/isolation/policies/diff_policy.py +++ b/isolating_controller/isolation/policies/diff_policy.py @@ -13,7 +13,7 @@ def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: self._is_llc_isolated = False self._is_mem_isolated = False - self._is_sched_isolated = False + self._is_core_isolated = False @property def new_isolator_needed(self) -> bool: @@ -22,7 +22,7 @@ def new_isolator_needed(self) -> bool: def _clear_flags(self) -> None: self._is_llc_isolated = False self._is_mem_isolated = False - self._is_sched_isolated = False + self._is_core_isolated = False def choose_next_isolator(self) -> bool: logger = logging.getLogger(__name__) @@ -42,9 +42,9 @@ def choose_next_isolator(self) -> bool: logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started') return True - elif not self._is_sched_isolated and resource is ResourceType.MEMORY: + elif not self._is_core_isolated and resource is ResourceType.MEMORY: self._cur_isolator = self._isolator_map[CoreIsolator] - self._is_sched_isolated = True + self._is_core_isolated = True logger.info(f'Core Isolation for {self._fg_wl} is started') return True diff --git a/isolating_controller/isolation/policies/diff_policy_cpu.py b/isolating_controller/isolation/policies/diff_policy_cpu.py index 8289703..bf904bb 100644 --- a/isolating_controller/isolation/policies/diff_policy_cpu.py +++ b/isolating_controller/isolation/policies/diff_policy_cpu.py @@ -30,7 +30,7 @@ def choose_next_isolator(self) -> bool: resource: ResourceType = self.contentious_resource() - if not self._is_core_isolated and resource is ResourceType.CPU: + if resource is ResourceType.CPU: self._cur_isolator = self._isolator_map[CoreIsolator] self._cur_isolator._contentious_resource = ResourceType.CPU logger.info(f'Core Isolation for {self._fg_wl} is started to isolate 
{ResourceType.CPU.name}s') diff --git a/isolating_controller/metric_container/basic_metric.py b/isolating_controller/metric_container/basic_metric.py index eb889c7..371184f 100644 --- a/isolating_controller/metric_container/basic_metric.py +++ b/isolating_controller/metric_container/basic_metric.py @@ -132,4 +132,5 @@ def ipc(self): return self._ipc def __repr__(self) -> str: - return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, Local Memory access diff: {self._local_mem_ps:>6.03f}' + return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, Local Memory access diff: {self._local_mem_ps:>6.03f},' \ + f'IPC diff: {self.ipc:>06.03f}' diff --git a/isolating_controller/utils/__init__.py b/isolating_controller/utils/__init__.py index fad398f..9902838 100644 --- a/isolating_controller/utils/__init__.py +++ b/isolating_controller/utils/__init__.py @@ -2,7 +2,6 @@ from .cat import CAT from .resctrl import ResCtrl -from .cgroup_cpuset import CgroupCpuset from .cgroup import Cgroup from .dvfs import DVFS from .numa_topology import NumaTopology diff --git a/isolating_controller/utils/cgroup_cpuset.py b/isolating_controller/utils/cgroup_cpuset.py deleted file mode 100644 index 0877bf1..0000000 --- a/isolating_controller/utils/cgroup_cpuset.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding: UTF-8 - -import subprocess -from typing import Set - -import psutil - - -class CgroupCpuset: - MOUNT_POINT = '/sys/fs/cgroup/cpuset' - - @staticmethod - def create_group(name: str) -> None: - subprocess.check_call(args=('sudo', 'mkdir', '-p', f'{CgroupCpuset.MOUNT_POINT}/{name}')) - - @staticmethod - def add_task(name: str, pid: int) -> None: - p = psutil.Process(pid) - - for thread in p.threads(): - subprocess.run(args=('sudo', 'tee', '-a', f'{CgroupCpuset.MOUNT_POINT}/{name}/tasks'), - input=f'{thread.id}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - for child in p.children(True): - for thread in child.threads(): - subprocess.run(args=('sudo', 'tee', '-a', 
f'{CgroupCpuset.MOUNT_POINT}/{name}/tasks'), - input=f'{thread.id}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - @staticmethod - def remove_group(name: str) -> None: - subprocess.check_call(args=('sudo', 'rmdir', f'/sys/fs/cgroup/cpuset/{name}')) - - @staticmethod - def assign(group_name: str, core_set: Set[int]) -> None: - subprocess.run(args=('sudo', 'tee', f'/sys/fs/cgroup/cpuset/{group_name}/cpuset.cpus'), - input=','.join(map(str, core_set)), check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - @staticmethod - def convert_to_set(hyphen_str: str) -> Set[int]: - ret = set() - - for elem in hyphen_str.split(','): - group = tuple(map(int, elem.split('-'))) - - if len(group) is 1: - ret.add(group[0]) - elif len(group) is 2: - ret.update(range(group[0], group[1] + 1)) - - return ret From 656d69216e65341f85c024c1e278d98e33efa1a7 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Tue, 25 Sep 2018 06:38:42 +0900 Subject: [PATCH 14/20] fix: Fix fg_strengthen/weaken_cond not to exceed socket boundary --- controller.py | 2 +- isolating_controller/isolation/__init__.py | 7 ++ .../isolation/isolators/base_isolator.py | 3 +- .../isolation/isolators/cache.py | 9 ++- .../isolation/isolators/core.py | 67 ++++++++++++------- .../isolation/isolators/memory.py | 8 ++- .../isolation/isolators/swap.py | 1 + .../isolation/policies/__init__.py | 2 +- .../isolation/policies/base_policy.py | 25 +++---- .../isolation/policies/diff_policy.py | 3 +- .../isolation/policies/diff_policy_cpu.py | 3 +- .../policies/diff_with_violation_policy.py | 2 +- .../isolation/policies/greedy_diff_policy.py | 3 +- .../greedy_diff_with_violation_policy.py | 2 +- .../metric_container/basic_metric.py | 17 ++++- pending_queue.py | 1 - 16 files changed, 96 insertions(+), 59 deletions(-) diff --git a/controller.py b/controller.py index 20b2020..1497d7d 100755 --- a/controller.py +++ b/controller.py @@ -20,7 +20,7 @@ import isolating_controller from isolating_controller.isolation import
NextStep from isolating_controller.isolation.isolators import Isolator -from isolating_controller.isolation.policies import GreedyDiffWViolationPolicy, DiffCPUPolicy, IsolationPolicy +from isolating_controller.isolation.policies import GreedyDiffWViolationPolicy, DiffCPUPolicy, DiffPolicy, IsolationPolicy from isolating_controller.metric_container.basic_metric import BasicMetric from isolating_controller.workload import Workload from pending_queue import PendingQueue diff --git a/isolating_controller/isolation/__init__.py b/isolating_controller/isolation/__init__.py index 104511c..999192f 100644 --- a/isolating_controller/isolation/__init__.py +++ b/isolating_controller/isolation/__init__.py @@ -8,3 +8,10 @@ class NextStep(IntEnum): WEAKEN = 2 STOP = 3 IDLE = 4 + + +class ResourceType(IntEnum): + CPU = 0 + CACHE = 1 + MEMORY = 2 + Unknown = 3 \ No newline at end of file diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py index 2c119d4..5c2f734 100644 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ b/isolating_controller/isolation/isolators/base_isolator.py @@ -4,10 +4,9 @@ from typing import Optional -from .. import NextStep +from .. import NextStep, ResourceType from ...metric_container.basic_metric import MetricDiff from ...workload import Workload -from ..policies.base_policy import ResourceType class Isolator(metaclass=ABCMeta): diff --git a/isolating_controller/isolation/isolators/cache.py b/isolating_controller/isolation/isolators/cache.py index 99d0bb3..9f84f71 100644 --- a/isolating_controller/isolation/isolators/cache.py +++ b/isolating_controller/isolation/isolators/cache.py @@ -1,12 +1,11 @@ # coding: UTF-8 import logging -from typing import Optional, Dict, Set +from typing import Optional from .base_isolator import Isolator -from .. import NextStep +from .. 
import NextStep, ResourceType from ...utils import ResCtrl -from ...utils import NumaTopology from ...workload import Workload @@ -14,8 +13,8 @@ class CacheIsolator(Isolator): _DOD_THRESHOLD = 0.005 _FORCE_THRESHOLD = 0.1 - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: - super().__init__(foreground_wl, background_wl, None) + def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resource: Optional[ResourceType]) -> None: + super().__init__(foreground_wl, background_wl, cont_resource) self._prev_step: Optional[int] = None self._cur_step: Optional[int] = None diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index 33fa15d..fa24cdc 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -2,22 +2,22 @@ import logging -from typing import Tuple, Set, Dict +from typing import Tuple, Set, Dict, Optional from .base_isolator import Isolator -from .. import NextStep +from .. 
import NextStep, ResourceType from ...workload import Workload from ...utils import Cgroup from ...utils import NumaTopology from ...utils import hyphen -from ..policies.base_policy import ResourceType + class CoreIsolator(Isolator): _DOD_THRESHOLD = 0.005 _FORCE_THRESHOLD = 0.1 - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: - super().__init__(foreground_wl, background_wl, None) + def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resource: Optional[ResourceType]) -> None: + super().__init__(foreground_wl, background_wl, cont_resource) self._fg_cpuset: Tuple[int] = foreground_wl.cpuset self._bg_cpuset: Tuple[int] = background_wl.cpuset @@ -128,20 +128,27 @@ def is_min_level(self) -> bool: def _enforce(self) -> None: logger = logging.getLogger(__name__) - logger.info(f'affinity of background is {hyphen.convert_to_hyphen(self._bg_cpuset)}') - logger.info(f'affinity of foreground is {hyphen.convert_to_hyphen(self._fg_cpuset)}') + logger.info(f'after enforcing : self._cur_bg_step is {self._cur_bg_step}') + logger.info(f'after enforcing : self._cur_fg_step is {self._cur_fg_step}') + logger.info(f'after enforcing : affinity of background is {hyphen.convert_to_hyphen(self._bg_cpuset)}') + logger.info(f'after enforcing : affinity of foreground is {hyphen.convert_to_hyphen(self._fg_cpuset)}') self._bg_cgroup.assign_cpus(set(self._bg_cpuset)) self._fg_cgroup.assign_cpus(set(self._fg_cpuset)) def _first_decision(self) -> NextStep: + curr_diff = None metric_diff = self._foreground_wl.calc_metric_diff() - curr_diff = metric_diff.local_mem_util_ps + + if self._contentious_resource == ResourceType.MEMORY: + curr_diff = metric_diff.local_mem_util_ps + elif self._contentious_resource == ResourceType.CPU: + curr_diff = metric_diff.ipc logger = logging.getLogger(__name__) logger.debug(f'current diff: {curr_diff:>7.4f}') - ## FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) + # FIXME: Specifying 
fg's strengthen/weaken condition (related to fg's performance) fg_strengthen_cond = self.fg_strengthen_cond(metric_diff.ipc) fg_weaken_cond = self.fg_weaken_cond(metric_diff.ipc) if curr_diff < 0: @@ -170,7 +177,8 @@ def _monitoring_result(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() curr_diff = None diff_of_diff = None - + logger = logging.getLogger(__name__) + logger.info(f'self._contentious_resource: {self._contentious_resource.name}') if self._contentious_resource == ResourceType.MEMORY: curr_diff = metric_diff.local_mem_util_ps prev_diff = self._prev_metric_diff.local_mem_util_ps @@ -193,45 +201,56 @@ def _monitoring_result(self) -> NextStep: logger.info(f'self.fg_strengthen_cond: {fg_strengthen_cond}') logger.info(f'self.fg_weaken_cond: {fg_weaken_cond}') - # FIXME: Assumption about fg's cpuset IDs are smaller than bg's ones. (kind of hard coded) - max_bg_cpuid = max(self._cpu_topo[self._background_wl.socket_id]) - min_bg_cpuid = max(self._fg_cpuset)+1 - # Case1 : diff is too small to perform isolation if abs(diff_of_diff) <= CoreIsolator._DOD_THRESHOLD \ or abs(curr_diff) <= CoreIsolator._DOD_THRESHOLD: self._bg_next_step = NextStep.STOP - #self._fg_next_step = NextStep.STOP # This line depends on bg status + # self._fg_next_step = NextStep.STOP # This line depends on bg status return NextStep.STOP # Case2 : FG shows lower contention than solo-run -> Slower FG or Faster BG elif curr_diff > 0: self._bg_next_step = NextStep.WEAKEN - if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid): + if self.bg_outside_boundary(): self._bg_next_step = NextStep.STOP - if fg_strengthen_cond: + if fg_strengthen_cond is True: self._fg_next_step = NextStep.STRENGTHEN + elif fg_strengthen_cond is False: + self._fg_next_step = NextStep.STOP return NextStep.WEAKEN # Case3 : FG shows higher contention than solo-run else: self._bg_next_step = NextStep.STRENGTHEN - if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid): + if 
self.bg_outside_boundary(): self._bg_next_step = NextStep.STOP if fg_weaken_cond: self._fg_next_step = NextStep.WEAKEN + elif fg_weaken_cond is False: + self._fg_next_step = NextStep.STOP return NextStep.STRENGTHEN - @staticmethod - def fg_strengthen_cond(fg_ipc_diff) -> bool: - if fg_ipc_diff > 0: + def bg_outside_boundary(self) -> bool: + # FIXME: Assumption about fg's cpuset IDs are smaller than bg's ones. (kind of hard coded) + max_bg_cpuid = max(self._cpu_topo[self._background_wl.socket_id]) + min_bg_cpuid = max(self._fg_cpuset)+1 + if not (min_bg_cpuid < self._cur_bg_step < max_bg_cpuid): return True else: return False - @staticmethod - def fg_weaken_cond(fg_ipc_diff) -> bool: - if fg_ipc_diff <= 0: + def fg_strengthen_cond(self, fg_ipc_diff) -> bool: + min_skt_cpuid = min(self._cpu_topo[self._foreground_wl.socket_id]) + if fg_ipc_diff > 0 and self._cur_fg_step > min_skt_cpuid: return True else: return False + + def fg_weaken_cond(self, fg_ipc_diff) -> bool: + if fg_ipc_diff <= 0: + free_cpu = self._cur_bg_step - self._cur_fg_step + if (free_cpu > 0 and self._bg_next_step != NextStep.WEAKEN) \ + or (free_cpu == 0 and self._bg_next_step == NextStep.STOP): + return True + else: + return False diff --git a/isolating_controller/isolation/isolators/memory.py b/isolating_controller/isolation/isolators/memory.py index 3a2e156..107cacb 100644 --- a/isolating_controller/isolation/isolators/memory.py +++ b/isolating_controller/isolation/isolators/memory.py @@ -3,8 +3,10 @@ import logging from itertools import chain +from typing import Optional + from .base_isolator import Isolator -from .. import NextStep +from .. 
import NextStep, ResourceType from ...utils import DVFS from ...workload import Workload @@ -13,8 +15,8 @@ class MemoryIsolator(Isolator): _DOD_THRESHOLD = 0.005 _FORCE_THRESHOLD = 0.1 - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: - super().__init__(foreground_wl, background_wl) + def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resource: Optional[ResourceType]) -> None: + super().__init__(foreground_wl, background_wl, cont_resource) self._bg_affinity = background_wl.cpuset diff --git a/isolating_controller/isolation/isolators/swap.py b/isolating_controller/isolation/isolators/swap.py index bedc04e..5857dcf 100644 --- a/isolating_controller/isolation/isolators/swap.py +++ b/isolating_controller/isolation/isolators/swap.py @@ -9,6 +9,7 @@ from ...workload import Workload from ..policies import IsolationPolicy + class SwapIsolator(Isolator): _THRESHOLD = 0.005 diff --git a/isolating_controller/isolation/policies/__init__.py b/isolating_controller/isolation/policies/__init__.py index bde7236..5e517fa 100644 --- a/isolating_controller/isolation/policies/__init__.py +++ b/isolating_controller/isolation/policies/__init__.py @@ -1,6 +1,6 @@ # coding: UTF-8 -from .base_policy import IsolationPolicy, ResourceType +from .base_policy import IsolationPolicy from .diff_policy import DiffPolicy from .diff_policy_cpu import DiffCPUPolicy from .diff_with_violation_policy import DiffWViolationPolicy diff --git a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index 1259ded..b5c2e39 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -1,24 +1,18 @@ # coding: UTF-8 import logging from abc import ABCMeta, abstractmethod -from enum import IntEnum from typing import Mapping, Type -from isolating_controller.metric_container.basic_metric import MetricDiff +from 
isolating_controller.metric_container.basic_metric import MetricDiff, BasicMetric from ..isolators import CacheIsolator, IdleIsolator, Isolator, MemoryIsolator, CoreIsolator from ...workload import Workload - - -class ResourceType(IntEnum): - CPU = 0 - CACHE = 1 - MEMORY = 2 +from .. import ResourceType class IsolationPolicy(metaclass=ABCMeta): _IDLE_ISOLATOR: IdleIsolator = IdleIsolator() # FIXME : _CPU_THRESHOLD needs test - _CPU_THRESHOLD = 0.01 + _CPU_THRESHOLD = 0.1 def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: self._fg_wl = fg_wl @@ -34,11 +28,12 @@ def __hash__(self) -> int: def __repr__(self) -> str: return f'{self.__class__.__name__} ' + # FIXME: If you use policy without CPUIso., then changing ResourceType.Unknown to ResourceType.Memory def init_isolators(self) -> None: self._isolator_map = dict(( - (CacheIsolator, CacheIsolator(self._fg_wl, self._bg_wl)), - (MemoryIsolator, MemoryIsolator(self._fg_wl, self._bg_wl)), - (CoreIsolator, CoreIsolator(self._fg_wl, self._bg_wl)) + (CacheIsolator, CacheIsolator(self._fg_wl, self._bg_wl, ResourceType.CACHE)), + (MemoryIsolator, MemoryIsolator(self._fg_wl, self._bg_wl, ResourceType.MEMORY)), + (CoreIsolator, CoreIsolator(self._fg_wl, self._bg_wl, ResourceType.Unknown)) )) @property @@ -52,11 +47,13 @@ def choose_next_isolator(self) -> bool: def contentious_resource(self) -> ResourceType: metric_diff: MetricDiff = self._fg_wl.calc_metric_diff() + cur_metric: BasicMetric = self._fg_wl.metrics[0] logger = logging.getLogger(__name__) logger.info(repr(metric_diff)) - if abs(metric_diff.local_mem_util_ps) < IsolationPolicy._CPU_THRESHOLD \ - and abs(metric_diff.l3_hit_ratio) < IsolationPolicy._CPU_THRESHOLD: + logger.info(f'l3_int: {cur_metric.l3_intensity}, mem_int: {cur_metric.mem_intensity}') + if abs(cur_metric.l3_intensity) < IsolationPolicy._CPU_THRESHOLD \ + and abs(cur_metric.mem_intensity) < IsolationPolicy._CPU_THRESHOLD: return ResourceType.CPU if metric_diff.local_mem_util_ps > 0 
and metric_diff.l3_hit_ratio > 0: diff --git a/isolating_controller/isolation/policies/diff_policy.py b/isolating_controller/isolation/policies/diff_policy.py index 84ae81f..cef9e77 100644 --- a/isolating_controller/isolation/policies/diff_policy.py +++ b/isolating_controller/isolation/policies/diff_policy.py @@ -2,7 +2,8 @@ import logging -from .base_policy import IsolationPolicy, ResourceType +from .. import ResourceType +from .base_policy import IsolationPolicy from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload diff --git a/isolating_controller/isolation/policies/diff_policy_cpu.py b/isolating_controller/isolation/policies/diff_policy_cpu.py index bf904bb..99be013 100644 --- a/isolating_controller/isolation/policies/diff_policy_cpu.py +++ b/isolating_controller/isolation/policies/diff_policy_cpu.py @@ -2,7 +2,8 @@ import logging -from .base_policy import IsolationPolicy, ResourceType +from .. import ResourceType +from .base_policy import IsolationPolicy from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload diff --git a/isolating_controller/isolation/policies/diff_with_violation_policy.py b/isolating_controller/isolation/policies/diff_with_violation_policy.py index 8df9003..c50b98d 100644 --- a/isolating_controller/isolation/policies/diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/diff_with_violation_policy.py @@ -2,7 +2,7 @@ import logging -from .base_policy import ResourceType +from .. 
import ResourceType from .diff_policy import DiffPolicy from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload diff --git a/isolating_controller/isolation/policies/greedy_diff_policy.py b/isolating_controller/isolation/policies/greedy_diff_policy.py index f6f801c..da37000 100644 --- a/isolating_controller/isolation/policies/greedy_diff_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_policy.py @@ -2,7 +2,8 @@ import logging -from .base_policy import IsolationPolicy, ResourceType +from .. import ResourceType +from .base_policy import IsolationPolicy from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload diff --git a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py b/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py index 980d178..ce7f3f2 100644 --- a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py @@ -2,7 +2,7 @@ import logging -from .base_policy import ResourceType +from .. 
import ResourceType from .greedy_diff_policy import GreedyDiffPolicy from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, CoreIsolator from ...workload import Workload diff --git a/isolating_controller/metric_container/basic_metric.py b/isolating_controller/metric_container/basic_metric.py index 371184f..8dc8fd0 100644 --- a/isolating_controller/metric_container/basic_metric.py +++ b/isolating_controller/metric_container/basic_metric.py @@ -2,6 +2,8 @@ from time import localtime, strftime +LLC_SIZE: float = 41943040 + class BasicMetric: def __init__(self, l2miss, l3miss, inst, cycles, stall_cycles, wall_cycles, intra_coh, inter_coh, llc_size, @@ -94,10 +96,18 @@ def l3miss_ratio(self): def l3hit_ratio(self) -> float: return 1 - self._l3miss / self._l2miss + @property + def llc_util(self) -> float: + return self._llc_size/LLC_SIZE + @property def l3_intensity(self): l3_hit_ratio = 1 - self.l3miss_ratio - return self._llc_size * l3_hit_ratio + return self.llc_util * l3_hit_ratio + + @property + def mem_intensity(self): + return self.llc_util * self.l3miss_ratio def __str__(self): return ', '.join(map(str, ( @@ -132,5 +142,6 @@ def ipc(self): return self._ipc def __repr__(self) -> str: - return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, Local Memory access diff: {self._local_mem_ps:>6.03f},' \ - f'IPC diff: {self.ipc:>06.03f}' + return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, ' \ + f'Local Memory access diff: {self._local_mem_ps:>6.03f}, ' \ + f'IPC diff: {self.ipc:>6.03f}' diff --git a/pending_queue.py b/pending_queue.py index 88d2fe9..6c2fad0 100644 --- a/pending_queue.py +++ b/pending_queue.py @@ -1,7 +1,6 @@ # coding: UTF-8 import logging -from threading import RLock from typing import Dict, List, Sized, Type From 974f940aeba2c18e6464f24f49ecabc3c1ecda44 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Tue, 25 Sep 2018 17:54:49 +0900 Subject: [PATCH 15/20] feat: Add swap_iso.py and related codes --- controller.py | 1 - 
.../isolation/policies/base_policy.py | 41 ++++++++++ isolating_controller/workload.py | 7 +- swap_iso.py | 81 +++++++++++++++++++ 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 swap_iso.py diff --git a/controller.py b/controller.py index 1497d7d..201c1a5 100755 --- a/controller.py +++ b/controller.py @@ -48,7 +48,6 @@ def __init__(self, metric_buf_size: int) -> None: ## FIXME : Hard coded - PendingQueue can have four workloads at most (second argument) self._pending_wl = PendingQueue(DiffCPUPolicy, 2) self._control_thread = ControlThread(self._pending_wl) - self._lock = RLock() def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicProperties, body: bytes) -> None: ch.basic_ack(method.delivery_tag) diff --git a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index b5c2e39..6c56413 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -22,6 +22,8 @@ def __init__(self, fg_wl: Workload, bg_wl: Workload, skt_id: int) -> None: self._isolator_map: Mapping[Type[Isolator], Isolator] = dict() self._cur_isolator: Isolator = IsolationPolicy._IDLE_ISOLATOR + self._aggr_ipc_diff: float = None + def __hash__(self) -> int: return self._fg_wl.pid @@ -94,6 +96,45 @@ def cur_isolator(self) -> Isolator: def name(self) -> str: return f'{self._fg_wl.name}({self._fg_wl.pid})' + @property + def aggr_ipc(self) -> float: + return self._aggr_ipc_diff + + @property + def most_cont_workload(self) -> Workload: + fg_wl = self.foreground_workload + bg_wl = self.background_workload + + fg_ipc_diff = fg_wl.ipc_diff + bg_ipc_diff = bg_wl.ipc_diff + + # FIXME: Below condition is likely to fail due to too little differences between fg and bg + if fg_ipc_diff < bg_ipc_diff: + return fg_wl + else: + return bg_wl + + @property + def least_cont_workload(self) -> Workload: + fg_wl = self.foreground_workload + bg_wl = 
self.background_workload + + fg_ipc_diff = fg_wl.ipc_diff + bg_ipc_diff = bg_wl.ipc_diff + + # FIXME: Below condition is likely to fail due to too little differences between fg and bg + if fg_ipc_diff > bg_ipc_diff: + return fg_wl + else: + return bg_wl + + def update_aggr_ipc(self) -> None: + fg_diff = self._fg_wl.calc_metric_diff() + bg_diff = self._bg_wl.calc_metric_diff() + self._fg_wl._ipc_diff = fg_diff.ipc + self._bg_wl._ipc_diff = bg_diff.ipc + self._aggr_ipc_diff = fg_diff.ipc + bg_diff.ipc + def set_idle_isolator(self) -> None: self._cur_isolator.yield_isolation() self._cur_isolator = IsolationPolicy._IDLE_ISOLATOR diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index 805f2a1..5bf658f 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -31,7 +31,8 @@ def __init__(self, name: str, wl_type: str, pid: int, perf_pid: int, perf_interv self._perf_interval = perf_interval self._proc_info = psutil.Process(pid) - self._socket_id = None + self._socket_id: int = None + self._ipc_diff: float = None def __repr__(self) -> str: return f'{self._name} (pid: {self._pid})' @@ -73,6 +74,10 @@ def perf_interval(self): def is_running(self) -> bool: return self._proc_info.is_running() + @property + def ipc_diff(self) -> float: + return self._ipc_diff + def calc_metric_diff(self) -> MetricDiff: solorun_data = data_map[self.name] curr_metric: BasicMetric = self._metrics[0] diff --git a/swap_iso.py b/swap_iso.py new file mode 100644 index 0000000..49f2138 --- /dev/null +++ b/swap_iso.py @@ -0,0 +1,81 @@ +# coding: UTF-8 + +import logging + +from enum import IntEnum +from typing import Dict, Set, Optional + +from isolating_controller.workload import Workload +from isolating_controller.isolation.policies.base_policy import IsolationPolicy + + +class SwapNextStep(IntEnum): + OUT = 0 + IN = 1 + + +class SwapIsolator: + # FIXME: This threshold needs tests (How big diff is right for swapping workloads?) 
+ _DIFF_THRESHOLD = 0.001 + + def __init__(self, isolation_groups: Dict[int, IsolationPolicy]) -> None: + """ + + :param isolation_groups: Dict. Key is the number of group and Value is the group itself + """ + self._all_groups = isolation_groups + self._swap_candidates: Dict[SwapNextStep, Workload] = dict() + + self._most_cont_group: Optional[IsolationPolicy] = None + self._least_cont_group: Optional[IsolationPolicy] = None + + self._most_cont_workload: Optional[Workload] = None + self._least_cont_workload: Optional[Workload] = None + + self.ipc_diffs: Dict[float, int] = dict() # key:val = aggr_ipc_diff:grp_idx + + def __del__(self): + logger = logging.getLogger(__name__) + + def update_cont_group(self) -> None: + """ + Most contentious group is the group which shows "the LOWEST aggr. ipc diff" + Least contentious group is the group which shows "the HIGHEST aggr. ipc diff" + + Assumption : Swap Isolator swaps workloads between the most cont. group and the least cont. group + """ + all_ipc_diffs = list() + + # Update Aggr. 
IPC Diffs of All Groups + for grp_idx, group in self._all_groups.items(): + group.update_aggr_ipc() + aggr_ipc_diff = group.aggr_ipc + all_ipc_diffs.append(aggr_ipc_diff) + self.ipc_diffs[aggr_ipc_diff] = grp_idx + + max_aggr_ipc_diff = max(all_ipc_diffs) + min_aggr_ipc_diff = min(all_ipc_diffs) + + swap_out_grp = self.ipc_diffs[max_aggr_ipc_diff] + swap_in_grp = self.ipc_diffs[min_aggr_ipc_diff] + + self._most_cont_group = swap_out_grp + self._least_cont_group = swap_in_grp + + def choose_swap_candidates(self): + swap_out_grp = self._most_cont_group + swap_in_grp = self._least_cont_group + + # FIXME: This part depends on the swap policy (Which one is selected for swapping) + swap_out_wl = swap_out_grp.most_cont_workload + swap_in_wl = swap_in_grp.most_cont_workload # It selects the bg workload in swap_in group + + self._swap_candidates[SwapNextStep.OUT] = swap_out_wl + self._swap_candidates[SwapNextStep.IN] = swap_in_wl + + def first_decision(self): + return + + def enforce(self): + return + From 70e50e5580b9f1577a378443085223694d40bca1 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Thu, 27 Sep 2018 05:17:18 +0900 Subject: [PATCH 16/20] feat: Add SwapIsolator and related code --- controller.py | 15 +++- .../isolation/policies/base_policy.py | 23 +++++ .../metric_container/basic_metric.py | 4 +- isolating_controller/utils/cgroup.py | 6 ++ swap_iso.py | 90 ++++++++++++++++--- 5 files changed, 121 insertions(+), 17 deletions(-) diff --git a/controller.py b/controller.py index 201c1a5..e7fc6d4 100755 --- a/controller.py +++ b/controller.py @@ -24,7 +24,7 @@ from isolating_controller.metric_container.basic_metric import BasicMetric from isolating_controller.workload import Workload from pending_queue import PendingQueue -from threading import RLock +from swap_iso import SwapIsolator MIN_PYTHON = (3, 6) @@ -140,11 +140,14 @@ def __init__(self, pending_queue: PendingQueue) -> None: self._interval: float = 0.2 # Scheduling interval self._isolation_groups: 
Dict[IsolationPolicy, int] = dict() + self._all_groups: Dict[int, IsolationPolicy] = dict() + self._swapper: SwapIsolator = None def _isolate_workloads(self) -> None: logger = logging.getLogger(__name__) - ## TODO: Swapper may come here + # TODO: Swapper may come here + self._swapper.try_swap() for group, iteration_num in self._isolation_groups.items(): logger.info('') @@ -193,6 +196,12 @@ def _register_pending_workloads(self) -> None: self._isolation_groups[pending_group] = 0 pending_group.init_isolators() + # init self._all_groups if pending_group exist + if len(self._isolation_groups) > 0: + all_groups = list(self._isolation_groups.keys()) + for idx, group in enumerate(all_groups): + self._all_groups[idx] = group + def _remove_ended_groups(self) -> None: """ deletes the finished workloads(threads) from the dict. @@ -215,6 +224,8 @@ def run(self) -> None: logger = logging.getLogger(__name__) logger.info('starting isolation loop') + # Swapper init + self._swapper = SwapIsolator(self._all_groups) while True: self._remove_ended_groups() self._register_pending_workloads() diff --git a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py index 6c56413..b215ae1 100644 --- a/isolating_controller/isolation/policies/base_policy.py +++ b/isolating_controller/isolation/policies/base_policy.py @@ -128,6 +128,19 @@ def least_cont_workload(self) -> Workload: else: return bg_wl + @property + def least_mem_bw_workload(self) -> Workload: + fg_wl = self.foreground_workload + bg_wl = self.background_workload + + fg_mem_bw = fg_wl.metrics[0].local_mem_ps() + bg_mem_bw = bg_wl.metrics[0].local_mem_ps() + + if fg_mem_bw > bg_mem_bw: + return bg_wl + else: + return fg_wl + def update_aggr_ipc(self) -> None: fg_diff = self._fg_wl.calc_metric_diff() bg_diff = self._bg_wl.calc_metric_diff() @@ -135,6 +148,16 @@ def update_aggr_ipc(self) -> None: self._bg_wl._ipc_diff = bg_diff.ipc self._aggr_ipc_diff = fg_diff.ipc + bg_diff.ipc + 
def contention_diff(self, rtype: ResourceType) -> float: + fg_diff = self._fg_wl.calc_metric_diff() + bg_diff = self._bg_wl.calc_metric_diff() + if rtype is ResourceType.CPU: + return fg_diff.ipc + bg_diff.ipc + elif rtype is ResourceType.CACHE: + return fg_diff.l3_hit_ratio + bg_diff.l3_hit_ratio + elif rtype is ResourceType.MEMORY: + return fg_diff.local_mem_util_ps + bg_diff.local_mem_util_ps + def set_idle_isolator(self) -> None: self._cur_isolator.yield_isolation() self._cur_isolator = IsolationPolicy._IDLE_ISOLATOR diff --git a/isolating_controller/metric_container/basic_metric.py b/isolating_controller/metric_container/basic_metric.py index 8dc8fd0..363c680 100644 --- a/isolating_controller/metric_container/basic_metric.py +++ b/isolating_controller/metric_container/basic_metric.py @@ -73,7 +73,7 @@ def req_date(self): return self._req_date @property - def ipc(self) -> float: + def ipc(self): return self._instructions / self._cycles @property @@ -123,7 +123,7 @@ def __init__(self, curr: BasicMetric, prev: BasicMetric) -> None: self._l3_hit_ratio = curr.l3hit_ratio - prev.l3hit_ratio self._local_mem_ps = curr.local_mem_ps() / prev.local_mem_ps() - 1 self._remote_mem_ps = curr.remote_mem_ps() / prev.remote_mem_ps() - 1 - self._ipc = curr.ipc - prev.ipc + self._ipc = curr.ipc() / prev.ipc() - 1 @property def l3_hit_ratio(self): diff --git a/isolating_controller/utils/cgroup.py b/isolating_controller/utils/cgroup.py index 2f690fd..aadbf7b 100644 --- a/isolating_controller/utils/cgroup.py +++ b/isolating_controller/utils/cgroup.py @@ -59,3 +59,9 @@ def add_tasks(self, pids: Iterable[int]) -> None: def delete(self) -> None: subprocess.check_call(args=('sudo', 'cgdelete', '-r', '-g', self._group_path)) + + def enable_memory_migrate(self) -> None: + subprocess.check_call(args=('cgset', '-r', f'cpuset.memory_migrate=1', self._group_name)) + + def disable_memory_migrate(self) -> None: + subprocess.check_call(args=('cgset', '-r', f'cpuset.memory_migrate=0', 
self._group_name)) \ No newline at end of file diff --git a/swap_iso.py b/swap_iso.py index 49f2138..f22e249 100644 --- a/swap_iso.py +++ b/swap_iso.py @@ -1,12 +1,15 @@ # coding: UTF-8 +import os +import signal import logging from enum import IntEnum -from typing import Dict, Set, Optional +from typing import Dict, Optional, Tuple from isolating_controller.workload import Workload from isolating_controller.isolation.policies.base_policy import IsolationPolicy +from isolating_controller.utils.cgroup import Cgroup class SwapNextStep(IntEnum): @@ -15,15 +18,16 @@ class SwapNextStep(IntEnum): class SwapIsolator: - # FIXME: This threshold needs tests (How big diff is right for swapping workloads?) - _DIFF_THRESHOLD = 0.001 + # FIXME: This threshold needs tests (How small diff is right for swapping workloads?) + # "-0.5" means the IPCs of workloads in a group drop 50% compared to solo-run + _IPC_DIFF_THRESHOLD = -0.5 def __init__(self, isolation_groups: Dict[int, IsolationPolicy]) -> None: """ - :param isolation_groups: Dict. Key is the number of group and Value is the group itself + :param isolation_groups: Dict. 
Key is the index of group and Value is the group itself """ - self._all_groups = isolation_groups + self._all_groups: Dict[int, IsolationPolicy] = isolation_groups self._swap_candidates: Dict[SwapNextStep, Workload] = dict() self._most_cont_group: Optional[IsolationPolicy] = None @@ -32,10 +36,11 @@ def __init__(self, isolation_groups: Dict[int, IsolationPolicy]) -> None: self._most_cont_workload: Optional[Workload] = None self._least_cont_workload: Optional[Workload] = None - self.ipc_diffs: Dict[float, int] = dict() # key:val = aggr_ipc_diff:grp_idx + self.aggr_ipc_diffs: Dict[float, int] = dict() # key:val = aggr_ipc_diff:grp_idx def __del__(self): logger = logging.getLogger(__name__) + print('SwapIsolator is closed...') def update_cont_group(self) -> None: """ @@ -51,13 +56,14 @@ def update_cont_group(self) -> None: group.update_aggr_ipc() aggr_ipc_diff = group.aggr_ipc all_ipc_diffs.append(aggr_ipc_diff) - self.ipc_diffs[aggr_ipc_diff] = grp_idx + self.aggr_ipc_diffs[aggr_ipc_diff] = grp_idx max_aggr_ipc_diff = max(all_ipc_diffs) min_aggr_ipc_diff = min(all_ipc_diffs) - swap_out_grp = self.ipc_diffs[max_aggr_ipc_diff] - swap_in_grp = self.ipc_diffs[min_aggr_ipc_diff] + # Lower ipc diff means lower performance relative to solo-run + swap_out_grp = self.aggr_ipc_diffs[min_aggr_ipc_diff] + swap_in_grp = self.aggr_ipc_diffs[max_aggr_ipc_diff] self._most_cont_group = swap_out_grp self._least_cont_group = swap_in_grp @@ -67,8 +73,9 @@ def choose_swap_candidates(self): swap_in_grp = self._least_cont_group # FIXME: This part depends on the swap policy (Which one is selected for swapping) - swap_out_wl = swap_out_grp.most_cont_workload - swap_in_wl = swap_in_grp.most_cont_workload # It selects the bg workload in swap_in group + # TODO: Need Tests for Swap Overhead + swap_out_wl = swap_out_grp.least_mem_bw_workload + swap_in_wl = swap_in_grp.least_mem_bw_workload # It selects the bg workload in swap_in group self._swap_candidates[SwapNextStep.OUT] = swap_out_wl 
self._swap_candidates[SwapNextStep.IN] = swap_in_wl @@ -76,6 +83,63 @@ def choose_swap_candidates(self): def first_decision(self): return - def enforce(self): - return + def swap_is_needed(self) -> bool: + #aggr_ipc_diff_list = list() + #for _, group in self._all_groups.items(): + # aggr_ipc_diff_list.append(group.aggr_ipc) + + #min_ipc_diff = min(aggr_ipc_diff_list) + #avg_min_ipc_diff = min_ipc_diff/2 + # FIXME: We used the average ipc diff value (We assume two workloads in a group at most) + avg_min_ipc_diff = self._most_cont_group.aggr_ipc/2 + + # TODO: Test the _IPC_DIFF_THRESHOLD + if avg_min_ipc_diff < self._IPC_DIFF_THRESHOLD: + return True + else: + return False + + def do_swap(self) -> None: + # Enable CPUSET memory migration + out_proc, in_proc = self.pre_swap_setup() + + out_cpuset = self._swap_candidates[SwapNextStep.OUT].cpuset + in_cpuset = self._swap_candidates[SwapNextStep.IN].cpuset + out_skt = self._swap_candidates[SwapNextStep.OUT].socket_id + in_skt = self._swap_candidates[SwapNextStep.OUT].socket_id + + # Suspend Procs and Enforce Swap Conf. 
+ os.kill(self._swap_candidates[SwapNextStep.OUT].pid, signal.SIGSTOP) + os.kill(self._swap_candidates[SwapNextStep.IN].pid, signal.SIGSTOP) + + out_proc.assign_cpus(set(in_cpuset)) + out_proc.assign_mems(set(out_skt)) + in_proc.assign_cpus(set(out_cpuset)) + in_proc.assign_mems(set(in_skt)) + + # Resume Procs + os.kill(self._swap_candidates[SwapNextStep.OUT].pid, signal.SIGCONT) + os.kill(self._swap_candidates[SwapNextStep.IN].pid, signal.SIGCONT) + + def pre_swap_setup(self) -> Tuple[Cgroup, Cgroup]: + swap_out_workload = self._swap_candidates[SwapNextStep.OUT] + swap_in_workload = self._swap_candidates[SwapNextStep.IN] + + swap_out_grp_name = f'{swap_out_workload.name}_{swap_out_workload.pid}' + swap_in_grp_name = f'{swap_in_workload.name}_{swap_in_workload.pid}' + + out_proc = Cgroup(swap_out_grp_name, 'cpuset,cpu') + in_proc = Cgroup(swap_in_grp_name, 'cpuset,cpu') + + out_proc.enable_memory_migrate() + in_proc.enable_memory_migrate() + + return out_proc, in_proc + + def try_swap(self) -> None: + self.update_cont_group() + self.choose_swap_candidates() + if self.swap_is_needed: + self.do_swap() + From fdfebd37a4e6555897a4ff8b36045eeea0b6d2d2 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Thu, 27 Sep 2018 21:55:27 +0900 Subject: [PATCH 17/20] fix: Modify CoreIso., MemoryIso., and dvfs.py to set workload's cpufreq. 
--- .../isolation/isolators/core.py | 9 +++++++ .../isolation/isolators/memory.py | 15 +++++++++-- isolating_controller/utils/dvfs.py | 25 ++++++++++++++++++- isolating_controller/workload.py | 18 +++++++++++++ 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index fa24cdc..31bfb28 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -32,6 +32,11 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resour self._fg_cgroup = Cgroup(self._fg_grp_name, 'cpuset,cpu') self._bg_cgroup = Cgroup(self._bg_grp_name, 'cpuset,cpu') + self._fg_dvfs = self._foreground_wl.dvfs + self._bg_dvfs = self._bakcground_wl.dvfs + + foreground_wl._cgroup = self._fg_cgroup + background_wl._cgroup = self._bg_cgroup cpu_topo, mem_topo = NumaTopology.get_numa_info() self._cpu_topo: Dict[int, Set[int]] = cpu_topo @@ -136,6 +141,10 @@ def _enforce(self) -> None: self._bg_cgroup.assign_cpus(set(self._bg_cpuset)) self._fg_cgroup.assign_cpus(set(self._fg_cpuset)) + # Setting the current workloads' cpu frequencies to newly changed cpuset + self._bg_dvfs.set_freq_cgroup(self._bg_dvfs.cpufreq) + self._fg_dvfs.set_freq_cgroup(self._fg_dvfs.cpufreq) + def _first_decision(self) -> NextStep: curr_diff = None metric_diff = self._foreground_wl.calc_metric_diff() diff --git a/isolating_controller/isolation/isolators/memory.py b/isolating_controller/isolation/isolators/memory.py index 107cacb..44ba9e8 100644 --- a/isolating_controller/isolation/isolators/memory.py +++ b/isolating_controller/isolation/isolators/memory.py @@ -3,7 +3,7 @@ import logging from itertools import chain -from typing import Optional +from typing import Optional, Dict, Set from .base_isolator import Isolator from .. 
import NextStep, ResourceType @@ -18,13 +18,23 @@ class MemoryIsolator(Isolator): def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resource: Optional[ResourceType]) -> None: super().__init__(foreground_wl, background_wl, cont_resource) + self._fg_grp_name: str = f'{foreground_wl.name}_{foreground_wl.pid}' + self._bg_grp_name: str = f'{background_wl.name}_{background_wl.pid}' + + self._fg_affinity = foreground_wl.cpuset self._bg_affinity = background_wl.cpuset # FIXME: hard coded self._cur_step = DVFS.MAX + self._fg_dvfs: DVFS = DVFS(self._fg_grp_name, self._fg_affinity) + self._bg_dvfs: DVFS = DVFS(self._bg_grp_name, self._bg_affinity) + def __del__(self) -> None: - DVFS.set_freq(DVFS.MAX, chain(self._bg_affinity)) + if self._background_wl.is_running: + DVFS.set_freq(DVFS.MAX, chain(self._bg_affinity)) + if self._foreground_wl.is_running: + DVFS.set_freq(DVFS.MAX, chain(self._fg_affinity)) def strengthen(self) -> 'MemoryIsolator': self._cur_step -= DVFS.STEP @@ -49,6 +59,7 @@ def _enforce(self) -> None: logger.info(f'frequency of cpuset {self._background_wl.cpuset} is {self._cur_step / 1_000_000}GHz') DVFS.set_freq(self._cur_step, self._background_wl.cpuset) + self._bg_dvfs.save_freq(self._cur_step) def _first_decision(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() diff --git a/isolating_controller/utils/dvfs.py b/isolating_controller/utils/dvfs.py index 8a0593c..835206e 100644 --- a/isolating_controller/utils/dvfs.py +++ b/isolating_controller/utils/dvfs.py @@ -2,7 +2,9 @@ import subprocess from pathlib import Path -from typing import Iterable +from typing import Iterable, Dict +from itertools import chain +from isolating_controller.utils.cgroup import Cgroup class DVFS: @@ -10,6 +12,27 @@ class DVFS: STEP = 100000 MAX = int(Path('/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq').read_text()) + def __init__(self, group_name, cpu_affinity): + self._group_name: str = group_name + self._cur_cgroup = 
Cgroup(self._group_name, 'cpuset,cpu') + self._cpufreq: Dict[int, int] = dict() + + # FIXME: hard coded to max freq. + self.set_freq_cgroup(DVFS.MAX) + + def set_freq_cgroup(self, target_freq: int): + cur_grp_cpuset = self._cur_cgroup._get_cpu_affinity_from_group() + DVFS.set_freq(target_freq, chain(cur_grp_cpuset)) + + @property + def cpufreq(self): + return self._cpufreq + + def save_freq(self, freq: int): + cpuset = self._cpufreq.keys() + for cpu_id in cpuset: + self._cpufreq[cpu_id] = freq + @staticmethod def set_freq(freq: int, cores: Iterable[int]) -> None: for core in cores: diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index 5bf658f..9989d40 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -7,6 +7,9 @@ import cpuinfo import psutil +from .utils.dvfs import DVFS +from .utils.cgroup import Cgroup +from .utils.resctrl import ResCtrl from .utils.numa_topology import NumaTopology from .metric_container.basic_metric import BasicMetric, MetricDiff from .solorun_data.datas import data_map @@ -33,6 +36,9 @@ def __init__(self, name: str, wl_type: str, pid: int, perf_pid: int, perf_interv self._proc_info = psutil.Process(pid) self._socket_id: int = None self._ipc_diff: float = None + self._cgroup: Cgroup = None + self._resctrl: ResCtrl = None + self._dvfs: DVFS = None def __repr__(self) -> str: return f'{self._name} (pid: {self._pid})' @@ -78,6 +84,18 @@ def is_running(self) -> bool: def ipc_diff(self) -> float: return self._ipc_diff + @property + def cgroup(self) -> Cgroup: + return self._cgroup + + @property + def resctrl(self) -> ResCtrl: + return self._resctrl + + @property + def dvfs(self) -> DVFS: + return self._dvfs + def calc_metric_diff(self) -> MetricDiff: solorun_data = data_map[self.name] curr_metric: BasicMetric = self._metrics[0] From 1bc0d358e57aac711b76e3a368f626d1a4476058 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Thu, 27 Sep 2018 22:03:04 +0900 Subject: [PATCH 18/20] fix: 
fix minor typo --- isolating_controller/isolation/isolators/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index 31bfb28..abaa439 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -33,7 +33,7 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resour self._fg_cgroup = Cgroup(self._fg_grp_name, 'cpuset,cpu') self._bg_cgroup = Cgroup(self._bg_grp_name, 'cpuset,cpu') self._fg_dvfs = self._foreground_wl.dvfs - self._bg_dvfs = self._bakcground_wl.dvfs + self._bg_dvfs = self._background_wl.dvfs foreground_wl._cgroup = self._fg_cgroup background_wl._cgroup = self._bg_cgroup From 766596f59008e4f1b97ec2d7ade5a011f0405f84 Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Thu, 27 Sep 2018 22:35:38 +0900 Subject: [PATCH 19/20] feat: Add update_isolation_config func. to store setting to each class --- .../isolation/isolators/base_isolator.py | 9 +++++++++ isolating_controller/isolation/isolators/cache.py | 9 +++++++++ isolating_controller/isolation/isolators/core.py | 7 +++++++ isolating_controller/isolation/isolators/idle.py | 3 +++ isolating_controller/isolation/isolators/memory.py | 9 +++++++++ 5 files changed, 37 insertions(+) diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py index 5c2f734..ca0a3a3 100644 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ b/isolating_controller/isolation/isolators/base_isolator.py @@ -86,3 +86,12 @@ def decide_next_step(self) -> NextStep: else: return self._monitoring_result() + + @abstractmethod + def update_isolation_config(self) -> None: + """ + Update the isolator configuration parameters to each workloads after enforcing isolation + (E.g., Update the core frequencies after enforcing isolation) + :return: + """ + pass \ No 
newline at end of file diff --git a/isolating_controller/isolation/isolators/cache.py b/isolating_controller/isolation/isolators/cache.py index 9f84f71..db2f050 100644 --- a/isolating_controller/isolation/isolators/cache.py +++ b/isolating_controller/isolation/isolators/cache.py @@ -25,6 +25,10 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resour self._fg_resctrl = ResCtrl(self._fg_grp_name) self._bg_resctrl = ResCtrl(self._bg_grp_name) + # Save the CacheIsolator setting to `Workload` + foreground_wl._resctrl = self._fg_resctrl + background_wl._resctrl = self._bg_resctrl + def __del__(self) -> None: logger = logging.getLogger(__name__) @@ -109,6 +113,7 @@ def _enforce(self) -> None: if bg_socket_id == 1: ResCtrl.assign_llc(self._bg_resctrl, '1', bg_mask) + self.update_isolation_config() def _first_decision(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() @@ -159,3 +164,7 @@ def _monitoring_result(self) -> NextStep: return NextStep.STOP else: return NextStep.STRENGTHEN + + def update_isolation_config(self) -> None: + self._foreground_wl._resctrl = self._fg_resctrl + self._background_wl._resctrl = self._bg_resctrl \ No newline at end of file diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py index abaa439..4282787 100644 --- a/isolating_controller/isolation/isolators/core.py +++ b/isolating_controller/isolation/isolators/core.py @@ -35,6 +35,7 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resour self._fg_dvfs = self._foreground_wl.dvfs self._bg_dvfs = self._background_wl.dvfs + # Save the CoreIsolator setting to `Workload` foreground_wl._cgroup = self._fg_cgroup background_wl._cgroup = self._bg_cgroup @@ -145,6 +146,8 @@ def _enforce(self) -> None: self._bg_dvfs.set_freq_cgroup(self._bg_dvfs.cpufreq) self._fg_dvfs.set_freq_cgroup(self._fg_dvfs.cpufreq) + self.update_isolation_config() + def _first_decision(self) -> 
NextStep: curr_diff = None metric_diff = self._foreground_wl.calc_metric_diff() @@ -263,3 +266,7 @@ def fg_weaken_cond(self, fg_ipc_diff) -> bool: return True else: return False + + def update_isolation_config(self) -> None: + self._foreground_wl._cgroup = self._fg_cgroup + self._background_wl._cgroup = self._bg_cgroup diff --git a/isolating_controller/isolation/isolators/idle.py b/isolating_controller/isolation/isolators/idle.py index 9a2ec15..510ba19 100644 --- a/isolating_controller/isolation/isolators/idle.py +++ b/isolating_controller/isolation/isolators/idle.py @@ -37,3 +37,6 @@ def _monitoring_result(self) -> NextStep: self._fg_next_step = NextStep.IDLE self._bg_next_step = NextStep.IDLE return NextStep.IDLE + + def update_isolation_config(self) -> None: + pass \ No newline at end of file diff --git a/isolating_controller/isolation/isolators/memory.py b/isolating_controller/isolation/isolators/memory.py index 44ba9e8..b2422b3 100644 --- a/isolating_controller/isolation/isolators/memory.py +++ b/isolating_controller/isolation/isolators/memory.py @@ -30,6 +30,10 @@ def __init__(self, foreground_wl: Workload, background_wl: Workload, cont_resour self._fg_dvfs: DVFS = DVFS(self._fg_grp_name, self._fg_affinity) self._bg_dvfs: DVFS = DVFS(self._bg_grp_name, self._bg_affinity) + # Save the MemoryIsolator setting to `Workload` + foreground_wl._dvfs = self._fg_dvfs + background_wl._dvfs = self._bg_dvfs + def __del__(self) -> None: if self._background_wl.is_running: DVFS.set_freq(DVFS.MAX, chain(self._bg_affinity)) @@ -60,6 +64,7 @@ def _enforce(self) -> None: DVFS.set_freq(self._cur_step, self._background_wl.cpuset) self._bg_dvfs.save_freq(self._cur_step) + self.update_isolation_config() def _first_decision(self) -> NextStep: metric_diff = self._foreground_wl.calc_metric_diff() @@ -108,3 +113,7 @@ def _monitoring_result(self) -> NextStep: return NextStep.STOP else: return NextStep.STRENGTHEN + + def update_isolation_config(self) -> None: + self._foreground_wl._dvfs = 
self._fg_dvfs + self._background_wl._dvfs = self._bg_dvfs From c251e14b3eedff29b9865190176b0aa638b56cdb Mon Sep 17 00:00:00 2001 From: Yoonsung Nam Date: Sat, 29 Sep 2018 16:05:01 +0900 Subject: [PATCH 20/20] fix: Add swap setting related codes --- isolating_controller/utils/cgroup.py | 10 +- isolating_controller/utils/dvfs.py | 2 +- isolating_controller/utils/resctrl.py | 43 +++++++- swap_iso.py | 143 +++++++++++++++++++------- 4 files changed, 151 insertions(+), 47 deletions(-) diff --git a/isolating_controller/utils/cgroup.py b/isolating_controller/utils/cgroup.py index aadbf7b..8d2e2e8 100644 --- a/isolating_controller/utils/cgroup.py +++ b/isolating_controller/utils/cgroup.py @@ -36,19 +36,25 @@ def assign_mems(self, socket_set: Set[int]) -> None: mem_ids = ','.join(map(str, socket_set)) subprocess.check_call(args=('cgset', '-r', f'cpuset.mems={mem_ids}', self._group_name)) - def _get_cpu_affinity_from_group(self) -> Set[int]: + def get_cpu_affinity_from_group(self) -> Set[int]: with open(f'{Cgroup.CPUSET_MOUNT_POINT}/{self._group_name}/cpuset.cpus', "r") as fp: line: str = fp.readline() core_set: Set[int] = convert_to_set(line) return core_set + def get_mem_affinity_from_group(self) -> Set[int]: + with open(f'{Cgroup.CPUSET_MOUNT_POINT}/{self._group_name}/cpuset.mems', "r") as fp: + line: str = fp.readline() + mem_set: Set[int] = convert_to_set(line) + return mem_set + def limit_cpu_quota(self, limit_percentage: float, period: Optional[int]=None) -> None: if period is None: with open(f'{Cgroup.CPU_MOUNT_POINT}/cpu.cfs_period_us', "r") as fp: line: str = fp.readline() period = int(line) - cpu_cores = self._get_cpu_affinity_from_group() + cpu_cores = self.get_cpu_affinity_from_group() quota = int(period * limit_percentage/100 * len(cpu_cores)) subprocess.check_call(args=('cgset', '-r', f'cpu.cfs_quota_us={quota}', self._group_name)) diff --git a/isolating_controller/utils/dvfs.py b/isolating_controller/utils/dvfs.py index 835206e..2adfc9e 100644 --- 
a/isolating_controller/utils/dvfs.py +++ b/isolating_controller/utils/dvfs.py @@ -21,7 +21,7 @@ def __init__(self, group_name, cpu_affinity): self.set_freq_cgroup(DVFS.MAX) def set_freq_cgroup(self, target_freq: int): - cur_grp_cpuset = self._cur_cgroup._get_cpu_affinity_from_group() + cur_grp_cpuset = self._cur_cgroup.get_cpu_affinity_from_group() DVFS.set_freq(target_freq, chain(cur_grp_cpuset)) @property diff --git a/isolating_controller/utils/resctrl.py b/isolating_controller/utils/resctrl.py index d7f13ed..0bf3825 100644 --- a/isolating_controller/utils/resctrl.py +++ b/isolating_controller/utils/resctrl.py @@ -1,12 +1,8 @@ # coding: UTF-8 import subprocess -import asyncio from pathlib import Path -from typing import Dict, Iterable, List, Tuple - -#import aiofiles -#from aiofiles.base import AiofilesContextManager +from typing import List def len_of_mask(mask: str) -> int: @@ -65,3 +61,40 @@ def gen_mask(start: int, end: int = None) -> str: def remove_group(self) -> None: subprocess.check_call(args=('sudo', 'rmdir', str(ResCtrl.MOUNT_POINT / self._group_name))) + + def get_llc_mask(self) -> List[str]: + """ + :return: `socket_masks` which is the elements of list in hex_str + """ + proc = subprocess.Popen(['cat', f'{ResCtrl.MOUNT_POINT}/{self._group_name}/schemata'], + stdout=subprocess.PIPE) + line = proc.communicate()[0] + striped_schema_line = line.lstrip('L3:').split(';') + socket_masks = list() + for i, item in enumerate(striped_schema_line): + mask = item.lstrip(f'{i}=') + socket_masks.append(mask) + return socket_masks + + @staticmethod + def get_llc_bits_from_mask(input_list: List[str]) -> List[int]: + """ + :param input_list: Assuming the elements of list is hex_str such as "0xfffff" + :return: + """ + output_list = list() + for mask in input_list: + hex_str = mask + hex_int = int(hex_str, 16) + bin_tmp = bin(hex_int) + llc_bits = len(bin_tmp.lstrip('0b')) + output_list.append(llc_bits) + return output_list + + def read_llc_bits(self) -> int: + 
socket_masks = self.get_llc_mask() + llc_bits_list = ResCtrl.get_llc_bits_from_mask(socket_masks) + ret_llc_bits = 0 + for llc_bits in llc_bits_list: + ret_llc_bits += llc_bits + return ret_llc_bits \ No newline at end of file diff --git a/swap_iso.py b/swap_iso.py index f22e249..b4ed2ff 100644 --- a/swap_iso.py +++ b/swap_iso.py @@ -10,13 +10,53 @@ from isolating_controller.workload import Workload from isolating_controller.isolation.policies.base_policy import IsolationPolicy from isolating_controller.utils.cgroup import Cgroup +from isolating_controller.isolation.isolators import CacheIsolator, CoreIsolator, MemoryIsolator -class SwapNextStep(IntEnum): +class SwapNext(IntEnum): OUT = 0 IN = 1 +class IsoSetting: + def __init__(self, wl: Workload): + self._wl = wl + self._skt_id = wl.socket_id + self._pid = wl.pid + self._cpuset = wl.cgroup.get_cpu_affinity_from_group() + self._mems = wl.cgroup.get_mem_affinity_from_group() + self._cpufreq = wl.dvfs.cpufreq() + self._llc_masks = wl.resctrl.get_llc_mask() + + @property + def workload(self): + return self._wl + + @property + def socket_id(self): + return self._skt_id + + @property + def pid(self): + return self._pid + + @property + def cpuset(self): + return self._cpuset + + @property + def mems(self): + return self._mems + + @property + def cpufreq(self): + return self._cpufreq + + @property + def llc_masks(self): + return self._llc_masks + + class SwapIsolator: # FIXME: This threshold needs tests (How small diff is right for swapping workloads?) # "-0.5" means the IPCs of workloads in a group drop 50% compared to solo-run @@ -24,11 +64,10 @@ class SwapIsolator: def __init__(self, isolation_groups: Dict[int, IsolationPolicy]) -> None: """ - :param isolation_groups: Dict. 
Key is the index of group and Value is the group itself """ self._all_groups: Dict[int, IsolationPolicy] = isolation_groups - self._swap_candidates: Dict[SwapNextStep, Workload] = dict() + self._swap_candidates: Dict[SwapNext, Workload] = dict() self._most_cont_group: Optional[IsolationPolicy] = None self._least_cont_group: Optional[IsolationPolicy] = None @@ -36,7 +75,9 @@ def __init__(self, isolation_groups: Dict[int, IsolationPolicy]) -> None: self._most_cont_workload: Optional[Workload] = None self._least_cont_workload: Optional[Workload] = None - self.aggr_ipc_diffs: Dict[float, int] = dict() # key:val = aggr_ipc_diff:grp_idx + # FIXME: Aggr. IPC Diffs may be changed to Agg. Inst. Diffs + self.aggr_ipc_diffs: Dict[float, IsolationPolicy] = dict() # key:val = aggr_ipc_diff:group + self._saved_group_setting: Tuple[IsoSetting, IsoSetting] = None def __del__(self): logger = logging.getLogger(__name__) @@ -56,7 +97,7 @@ def update_cont_group(self) -> None: group.update_aggr_ipc() aggr_ipc_diff = group.aggr_ipc all_ipc_diffs.append(aggr_ipc_diff) - self.aggr_ipc_diffs[aggr_ipc_diff] = grp_idx + self.aggr_ipc_diffs[aggr_ipc_diff] = group max_aggr_ipc_diff = max(all_ipc_diffs) min_aggr_ipc_diff = min(all_ipc_diffs) @@ -68,7 +109,7 @@ def update_cont_group(self) -> None: self._most_cont_group = swap_out_grp self._least_cont_group = swap_in_grp - def choose_swap_candidates(self): + def choose_swap_candidates(self) -> None: swap_out_grp = self._most_cont_group swap_in_grp = self._least_cont_group @@ -77,19 +118,10 @@ def choose_swap_candidates(self): swap_out_wl = swap_out_grp.least_mem_bw_workload swap_in_wl = swap_in_grp.least_mem_bw_workload # It selects the bg workload in swap_in group - self._swap_candidates[SwapNextStep.OUT] = swap_out_wl - self._swap_candidates[SwapNextStep.IN] = swap_in_wl - - def first_decision(self): - return + self._swap_candidates[SwapNext.OUT] = swap_out_wl + self._swap_candidates[SwapNext.IN] = swap_in_wl def swap_is_needed(self) -> bool: - 
#aggr_ipc_diff_list = list() - #for _, group in self._all_groups.items(): - # aggr_ipc_diff_list.append(group.aggr_ipc) - - #min_ipc_diff = min(aggr_ipc_diff_list) - #avg_min_ipc_diff = min_ipc_diff/2 # FIXME: We used the average ipc diff value (We assume two workloads in a group at most) avg_min_ipc_diff = self._most_cont_group.aggr_ipc/2 @@ -101,45 +133,78 @@ def swap_is_needed(self) -> bool: def do_swap(self) -> None: # Enable CPUSET memory migration - out_proc, in_proc = self.pre_swap_setup() + self.set_memory_migrate_on() - out_cpuset = self._swap_candidates[SwapNextStep.OUT].cpuset - in_cpuset = self._swap_candidates[SwapNextStep.IN].cpuset - out_skt = self._swap_candidates[SwapNextStep.OUT].socket_id - in_skt = self._swap_candidates[SwapNextStep.OUT].socket_id + out_iso_conf = self._saved_group_setting[SwapNext.OUT] + in_iso_conf = self._saved_group_setting[SwapNext.IN] # Suspend Procs and Enforce Swap Conf. - os.kill(self._swap_candidates[SwapNextStep.OUT].pid, signal.SIGSTOP) - os.kill(self._swap_candidates[SwapNextStep.IN].pid, signal.SIGSTOP) + os.kill(out_iso_conf.pid, signal.SIGSTOP) + os.kill(in_iso_conf.pid, signal.SIGSTOP) - out_proc.assign_cpus(set(in_cpuset)) - out_proc.assign_mems(set(out_skt)) - in_proc.assign_cpus(set(out_cpuset)) - in_proc.assign_mems(set(in_skt)) + self.apply_saved_iso_setting() # Resume Procs - os.kill(self._swap_candidates[SwapNextStep.OUT].pid, signal.SIGCONT) - os.kill(self._swap_candidates[SwapNextStep.IN].pid, signal.SIGCONT) + os.kill(out_iso_conf.pid, signal.SIGCONT) + os.kill(in_iso_conf.pid, signal.SIGCONT) - def pre_swap_setup(self) -> Tuple[Cgroup, Cgroup]: - swap_out_workload = self._swap_candidates[SwapNextStep.OUT] - swap_in_workload = self._swap_candidates[SwapNextStep.IN] + def set_memory_migrate_on(self) -> None: + swap_out_workload = self._swap_candidates[SwapNext.OUT] + swap_in_workload = self._swap_candidates[SwapNext.IN] swap_out_grp_name = f'{swap_out_workload.name}_{swap_out_workload.pid}' 
swap_in_grp_name = f'{swap_in_workload.name}_{swap_in_workload.pid}' - out_proc = Cgroup(swap_out_grp_name, 'cpuset,cpu') - in_proc = Cgroup(swap_in_grp_name, 'cpuset,cpu') - - out_proc.enable_memory_migrate() - in_proc.enable_memory_migrate() + out_cgroup = Cgroup(swap_out_grp_name, 'cpuset,cpu') + in_cgroup = Cgroup(swap_in_grp_name, 'cpuset,cpu') - return out_proc, in_proc + out_cgroup.enable_memory_migrate() + in_cgroup.enable_memory_migrate() def try_swap(self) -> None: self.update_cont_group() self.choose_swap_candidates() if self.swap_is_needed: + self.save_group_setting() self.do_swap() + def save_group_setting(self) -> None: + # TODO: Before do_swap, swapper should save the group's isolation setting + out_proc: Workload = self._swap_candidates[SwapNext.OUT] + in_proc: Workload = self._swap_candidates[SwapNext.IN] + out_iso_conf = IsoSetting(out_proc) + in_iso_conf = IsoSetting(in_proc) + self._saved_group_setting = (out_iso_conf, in_iso_conf) + + def apply_saved_iso_setting(self) -> None: + # TODO: After do_swap, swapper should load the group's isolation setting + out_iso_conf = self._saved_group_setting[SwapNext.OUT] + in_iso_conf = self._saved_group_setting[SwapNext.IN] + + swap_out_wl = out_iso_conf.workload + swap_in_wl = in_iso_conf.workload + + # Apply CPUSET + swap_out_wl.cgroup.assign_cpus(in_iso_conf.cpuset) + swap_in_wl.cgroup.assign_cpus(out_iso_conf.cpuset) + + # Apply Mems + swap_out_wl.cgroup.assign_mems(in_iso_conf.mems) + swap_in_wl.cgroup.assign_mems(out_iso_conf.mems) + + # Apply CPU freq + swap_out_wl.dvfs.set_freq_cgroup(in_iso_conf.cpufreq) + swap_in_wl.dvfs.set_freq_cgroup(out_iso_conf.cpufreq) + + # Apply llc masks + swap_out_wl.resctrl.assign_llc(in_iso_conf.llc_masks) + swap_in_wl.resctrl.assign_llc(out_iso_conf.llc_masks) + + + + + + + +