diff --git a/controller.py b/controller.py index 994df41..f63b014 100755 --- a/controller.py +++ b/controller.py @@ -2,140 +2,42 @@ # coding: UTF-8 import argparse -import functools -import json +import datetime import logging +import os import subprocess import sys import time -from threading import Thread -from typing import Dict +from typing import Dict, Optional -import pika import psutil -from pika import BasicProperties -from pika.adapters.blocking_connection import BlockingChannel -from pika.spec import Basic import isolating_controller from isolating_controller.isolation import NextStep from isolating_controller.isolation.isolators import Isolator -from isolating_controller.isolation.policies import DiffPolicy, IsolationPolicy -from isolating_controller.metric_container.basic_metric import BasicMetric -from isolating_controller.workload import Workload +from isolating_controller.isolation.policies import AggressiveWViolationPolicy, IsolationPolicy +from isolating_controller.isolation.swapper import SwapIsolator from pending_queue import PendingQueue +from polling_thread import PollingThread MIN_PYTHON = (3, 6) -class Singleton(type): - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - return cls._instances[cls] - - -class MainController(metaclass=Singleton): +class Controller: def __init__(self, metric_buf_size: int) -> None: - self._metric_buf_size = metric_buf_size - - self._rmq_host = 'localhost' - self._rmq_creation_queue = 'workload_creation' - - self._pending_wl = PendingQueue(DiffPolicy) - self._control_thread = ControlThread(self._pending_wl) - - def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicProperties, body: bytes) -> None: - ch.basic_ack(method.delivery_tag) - - arr = body.decode().strip().split(',') - - logger = logging.getLogger('monitoring.workload_creation') - logger.debug(f'{arr} is received from 
workload_creation queue') - - if len(arr) != 4: - return - - wl_name, pid, perf_pid, perf_interval = arr - pid = int(pid) - perf_pid = int(perf_pid) - perf_interval = int(perf_interval) - - if not psutil.pid_exists(pid): - return + self._pending_queue: PendingQueue = PendingQueue(AggressiveWViolationPolicy) - workload = Workload(wl_name, pid, perf_pid, perf_interval) + self._interval: float = 0.2 # scheduling interval (sec) + self._profile_interval: float = 1.0 # check interval for phase change (sec) + self._solorun_interval: float = 2.0 # the FG's solorun profiling interval (sec) + self._solorun_count: Dict[IsolationPolicy, Optional[int]] = dict() - # FIXME: hard coded - if wl_name == 'SP': - self._pending_wl.add_bg(workload) - else: - self._pending_wl.add_fg(workload) - - logger.info(f'{workload} is created') - - wl_queue_name = '{}({})'.format(wl_name, pid) - ch.queue_declare(wl_queue_name) - ch.basic_consume(functools.partial(self._cbk_wl_monitor, workload), wl_queue_name) - - def _cbk_wl_monitor(self, workload: Workload, - ch: BlockingChannel, method: Basic.Deliver, _: BasicProperties, body: bytes) -> None: - metric = json.loads(body.decode()) - ch.basic_ack(method.delivery_tag) - - item = BasicMetric(metric['l2miss'], - metric['l3miss'], - metric['instructions'], - metric['cycles'], - metric['stall_cycles'], - metric['wall_cycles'], - metric['intra_coh'], - metric['inter_coh'], - metric['llc_size'], - metric['local_mem'], - metric['remote_mem'], - workload.perf_interval) - - logger = logging.getLogger(f'monitoring.metric.{workload}') - logger.debug(f'{metric} is given from ') - - metric_que = workload.metrics - - if len(metric_que) == self._metric_buf_size: - metric_que.pop() - - metric_que.appendleft(item) - - def run(self) -> None: - logger = logging.getLogger(__name__) - - self._control_thread.start() - - connection = pika.BlockingConnection(pika.ConnectionParameters(host=self._rmq_host)) - channel = connection.channel() - - 
channel.queue_declare(self._rmq_creation_queue) - channel.basic_consume(self._cbk_wl_creation, self._rmq_creation_queue) - - try: - logger.debug('starting consuming thread') - channel.start_consuming() - - except KeyboardInterrupt: - channel.close() - connection.close() - - -class ControlThread(Thread): - def __init__(self, pending_queue: PendingQueue) -> None: - super().__init__(daemon=True) + self._isolation_groups: Dict[IsolationPolicy, int] = dict() - self._pending_queue: PendingQueue = pending_queue + self._polling_thread = PollingThread(metric_buf_size, self._pending_queue) - self._interval: int = 2 # Scheduling interval - self._isolation_groups: Dict[IsolationPolicy, int] = dict() + # Swapper init + self._swapper: SwapIsolator = SwapIsolator(self._isolation_groups) def _isolate_workloads(self) -> None: logger = logging.getLogger(__name__) @@ -145,6 +47,28 @@ def _isolate_workloads(self) -> None: logger.info(f'***************isolation of {group.name} #{iteration_num}***************') try: + if group.in_solorun_profiling: + if iteration_num - self._solorun_count[group] >= int(self._solorun_interval / self._interval): + logger.info('Stopping solorun profiling...') + + group.stop_solorun_profiling() + del self._solorun_count[group] + + logger.info('skipping isolation... 
because corun data isn\'t collected yet') + else: + logger.info('skipping isolation because of solorun profiling...') + + continue + + # TODO: first expression can lead low reactivity + elif iteration_num % int(self._profile_interval / self._interval) == 0 and group.profile_needed(): + logger.info('Starting solorun profiling...') + group.start_solorun_profiling() + self._solorun_count[group] = iteration_num + group.set_idle_isolator() + logger.info('skipping isolation because of solorun profiling...') + continue + if group.new_isolator_needed: group.choose_next_isolator() @@ -173,6 +97,10 @@ def _isolate_workloads(self) -> None: finally: self._isolation_groups[group] += 1 + if len(tuple(g for g in self._isolation_groups if g.safe_to_swap)) >= 2: + if self._swapper.swap_is_needed(): + self._swapper.do_swap() + def _register_pending_workloads(self) -> None: """ This function detects and registers the spawned workloads(threads). @@ -185,7 +113,6 @@ def _register_pending_workloads(self) -> None: logger.info(f'{pending_group} is created') self._isolation_groups[pending_group] = 0 - pending_group.init_isolators() def _remove_ended_groups(self) -> None: """ @@ -203,9 +130,15 @@ def _remove_ended_groups(self) -> None: logger.info(f'{group} of {ended_workload.name} is ended') # remove from containers + group.reset() del self._isolation_groups[group] + if group.in_solorun_profiling: + group.background_workload.resume() + del self._solorun_count[group] def run(self) -> None: + self._polling_thread.start() + logger = logging.getLogger(__name__) logger.info('starting isolation loop') @@ -214,7 +147,6 @@ def run(self) -> None: self._register_pending_workloads() time.sleep(self._interval) - self._isolate_workloads() @@ -223,24 +155,32 @@ def main() -> None: parser.add_argument('-b', '--metric-buf-size', dest='buf_size', default='50', type=int, help='metric buffer size per thread. 
(default : 50)') + os.makedirs('logs', exist_ok=True) + args = parser.parse_args() + formatter = logging.Formatter('%(asctime)s [%(levelname)s]: %(message)s') stream_handler = logging.StreamHandler() - stream_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s]: %(message)s')) + file_handler = logging.FileHandler(f'logs/debug_{datetime.datetime.now().isoformat()}.log') + stream_handler.setFormatter(formatter) + file_handler.setFormatter(formatter) controller_logger = logging.getLogger(__name__) controller_logger.setLevel(logging.INFO) controller_logger.addHandler(stream_handler) + controller_logger.addHandler(file_handler) module_logger = logging.getLogger(isolating_controller.__name__) module_logger.setLevel(logging.DEBUG) module_logger.addHandler(stream_handler) + module_logger.addHandler(file_handler) monitoring_logger = logging.getLogger('monitoring') monitoring_logger.setLevel(logging.INFO) monitoring_logger.addHandler(stream_handler) + monitoring_logger.addHandler(file_handler) - controller = MainController(args.buf_size) + controller = Controller(args.buf_size) controller.run() diff --git a/isolating_controller/isolation/__init__.py b/isolating_controller/isolation/__init__.py index 104511c..9075717 100644 --- a/isolating_controller/isolation/__init__.py +++ b/isolating_controller/isolation/__init__.py @@ -8,3 +8,10 @@ class NextStep(IntEnum): WEAKEN = 2 STOP = 3 IDLE = 4 + + +class ResourceType(IntEnum): + CPU = 0 + CACHE = 1 + MEMORY = 2 + Unknown = 3 diff --git a/isolating_controller/isolation/isolators/__init__.py b/isolating_controller/isolation/isolators/__init__.py index b0084b8..6ec23c9 100644 --- a/isolating_controller/isolation/isolators/__init__.py +++ b/isolating_controller/isolation/isolators/__init__.py @@ -1,8 +1,10 @@ # coding: UTF-8 -from .base_isolator import Isolator +from .affinity import AffinityIsolator +from .base import Isolator from .cache import CacheIsolator +from .core import CoreIsolator from .idle import 
IdleIsolator from .memory import MemoryIsolator from .schedule import SchedIsolator diff --git a/isolating_controller/isolation/isolators/affinity.py b/isolating_controller/isolation/isolators/affinity.py new file mode 100644 index 0000000..9b88cec --- /dev/null +++ b/isolating_controller/isolation/isolators/affinity.py @@ -0,0 +1,57 @@ +# coding: UTF-8 + +import logging +from typing import Optional + +from .base import Isolator +from ...metric_container.basic_metric import MetricDiff +from ...workload import Workload + + +class AffinityIsolator(Isolator): + def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: + super().__init__(foreground_wl, background_wl) + + self._cur_step: int = self._foreground_wl.orig_bound_cores[-1] + + self._stored_config: Optional[int] = None + + @classmethod + def _get_metric_type_from(cls, metric_diff: MetricDiff) -> float: + return metric_diff.instruction_ps + + def strengthen(self) -> 'AffinityIsolator': + self._cur_step += 1 + return self + + @property + def is_max_level(self) -> bool: + # FIXME: hard coded + return self._cur_step + 1 == self._background_wl.bound_cores[0] + + @property + def is_min_level(self) -> bool: + return self._foreground_wl.orig_bound_cores == self._foreground_wl.bound_cores + + def weaken(self) -> 'AffinityIsolator': + self._cur_step -= 1 + return self + + def enforce(self) -> None: + logger = logging.getLogger(__name__) + logger.info(f'affinity of foreground is {self._foreground_wl.orig_bound_cores[0]}-{self._cur_step}') + + self._foreground_wl.bound_cores = range(self._foreground_wl.orig_bound_cores[0], self._cur_step + 1) + + def reset(self) -> None: + if self._foreground_wl.is_running: + self._foreground_wl.bound_cores = self._foreground_wl.orig_bound_cores + + def store_cur_config(self) -> None: + self._stored_config = self._cur_step + + def load_cur_config(self) -> None: + super().load_cur_config() + + self._cur_step = self._stored_config + self._stored_config = None diff --git 
a/isolating_controller/isolation/isolators/base.py b/isolating_controller/isolation/isolators/base.py new file mode 100644 index 0000000..459a538 --- /dev/null +++ b/isolating_controller/isolation/isolators/base.py @@ -0,0 +1,159 @@ +# coding: UTF-8 + +import logging +from abc import ABCMeta, abstractmethod +from typing import Any, ClassVar, Optional + +from .. import NextStep +from ...metric_container.basic_metric import MetricDiff +from ...workload import Workload + + +class Isolator(metaclass=ABCMeta): + _DOD_THRESHOLD: ClassVar[float] = 0.005 + _FORCE_THRESHOLD: ClassVar[float] = 0.05 + + def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: + self._prev_metric_diff: MetricDiff = None + + self._foreground_wl = foreground_wl + self._background_wl = background_wl + + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE + + self._is_first_decision: bool = True + + self._stored_config: Optional[Any] = None + + def __del__(self): + self.reset() + + @abstractmethod + def strengthen(self) -> 'Isolator': + """ + Adjust the isolation parameter to allocate more resources to the foreground workload. + (Does not actually isolate) + + :return: current isolator object for method chaining + :rtype: Isolator + """ + pass + + @property + @abstractmethod + def is_max_level(self) -> bool: + pass + + @property + @abstractmethod + def is_min_level(self) -> bool: + pass + + @abstractmethod + def weaken(self) -> 'Isolator': + """ + Adjust the isolation parameter to allocate less resources to the foreground workload. + (Does not actually isolate) + + :return: current isolator object for method chaining + :rtype: Isolator + """ + pass + + @abstractmethod + def enforce(self) -> None: + """Actually applies the isolation parameter that set on the current object""" + pass + + def yield_isolation(self) -> None: + """ + Declare to stop the configuration search for the current isolator. 
+ Must be called when the current isolator yields the initiative. + """ + self._is_first_decision = True + + def _first_decision(self, cur_metric_diff: MetricDiff) -> NextStep: + curr_diff = self._get_metric_type_from(cur_metric_diff) + + logger = logging.getLogger(__name__) + logger.debug(f'current diff: {curr_diff:>7.4f}') + + if curr_diff < 0: + if self.is_max_level: + return NextStep.STOP + else: + return NextStep.STRENGTHEN + elif curr_diff <= self._FORCE_THRESHOLD: + return NextStep.STOP + else: + if self.is_min_level: + return NextStep.STOP + else: + return NextStep.WEAKEN + + def _monitoring_result(self, prev_metric_diff: MetricDiff, cur_metric_diff: MetricDiff) -> NextStep: + curr_diff = self._get_metric_type_from(cur_metric_diff) + prev_diff = self._get_metric_type_from(prev_metric_diff) + diff_of_diff = curr_diff - prev_diff + + logger = logging.getLogger(__name__) + logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') + logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') + + if abs(diff_of_diff) <= self._DOD_THRESHOLD \ + or abs(curr_diff) <= self._DOD_THRESHOLD: + return NextStep.STOP + + elif curr_diff > 0: + if self.is_min_level: + return NextStep.STOP + else: + return NextStep.WEAKEN + + else: + if self.is_max_level: + return NextStep.STOP + else: + return NextStep.STRENGTHEN + + @classmethod + @abstractmethod + def _get_metric_type_from(cls, metric_diff: MetricDiff) -> float: + pass + + def decide_next_step(self) -> NextStep: + curr_metric_diff = self._foreground_wl.calc_metric_diff() + + if self._is_first_decision: + self._is_first_decision = False + next_step = self._first_decision(curr_metric_diff) + + else: + next_step = self._monitoring_result(self._prev_metric_diff, curr_metric_diff) + + self._prev_metric_diff = curr_metric_diff + + return next_step + + @abstractmethod + def reset(self) -> None: + """Restore to initial configuration""" + pass + + def change_fg_wl(self, new_workload: Workload) -> None: + 
self._foreground_wl = new_workload + self._prev_metric_diff = new_workload.calc_metric_diff() + + def change_bg_wl(self, new_workload: Workload) -> None: + self._background_wl = new_workload + + @abstractmethod + def store_cur_config(self) -> None: + """Store the current configuration""" + pass + + def load_cur_config(self) -> None: + """Load the current configuration""" + if self._stored_config is None: + raise ValueError('Store configuration first!') diff --git a/isolating_controller/isolation/isolators/base_isolator.py b/isolating_controller/isolation/isolators/base_isolator.py deleted file mode 100644 index 51a3129..0000000 --- a/isolating_controller/isolation/isolators/base_isolator.py +++ /dev/null @@ -1,82 +0,0 @@ -# coding: UTF-8 - -from abc import ABCMeta, abstractmethod - -from .. import NextStep -from ...metric_container.basic_metric import MetricDiff -from ...workload import Workload - - -class Isolator(metaclass=ABCMeta): - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: - self._prev_metric_diff: MetricDiff = foreground_wl.calc_metric_diff() - - self._foreground_wl = foreground_wl - self._background_wl = background_wl - - self._is_fist_decision: bool = True - - @abstractmethod - def strengthen(self) -> 'Isolator': - """ - Adjust the isolation parameter to allocate more resources to the foreground workload. - (Does not actually isolate) - - :return: current isolator object for method chaining - :rtype: Isolator - """ - pass - - @property - @abstractmethod - def is_max_level(self) -> bool: - pass - - @property - @abstractmethod - def is_min_level(self) -> bool: - pass - - @abstractmethod - def weaken(self) -> 'Isolator': - """ - Adjust the isolation parameter to allocate less resources to the foreground workload. 
- (Does not actually isolate) - - :return: current isolator object for method chaining - :rtype: Isolator - """ - pass - - @abstractmethod - def _enforce(self) -> None: - pass - - def enforce(self) -> None: - """Actually applies the isolation parameter that set on the current object""" - self._prev_metric_diff: MetricDiff = self._foreground_wl.calc_metric_diff() - - self._enforce() - - def yield_isolation(self) -> None: - """ - Declare to stop the configuration search for the current isolator. - Must be called when the current isolator yields the initiative. - """ - self._is_fist_decision = True - - @abstractmethod - def _first_decision(self) -> NextStep: - pass - - @abstractmethod - def _monitoring_result(self) -> NextStep: - pass - - def decide_next_step(self) -> NextStep: - if self._is_fist_decision: - self._is_fist_decision = False - return self._first_decision() - - else: - return self._monitoring_result() diff --git a/isolating_controller/isolation/isolators/cache.py b/isolating_controller/isolation/isolators/cache.py index 5def5d2..ae72263 100644 --- a/isolating_controller/isolation/isolators/cache.py +++ b/isolating_controller/isolation/isolators/cache.py @@ -1,52 +1,32 @@ # coding: UTF-8 import logging -from typing import Optional +from typing import Optional, Tuple -from .base_isolator import Isolator -from .. 
import NextStep -from ...utils import CAT +from .base import Isolator +from ...metric_container.basic_metric import MetricDiff +from ...utils import ResCtrl, numa_topology from ...workload import Workload class CacheIsolator(Isolator): - _DOD_THRESHOLD = 0.005 - _FORCE_THRESHOLD = 0.1 - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: super().__init__(foreground_wl, background_wl) self._prev_step: Optional[int] = None self._cur_step: Optional[int] = None - self._fg_grp_name = f'{foreground_wl.name}_{foreground_wl.pid}' - CAT.create_group(self._fg_grp_name) - for tid in foreground_wl.all_child_tid(): - CAT.add_task(self._fg_grp_name, tid) - - self._bg_grp_name = f'{background_wl.name}_{background_wl.pid}' - CAT.create_group(self._bg_grp_name) - for tid in background_wl.all_child_tid(): - CAT.add_task(self._bg_grp_name, tid) - - def __del__(self) -> None: - logger = logging.getLogger(__name__) - - if self._foreground_wl.is_running: - logger.debug(f'reset resctrl configuration of {self._foreground_wl}') - # FIXME: hard coded - CAT.assign(self._fg_grp_name, '1', CAT.gen_mask(0, CAT.MAX)) + self._stored_config: Optional[Tuple[int, int]] = None - if self._background_wl.is_running: - logger.debug(f'reset resctrl configuration of {self._background_wl}') - # FIXME: hard coded - CAT.assign(self._bg_grp_name, '1', CAT.gen_mask(0, CAT.MAX)) + @classmethod + def _get_metric_type_from(cls, metric_diff: MetricDiff) -> float: + return metric_diff.l3_hit_ratio def strengthen(self) -> 'CacheIsolator': self._prev_step = self._cur_step if self._cur_step is None: - self._cur_step = CAT.MAX // 2 + self._cur_step = ResCtrl.MAX_BITS // 2 else: self._cur_step += 1 @@ -66,81 +46,50 @@ def weaken(self) -> 'CacheIsolator': @property def is_max_level(self) -> bool: # FIXME: hard coded - return self._cur_step is not None and self._cur_step + CAT.STEP >= CAT.MAX + return self._cur_step is not None and self._cur_step + ResCtrl.STEP >= ResCtrl.MAX_BITS @property def 
is_min_level(self) -> bool: # FIXME: hard coded - return self._cur_step is None or self._cur_step - CAT.STEP <= CAT.MIN + return self._cur_step is None or self._cur_step - ResCtrl.STEP < ResCtrl.MIN_BITS - def _enforce(self) -> None: + def enforce(self) -> None: logger = logging.getLogger(__name__) if self._cur_step is None: logger.info('CAT off') - - # FIXME: hard coded - mask = CAT.gen_mask(0, CAT.MAX) - CAT.assign(self._fg_grp_name, '1', mask) - CAT.assign(self._bg_grp_name, '1', mask) + self.reset() else: - logger.info(f'foreground : background = {self._cur_step} : {CAT.MAX - self._cur_step}') + logger.info(f'foreground : background = {self._cur_step} : {ResCtrl.MAX_BITS - self._cur_step}') - # FIXME: hard coded - fg_mask = CAT.gen_mask(0, self._cur_step) - CAT.assign(self._fg_grp_name, '1', fg_mask) + # FIXME: hard coded -> The number of socket is two at most + masks = [ResCtrl.MIN_MASK, ResCtrl.MIN_MASK] + masks[self._foreground_wl.cur_socket_id()] = ResCtrl.gen_mask(0, self._cur_step) + self._foreground_wl.resctrl.assign_llc(*masks) - # FIXME: hard coded - bg_mask = CAT.gen_mask(self._cur_step) - CAT.assign(self._bg_grp_name, '1', bg_mask) + # FIXME: hard coded -> The number of socket is two at most + masks = [ResCtrl.MIN_MASK, ResCtrl.MIN_MASK] + masks[self._background_wl.cur_socket_id()] = ResCtrl.gen_mask(self._cur_step) + self._background_wl.resctrl.assign_llc(*masks) - def _first_decision(self) -> NextStep: - metric_diff = self._foreground_wl.calc_metric_diff() - curr_diff = metric_diff.l3_hit_ratio - - logger = logging.getLogger(__name__) - logger.debug(f'current diff: {curr_diff:>7.4f}') + def reset(self) -> None: + masks = [ResCtrl.MIN_MASK] * (max(numa_topology.cur_online_nodes()) + 1) - if curr_diff < 0: - if self.is_max_level: - return NextStep.STOP - else: - return NextStep.STRENGTHEN - elif curr_diff <= CacheIsolator._FORCE_THRESHOLD: - return NextStep.STOP - else: - if self.is_min_level: - return NextStep.STOP - else: - return NextStep.WEAKEN + 
if self._background_wl.is_running: + bg_masks = masks.copy() + bg_masks[self._background_wl.cur_socket_id()] = ResCtrl.MAX_MASK + self._background_wl.resctrl.assign_llc(*bg_masks) - # TODO: consider turn off cache partitioning - def _monitoring_result(self) -> NextStep: - metric_diff = self._foreground_wl.calc_metric_diff() + if self._foreground_wl.is_running: + masks[self._foreground_wl.cur_socket_id()] = ResCtrl.MAX_MASK + self._foreground_wl.resctrl.assign_llc(*masks) - curr_diff = metric_diff.l3_hit_ratio - prev_diff = self._prev_metric_diff.l3_hit_ratio - diff_of_diff = curr_diff - prev_diff + def store_cur_config(self) -> None: + self._stored_config = (self._prev_step, self._cur_step) - logger = logging.getLogger(__name__) - logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') - logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') - - if self._cur_step is not None \ - and not (CAT.MIN < self._cur_step < CAT.MAX) \ - or abs(diff_of_diff) <= CacheIsolator._DOD_THRESHOLD \ - or abs(curr_diff) <= CacheIsolator._DOD_THRESHOLD: - return NextStep.STOP - - elif curr_diff > 0: - if self.is_min_level: - return NextStep.STOP - else: - return NextStep.WEAKEN + def load_cur_config(self) -> None: + super().load_cur_config() - else: - if self.is_max_level: - return NextStep.STOP - else: - return NextStep.STRENGTHEN + self._prev_step, self._cur_step = self._stored_config + self._stored_config = None diff --git a/isolating_controller/isolation/isolators/core.py b/isolating_controller/isolation/isolators/core.py new file mode 100644 index 0000000..104ef10 --- /dev/null +++ b/isolating_controller/isolation/isolators/core.py @@ -0,0 +1,222 @@ +# coding: UTF-8 + +import logging +from typing import ClassVar, Optional, Tuple + +from .base import Isolator +from .. 
import NextStep, ResourceType +from ...metric_container.basic_metric import MetricDiff +from ...workload import Workload + + +class CoreIsolator(Isolator): + _INST_PS_THRESHOLD: ClassVar[float] = -0.5 + + def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: + super().__init__(foreground_wl, background_wl) + + # FIXME: hard coded (contiguous allocation) + self._cur_fg_step: int = foreground_wl.orig_bound_cores[-1] + self._cur_bg_step: int = background_wl.orig_bound_cores[0] + + self._bg_next_step: NextStep = NextStep.IDLE + self._fg_next_step: NextStep = NextStep.IDLE + + self._contentious_resource: ResourceType = ResourceType.MEMORY + + self._stored_config: Optional[Tuple[int, int]] = None + + def strengthen(self) -> 'CoreIsolator': + """ + Strengthen reduces the number of CPUs assigned to BG workloads and increase that of FG workload + TODO: Changing step size, if needed + """ + # NOTE: Caller is assumed that BG workload + + if self._bg_next_step == NextStep.STRENGTHEN: + self._cur_bg_step += 1 + + if self._fg_next_step == NextStep.WEAKEN: + self._cur_fg_step += 1 + + return self + + def weaken(self) -> 'CoreIsolator': + """ + Weaken increase the number of CPUs assigned to BG workloads and decrease that of FG workload + TODO: Changing step size, if needed + """ + # NOTE: Caller is assumed that BG workload + + if self._bg_next_step == NextStep.WEAKEN: + self._cur_bg_step -= 1 + + if self._fg_next_step == NextStep.STRENGTHEN: + self._cur_fg_step -= 1 + + return self + + @property + def is_max_level(self) -> bool: + # FIXME: hard coded (contiguous allocation) + return self._cur_bg_step == self._background_wl.orig_bound_cores[-1] and \ + self._cur_fg_step == self._cur_bg_step - 1 + + @property + def is_min_level(self) -> bool: + return self._cur_bg_step == self._background_wl.orig_bound_cores[0] and \ + self._cur_fg_step == self._foreground_wl.orig_bound_cores[-1] + + def enforce(self) -> None: + logger = logging.getLogger(__name__) + 
logger.debug(f'fg affinity : {self._foreground_wl.orig_bound_cores[0]}-{self._cur_fg_step}') + logger.debug(f'bg affinity : {self._cur_bg_step}-{self._background_wl.orig_bound_cores[-1]}') + + # FIXME: hard coded (contiguous allocation) + self._foreground_wl.bound_cores = range(self._foreground_wl.orig_bound_cores[0], self._cur_fg_step + 1) + self._background_wl.bound_cores = range(self._cur_bg_step, self._background_wl.orig_bound_cores[-1] + 1) + + def _first_decision(self, metric_diff: MetricDiff) -> NextStep: + curr_diff = None + + if self._contentious_resource == ResourceType.MEMORY: + curr_diff = metric_diff.local_mem_util_ps + elif self._contentious_resource == ResourceType.CPU: + curr_diff = metric_diff.instruction_ps + + logger = logging.getLogger(__name__) + logger.debug(f'current diff: {curr_diff:>7.4f}') + + # FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) + if curr_diff < 0: + if self.is_max_level: + return NextStep.STOP + else: + return self._strengthen_condition(metric_diff.instruction_ps) + + elif curr_diff <= CoreIsolator._FORCE_THRESHOLD: + return NextStep.STOP + + else: + if self.is_min_level: + return NextStep.STOP + else: + return self._weaken_condition(metric_diff.instruction_ps) + + def _monitoring_result(self, prev_metric_diff: MetricDiff, cur_metric_diff: MetricDiff) -> NextStep: + logger = logging.getLogger(__name__) + logger.info(f'self._contentious_resource: {self._contentious_resource.name}') + + curr_diff = None + diff_of_diff = None + if self._contentious_resource == ResourceType.MEMORY: + curr_diff = cur_metric_diff.local_mem_util_ps + prev_diff = prev_metric_diff.local_mem_util_ps + diff_of_diff = curr_diff - prev_diff + elif self._contentious_resource == ResourceType.CPU: + curr_diff = cur_metric_diff.instruction_ps + prev_diff = prev_metric_diff.instruction_ps + diff_of_diff = curr_diff - prev_diff + + logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') + logger.debug(f'current diff: 
{curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') + + # Case1 : diff is too small to perform isolation + if abs(diff_of_diff) <= CoreIsolator._DOD_THRESHOLD \ + or abs(curr_diff) <= CoreIsolator._DOD_THRESHOLD: + return NextStep.STOP + + # Case2 : FG shows lower contention than solo-run -> Slower FG or Faster BG + elif curr_diff > 0: + if self.is_min_level: + return NextStep.STOP + else: + return self._weaken_condition(cur_metric_diff.instruction_ps) + + # Case3 : FG shows higher contention than solo-run + else: + if self.is_max_level: + return NextStep.STOP + else: + return self._strengthen_condition(cur_metric_diff.instruction_ps) + + def _weaken_condition(self, fg_instruction_ps: float) -> NextStep: + # BG Next Step Decision + # ResourceType.CPU - If FG workload not fully use all its assigned cores..., then BG can weaken! + if self._contentious_resource == ResourceType.CPU: + fg_not_used_cores = len(self._foreground_wl.bound_cores) - self._foreground_wl.number_of_threads + + if fg_not_used_cores == 0: + self._bg_next_step = NextStep.IDLE + elif fg_not_used_cores > 0: + self._bg_next_step = NextStep.WEAKEN + # ResourceType.MEMORY - If BG workload was strengthened than its assigned cores, then BG can weaken! 
+ elif self._contentious_resource == ResourceType.MEMORY: + if self._cur_bg_step == self._background_wl.orig_bound_cores[0]: + self._bg_next_step = NextStep.IDLE + else: + self._bg_next_step = NextStep.WEAKEN + + # FIXME: Specifying fg's strengthen/weaken condition (related to fg's performance) + # FIXME: hard coded (contiguous allocation) + # FG Next Step Decision + if fg_instruction_ps > self._INST_PS_THRESHOLD and self._foreground_wl.orig_bound_cores[-1] < self._cur_fg_step: + self._fg_next_step = NextStep.STRENGTHEN + else: + self._fg_next_step = NextStep.IDLE + + if self._bg_next_step is NextStep.IDLE and self._fg_next_step is NextStep.IDLE: + return NextStep.STOP + else: + return NextStep.WEAKEN + + def _strengthen_condition(self, fg_instruction_ps: float) -> NextStep: + logger = logging.getLogger(__name__) + + # BG Next Step Decision + # ResourceType.CPU - If FG workload shows low performance and FG's threads are larger than its assigned cores, + # then BG can strengthen! + if self._contentious_resource == ResourceType.CPU: + if fg_instruction_ps > self._INST_PS_THRESHOLD: + self._bg_next_step = NextStep.IDLE + elif fg_instruction_ps <= self._INST_PS_THRESHOLD and \ + self._foreground_wl.number_of_threads > len(self._foreground_wl.bound_cores): + self._bg_next_step = NextStep.STRENGTHEN + + # ResourceType.MEMORY - If BG workload can strengthen its cores... , then strengthen BG's cores! 
+ elif self._contentious_resource == ResourceType.MEMORY: + if self._cur_bg_step == self._background_wl.orig_bound_cores[-1]: + self._bg_next_step = NextStep.IDLE + else: + self._bg_next_step = NextStep.STRENGTHEN + + # FIXME: hard coded (contiguous allocation) + # FG Next Step Decision + logger.debug(f'FG threads: {self._foreground_wl.number_of_threads}, ' + f'orig_bound_cores: {self._foreground_wl.orig_bound_cores}') + if fg_instruction_ps < self._INST_PS_THRESHOLD \ + and (self._bg_next_step is NextStep.STRENGTHEN or self._cur_bg_step - self._cur_fg_step > 1) \ + and self._foreground_wl.number_of_threads > len(self._foreground_wl.orig_bound_cores): + self._fg_next_step = NextStep.WEAKEN + else: + self._fg_next_step = NextStep.IDLE + + if self._bg_next_step is NextStep.IDLE and self._fg_next_step is NextStep.IDLE: + return NextStep.STOP + else: + return NextStep.STRENGTHEN + + def reset(self) -> None: + if self._background_wl.is_running: + self._background_wl.bound_cores = self._background_wl.orig_bound_cores + if self._foreground_wl.is_running: + self._foreground_wl.bound_cores = self._foreground_wl.orig_bound_cores + + def store_cur_config(self) -> None: + self._stored_config = (self._cur_fg_step, self._cur_bg_step) + + def load_cur_config(self) -> None: + super().load_cur_config() + + self._cur_fg_step, self._cur_bg_step = self._stored_config + self._stored_config = None diff --git a/isolating_controller/isolation/isolators/idle.py b/isolating_controller/isolation/isolators/idle.py index d720a59..aa4b8ea 100644 --- a/isolating_controller/isolation/isolators/idle.py +++ b/isolating_controller/isolation/isolators/idle.py @@ -1,13 +1,18 @@ # coding: UTF-8 -from .base_isolator import Isolator +from .base import Isolator from .. 
import NextStep +from ...metric_container.basic_metric import MetricDiff class IdleIsolator(Isolator): def __init__(self) -> None: pass + @classmethod + def _get_metric_type_from(cls, metric_diff: MetricDiff) -> float: + pass + def strengthen(self) -> 'Isolator': pass @@ -22,14 +27,27 @@ def is_min_level(self) -> bool: def weaken(self) -> 'Isolator': pass - def _enforce(self) -> None: + def enforce(self) -> None: pass - def _first_decision(self) -> NextStep: + def _first_decision(self, _) -> NextStep: + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE return NextStep.IDLE def decide_next_step(self) -> NextStep: return self._monitoring_result() - def _monitoring_result(self) -> NextStep: + def _monitoring_result(self, **kwargs) -> NextStep: + self._fg_next_step = NextStep.IDLE + self._bg_next_step = NextStep.IDLE return NextStep.IDLE + + def reset(self) -> None: + pass + + def store_cur_config(self) -> None: + pass + + def load_cur_config(self) -> None: + pass diff --git a/isolating_controller/isolation/isolators/memory.py b/isolating_controller/isolation/isolators/memory.py index 3a2e156..8dcefe3 100644 --- a/isolating_controller/isolation/isolators/memory.py +++ b/isolating_controller/isolation/isolators/memory.py @@ -1,28 +1,25 @@ # coding: UTF-8 import logging -from itertools import chain +from typing import Optional -from .base_isolator import Isolator -from .. 
import NextStep +from .base import Isolator +from ...metric_container.basic_metric import MetricDiff from ...utils import DVFS from ...workload import Workload class MemoryIsolator(Isolator): - _DOD_THRESHOLD = 0.005 - _FORCE_THRESHOLD = 0.1 - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: super().__init__(foreground_wl, background_wl) - self._bg_affinity = background_wl.cpuset - # FIXME: hard coded - self._cur_step = DVFS.MAX + self._cur_step: int = DVFS.MAX + self._stored_config: Optional[int] = None - def __del__(self) -> None: - DVFS.set_freq(DVFS.MAX, chain(self._bg_affinity)) + @classmethod + def _get_metric_type_from(cls, metric_diff: MetricDiff) -> float: + return metric_diff.local_mem_util_ps def strengthen(self) -> 'MemoryIsolator': self._cur_step -= DVFS.STEP @@ -40,58 +37,22 @@ def is_max_level(self) -> bool: @property def is_min_level(self) -> bool: # FIXME: hard coded - return DVFS.MAX <= self._cur_step + DVFS.STEP + return DVFS.MAX < self._cur_step + DVFS.STEP - def _enforce(self) -> None: + def enforce(self) -> None: logger = logging.getLogger(__name__) - logger.info(f'frequency of cpuset {self._background_wl.cpuset} is {self._cur_step / 1_000_000}GHz') + logger.info(f'frequency of bound_cores {self._background_wl.bound_cores} is {self._cur_step / 1_000_000}GHz') - DVFS.set_freq(self._cur_step, self._background_wl.cpuset) + DVFS.set_freq(self._cur_step, self._background_wl.bound_cores) - def _first_decision(self) -> NextStep: - metric_diff = self._foreground_wl.calc_metric_diff() - curr_diff = metric_diff.local_mem_util_ps + def reset(self) -> None: + DVFS.set_freq(DVFS.MAX, self._background_wl.orig_bound_cores) - logger = logging.getLogger(__name__) - logger.debug(f'current diff: {curr_diff:>7.4f}') - - if curr_diff < 0: - if self.is_max_level: - return NextStep.STOP - else: - return NextStep.STRENGTHEN - elif curr_diff <= MemoryIsolator._FORCE_THRESHOLD: - return NextStep.STOP - else: - if self.is_min_level: - return 
NextStep.STOP - else: - return NextStep.WEAKEN - - def _monitoring_result(self) -> NextStep: - metric_diff = self._foreground_wl.calc_metric_diff() - - curr_diff = metric_diff.local_mem_util_ps - prev_diff = self._prev_metric_diff.local_mem_util_ps - diff_of_diff = curr_diff - prev_diff + def store_cur_config(self) -> None: + self._stored_config = self._cur_step - logger = logging.getLogger(__name__) - logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') - logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') - - if not (DVFS.MIN < self._cur_step < DVFS.MAX) \ - or abs(diff_of_diff) <= MemoryIsolator._DOD_THRESHOLD \ - or abs(curr_diff) <= MemoryIsolator._DOD_THRESHOLD: - return NextStep.STOP - - elif curr_diff > 0: - if self.is_max_level: - return NextStep.STOP - else: - return NextStep.WEAKEN - - else: - if self.is_min_level: - return NextStep.STOP - else: - return NextStep.STRENGTHEN + def load_cur_config(self) -> None: + super().load_cur_config() + + self._cur_step = self._stored_config + self._stored_config = None diff --git a/isolating_controller/isolation/isolators/schedule.py b/isolating_controller/isolation/isolators/schedule.py index 78ae378..6db2431 100644 --- a/isolating_controller/isolation/isolators/schedule.py +++ b/isolating_controller/isolation/isolators/schedule.py @@ -1,34 +1,25 @@ # coding: UTF-8 import logging +from typing import Optional -from .base_isolator import Isolator -from .. 
import NextStep -from ...utils import CgroupCpuset +from .base import Isolator +from ...metric_container.basic_metric import MetricDiff from ...workload import Workload class SchedIsolator(Isolator): - _DOD_THRESHOLD = 0.005 - _FORCE_THRESHOLD = 0.1 - def __init__(self, foreground_wl: Workload, background_wl: Workload) -> None: super().__init__(foreground_wl, background_wl) # FIXME: hard coded - self._cur_step = 24 - - self._bg_grp_name = f'{background_wl.name}_{background_wl.pid}' - self._prev_bg_affinity = background_wl.cpuset + self._cur_step = background_wl.orig_bound_cores[0] - CgroupCpuset.create_group(self._bg_grp_name) - CgroupCpuset.add_task(self._bg_grp_name, background_wl.pid) - # FIXME: hard coded - CgroupCpuset.assign(self._bg_grp_name, set(range(self._cur_step, 32))) + self._stored_config: Optional[int] = None - def __del__(self) -> None: - if self._background_wl.is_running: - CgroupCpuset.assign(self._bg_grp_name, set(self._prev_bg_affinity)) + @classmethod + def _get_metric_type_from(cls, metric_diff: MetricDiff) -> float: + return metric_diff.local_mem_util_ps def strengthen(self) -> 'SchedIsolator': self._cur_step += 1 @@ -40,61 +31,30 @@ def weaken(self) -> 'SchedIsolator': @property def is_max_level(self) -> bool: - # FIXME: hard coded - return self._cur_step == 31 + return self._cur_step == self._background_wl.orig_bound_cores[-1] @property def is_min_level(self) -> bool: # FIXME: hard coded - return self._cur_step == 24 + return self._cur_step - 1 == self._foreground_wl.bound_cores[-1] - def _enforce(self) -> None: + def enforce(self) -> None: logger = logging.getLogger(__name__) # FIXME: hard coded - logger.info(f'affinity of background is {self._cur_step}-31') + logger.info(f'affinity of background is {self._cur_step}-{self._background_wl.orig_bound_cores[-1]}') # FIXME: hard coded - CgroupCpuset.assign(self._bg_grp_name, set(range(self._cur_step, 32))) - - def _first_decision(self) -> NextStep: - metric_diff = 
self._foreground_wl.calc_metric_diff() - curr_diff = metric_diff.local_mem_util_ps + self._background_wl.bound_cores = range(self._cur_step, self._background_wl.orig_bound_cores[-1] + 1) - logger = logging.getLogger(__name__) - logger.debug(f'current diff: {curr_diff:>7.4f}') - - if curr_diff < 0: - if self.is_max_level: - return NextStep.STOP - else: - return NextStep.STRENGTHEN - elif curr_diff <= SchedIsolator._FORCE_THRESHOLD: - return NextStep.STOP - else: - if self.is_min_level: - return NextStep.STOP - else: - return NextStep.WEAKEN - - def _monitoring_result(self) -> NextStep: - metric_diff = self._foreground_wl.calc_metric_diff() - - curr_diff = metric_diff.local_mem_util_ps - prev_diff = self._prev_metric_diff.local_mem_util_ps - diff_of_diff = curr_diff - prev_diff - - logger = logging.getLogger(__name__) - logger.debug(f'diff of diff is {diff_of_diff:>7.4f}') - logger.debug(f'current diff: {curr_diff:>7.4f}, previous diff: {prev_diff:>7.4f}') + def reset(self) -> None: + if self._background_wl.is_running: + self._background_wl.bound_cores = self._background_wl.orig_bound_cores - # FIXME: hard coded - if not (24 < self._cur_step < 31) \ - or abs(diff_of_diff) <= SchedIsolator._DOD_THRESHOLD \ - or abs(curr_diff) <= SchedIsolator._DOD_THRESHOLD: - return NextStep.STOP + def store_cur_config(self) -> None: + self._stored_config = self._cur_step - elif curr_diff > 0: - return NextStep.WEAKEN + def load_cur_config(self) -> None: + super().load_cur_config() - else: - return NextStep.STRENGTHEN + self._cur_step = self._stored_config + self._stored_config = None diff --git a/isolating_controller/isolation/policies/__init__.py b/isolating_controller/isolation/policies/__init__.py index 6f9c6ee..bcd7ef3 100644 --- a/isolating_controller/isolation/policies/__init__.py +++ b/isolating_controller/isolation/policies/__init__.py @@ -1,7 +1,10 @@ # coding: UTF-8 -from .base_policy import IsolationPolicy, ResourceType -from .diff_policy import DiffPolicy -from 
.diff_with_violation_policy import DiffWViolationPolicy -from .greedy_diff_policy import GreedyDiffPolicy -from .greedy_diff_with_violation_policy import GreedyDiffWViolationPolicy +from .aggressive import AggressivePolicy +from .aggressive_with_violation import AggressiveWViolationPolicy +from .base import IsolationPolicy +from .defensive import DefensivePolicy +from .defensive_cpu import DefensiveCPUPolicy +from .defensive_with_violation import DefensiveWViolationPolicy +from .greedy import GreedyPolicy +from .greedy_with_violation import GreedyWViolationPolicy diff --git a/isolating_controller/isolation/policies/aggressive.py b/isolating_controller/isolation/policies/aggressive.py new file mode 100644 index 0000000..7b653f8 --- /dev/null +++ b/isolating_controller/isolation/policies/aggressive.py @@ -0,0 +1,52 @@ +# coding: UTF-8 + +import logging + +from .base import IsolationPolicy +from .. import ResourceType +from ..isolators import AffinityIsolator, CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from ...workload import Workload + + +class AggressivePolicy(IsolationPolicy): + def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: + super().__init__(fg_wl, bg_wl) + + self._is_mem_isolated = False + + @property + def new_isolator_needed(self) -> bool: + return isinstance(self._cur_isolator, IdleIsolator) + + def choose_next_isolator(self) -> bool: + logger = logging.getLogger(__name__) + logger.debug('looking for new isolation...') + + # if foreground is web server (CPU critical) + if len(self._fg_wl.bound_cores) * 2 < self._fg_wl.number_of_threads: + if AffinityIsolator in self._isolator_map and not self._isolator_map[AffinityIsolator].is_max_level: + self._cur_isolator = self._isolator_map[AffinityIsolator] + logger.info(f'Starting {self._cur_isolator.__class__.__name__}...') + return True + + for resource, diff_value in self.contentious_resources(): + if resource is ResourceType.CACHE: + isolator = self._isolator_map[CacheIsolator] + 
elif resource is ResourceType.MEMORY: + if self._is_mem_isolated: + isolator = self._isolator_map[SchedIsolator] + self._is_mem_isolated = False + else: + isolator = self._isolator_map[MemoryIsolator] + self._is_mem_isolated = True + else: + raise NotImplementedError(f'Unknown ResourceType: {resource}') + + if diff_value < 0 and not isolator.is_max_level or \ + diff_value > 0 and not isolator.is_min_level: + self._cur_isolator = isolator + logger.info(f'Starting {self._cur_isolator.__class__.__name__}...') + return True + + logger.debug('A new Isolator has not been selected') + return False diff --git a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py b/isolating_controller/isolation/policies/aggressive_with_violation.py similarity index 67% rename from isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py rename to isolating_controller/isolation/policies/aggressive_with_violation.py index a10ef8b..3682122 100644 --- a/isolating_controller/isolation/policies/greedy_diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/aggressive_with_violation.py @@ -1,15 +1,16 @@ # coding: UTF-8 import logging +from typing import ClassVar -from .base_policy import ResourceType -from .greedy_diff_policy import GreedyDiffPolicy -from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from .aggressive import AggressivePolicy +from .. 
import ResourceType +from ..isolators import AffinityIsolator, CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator from ...workload import Workload -class GreedyDiffWViolationPolicy(GreedyDiffPolicy): - VIOLATION_THRESHOLD = 3 +class AggressiveWViolationPolicy(AggressivePolicy): + VIOLATION_THRESHOLD: ClassVar[int] = 3 def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: super().__init__(fg_wl, bg_wl) @@ -17,12 +18,15 @@ def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: self._violation_count: int = 0 def _check_violation(self) -> bool: + if isinstance(self._cur_isolator, AffinityIsolator): + return False + resource: ResourceType = self.contentious_resource() return \ resource is ResourceType.CACHE and not isinstance(self._cur_isolator, CacheIsolator) \ - or resource is ResourceType.MEMORY and (not isinstance(self._cur_isolator, MemoryIsolator) - and not isinstance(self._cur_isolator, SchedIsolator)) + or resource is ResourceType.MEMORY and not (isinstance(self._cur_isolator, MemoryIsolator) + or isinstance(self._cur_isolator, SchedIsolator)) @property def new_isolator_needed(self) -> bool: @@ -35,7 +39,7 @@ def new_isolator_needed(self) -> bool: self._violation_count += 1 - if self._violation_count >= GreedyDiffWViolationPolicy.VIOLATION_THRESHOLD: + if self._violation_count >= AggressiveWViolationPolicy.VIOLATION_THRESHOLD: logger.info('new isolator is required due to violation') self.set_idle_isolator() self._violation_count = 0 diff --git a/isolating_controller/isolation/policies/base.py b/isolating_controller/isolation/policies/base.py new file mode 100644 index 0000000..cd74176 --- /dev/null +++ b/isolating_controller/isolation/policies/base.py @@ -0,0 +1,190 @@ +# coding: UTF-8 + +import logging +from abc import ABCMeta, abstractmethod +from typing import ClassVar, Dict, Tuple, Type + +from .. 
import ResourceType +from ..isolators import CacheIsolator, IdleIsolator, Isolator, MemoryIsolator, SchedIsolator +from ..isolators.affinity import AffinityIsolator +from ...metric_container.basic_metric import BasicMetric, MetricDiff +from ...workload import Workload + + +class IsolationPolicy(metaclass=ABCMeta): + _IDLE_ISOLATOR: ClassVar[IdleIsolator] = IdleIsolator() + _VERIFY_THRESHOLD: ClassVar[int] = 3 + + def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: + self._fg_wl = fg_wl + self._bg_wl = bg_wl + + self._isolator_map: Dict[Type[Isolator], Isolator] = dict(( + (CacheIsolator, CacheIsolator(self._fg_wl, self._bg_wl)), + (AffinityIsolator, AffinityIsolator(self._fg_wl, self._bg_wl)), + (SchedIsolator, SchedIsolator(self._fg_wl, self._bg_wl)), + (MemoryIsolator, MemoryIsolator(self._fg_wl, self._bg_wl)), + )) + self._cur_isolator: Isolator = IsolationPolicy._IDLE_ISOLATOR + + self._in_solorun_profile: bool = False + self._cached_fg_num_threads: int = fg_wl.number_of_threads + self._solorun_verify_violation_count: int = 0 + + def __hash__(self) -> int: + return id(self) + + def __repr__(self) -> str: + return f'{self.__class__.__name__} ' + + def __del__(self) -> None: + isolators = tuple(self._isolator_map.keys()) + for isolator in isolators: + del self._isolator_map[isolator] + + @property + @abstractmethod + def new_isolator_needed(self) -> bool: + pass + + @abstractmethod + def choose_next_isolator(self) -> bool: + pass + + def contentious_resource(self) -> ResourceType: + return self.contentious_resources()[0][0] + + def contentious_resources(self) -> Tuple[Tuple[ResourceType, float], ...]: + metric_diff: MetricDiff = self._fg_wl.calc_metric_diff() + + logger = logging.getLogger(__name__) + logger.info(f'foreground : {metric_diff}') + logger.info(f'background : {self._bg_wl.calc_metric_diff()}') + + resources = ((ResourceType.CACHE, metric_diff.l3_hit_ratio), + (ResourceType.MEMORY, metric_diff.local_mem_util_ps)) + + if all(v > 0 for m, v in 
resources): + return tuple(sorted(resources, key=lambda x: x[1], reverse=True)) + + else: + return tuple(sorted(resources, key=lambda x: x[1])) + + @property + def foreground_workload(self) -> Workload: + return self._fg_wl + + @foreground_workload.setter + def foreground_workload(self, new_workload: Workload): + self._fg_wl = new_workload + for isolator in self._isolator_map.values(): + isolator.change_fg_wl(new_workload) + isolator.enforce() + + @property + def background_workload(self) -> Workload: + return self._bg_wl + + @background_workload.setter + def background_workload(self, new_workload: Workload): + self._bg_wl = new_workload + for isolator in self._isolator_map.values(): + isolator.change_bg_wl(new_workload) + isolator.enforce() + + @property + def ended(self) -> bool: + return not self._fg_wl.is_running or not self._bg_wl.is_running + + @property + def cur_isolator(self) -> Isolator: + return self._cur_isolator + + @property + def name(self) -> str: + return f'{self._fg_wl.name}({self._fg_wl.pid})' + + def set_idle_isolator(self) -> None: + self._cur_isolator.yield_isolation() + self._cur_isolator = IsolationPolicy._IDLE_ISOLATOR + + def reset(self) -> None: + for isolator in self._isolator_map.values(): + isolator.reset() + + # Solorun profiling related + + @property + def in_solorun_profiling(self) -> bool: + return self._in_solorun_profile + + def start_solorun_profiling(self) -> None: + """ profile solorun status of a workload """ + if self._in_solorun_profile: + raise ValueError('Stop the ongoing solorun profiling first!') + + self._in_solorun_profile = True + self._cached_fg_num_threads = self._fg_wl.number_of_threads + self._solorun_verify_violation_count = 0 + + # suspend all workloads and their perf agents + self._bg_wl.pause() + + self._fg_wl.metrics.clear() + + # store current configuration + for isolator in self._isolator_map.values(): + isolator.store_cur_config() + isolator.reset() + + def stop_solorun_profiling(self) -> None: + if not 
self._in_solorun_profile:
+            raise ValueError('Start solorun profiling first!')
+
+        logger = logging.getLogger(__name__)
+        logger.debug(f'number of collected solorun data: {len(self._fg_wl.metrics)}')
+        self._fg_wl.avg_solorun_data = BasicMetric.calc_avg(self._fg_wl.metrics)
+        logger.debug(f'calculated average solorun data: {self._fg_wl.avg_solorun_data}')
+
+        logger.debug('Enforcing restored configuration...')
+        # restore stored configuration
+        for isolator in self._isolator_map.values():
+            isolator.load_cur_config()
+            isolator.enforce()
+
+        self._fg_wl.metrics.clear()
+
+        self._bg_wl.resume()
+
+        self._in_solorun_profile = False
+
+    def profile_needed(self) -> bool:
+        """
+        This function checks if the profiling procedure should be called
+        :return: Decision whether to initiate online solorun profiling
+        """
+        logger = logging.getLogger(__name__)
+
+        if self._fg_wl.avg_solorun_data is None:
+            logger.debug('initialize solorun data')
+            return True
+
+        if not self._fg_wl.calc_metric_diff().verify():
+            self._solorun_verify_violation_count += 1
+
+            if self._solorun_verify_violation_count == self._VERIFY_THRESHOLD:
+                logger.debug(f'fail to verify solorun data. {{{self._fg_wl.calc_metric_diff()}}}')
+                return True
+
+        cur_num_threads = self._fg_wl.number_of_threads
+        if cur_num_threads != 0 and self._cached_fg_num_threads != cur_num_threads:
+            logger.debug(f'number of threads. 
cached: {self._cached_fg_num_threads}, current : {cur_num_threads}') + return True + + return False + + # Swapper related + + @property + def safe_to_swap(self) -> bool: + return not self._in_solorun_profile and len(self._fg_wl.metrics) > 0 and self._fg_wl.calc_metric_diff().verify() diff --git a/isolating_controller/isolation/policies/base_policy.py b/isolating_controller/isolation/policies/base_policy.py deleted file mode 100644 index f72fa9c..0000000 --- a/isolating_controller/isolation/policies/base_policy.py +++ /dev/null @@ -1,95 +0,0 @@ -# coding: UTF-8 -import logging -from abc import ABCMeta, abstractmethod -from enum import IntEnum -from typing import Mapping, Type - -from isolating_controller.metric_container.basic_metric import MetricDiff -from ..isolators import CacheIsolator, IdleIsolator, Isolator, MemoryIsolator, SchedIsolator -from ...workload import Workload - - -class ResourceType(IntEnum): - CACHE = 0 - MEMORY = 1 - - -class IsolationPolicy(metaclass=ABCMeta): - _IDLE_ISOLATOR: IdleIsolator = IdleIsolator() - - def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: - self._fg_wl = fg_wl - self._bg_wl = bg_wl - - self._isolator_map: Mapping[Type[Isolator], Isolator] = dict() - self._cur_isolator: Isolator = IsolationPolicy._IDLE_ISOLATOR - - def __hash__(self) -> int: - return self._fg_wl.pid - - def __repr__(self) -> str: - return f'{self.__class__.__name__} ' - - def init_isolators(self) -> None: - self._isolator_map = dict(( - (CacheIsolator, CacheIsolator(self._fg_wl, self._bg_wl)), - (MemoryIsolator, MemoryIsolator(self._fg_wl, self._bg_wl)), - (SchedIsolator, SchedIsolator(self._fg_wl, self._bg_wl)) - )) - - @property - @abstractmethod - def new_isolator_needed(self) -> bool: - pass - - @abstractmethod - def choose_next_isolator(self) -> bool: - pass - - def contentious_resource(self) -> ResourceType: - metric_diff: MetricDiff = self._fg_wl.calc_metric_diff() - - logger = logging.getLogger(__name__) - logger.info(repr(metric_diff)) - 
- if metric_diff.local_mem_util_ps > 0 and metric_diff.l3_hit_ratio > 0: - if metric_diff.l3_hit_ratio > metric_diff.local_mem_util_ps: - return ResourceType.CACHE - else: - return ResourceType.MEMORY - - elif metric_diff.local_mem_util_ps < 0 < metric_diff.l3_hit_ratio: - return ResourceType.MEMORY - - elif metric_diff.l3_hit_ratio < 0 < metric_diff.local_mem_util_ps: - return ResourceType.CACHE - - else: - if metric_diff.l3_hit_ratio > metric_diff.local_mem_util_ps: - return ResourceType.MEMORY - else: - return ResourceType.CACHE - - @property - def foreground_workload(self) -> Workload: - return self._fg_wl - - @property - def background_workload(self) -> Workload: - return self._bg_wl - - @property - def ended(self) -> bool: - return not self._fg_wl.is_running or not self._bg_wl.is_running - - @property - def cur_isolator(self) -> Isolator: - return self._cur_isolator - - @property - def name(self) -> str: - return f'{self._fg_wl.name}({self._fg_wl.pid})' - - def set_idle_isolator(self) -> None: - self._cur_isolator.yield_isolation() - self._cur_isolator = IsolationPolicy._IDLE_ISOLATOR diff --git a/isolating_controller/isolation/policies/diff_policy.py b/isolating_controller/isolation/policies/defensive.py similarity index 78% rename from isolating_controller/isolation/policies/diff_policy.py rename to isolating_controller/isolation/policies/defensive.py index 56975d9..1388391 100644 --- a/isolating_controller/isolation/policies/diff_policy.py +++ b/isolating_controller/isolation/policies/defensive.py @@ -2,18 +2,19 @@ import logging -from .base_policy import IsolationPolicy, ResourceType +from .base import IsolationPolicy +from .. 
import ResourceType from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator from ...workload import Workload -class DiffPolicy(IsolationPolicy): +class DefensivePolicy(IsolationPolicy): def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: super().__init__(fg_wl, bg_wl) self._is_llc_isolated = False self._is_mem_isolated = False - self._is_sched_isolated = False + self._is_core_isolated = False @property def new_isolator_needed(self) -> bool: @@ -22,7 +23,7 @@ def new_isolator_needed(self) -> bool: def _clear_flags(self) -> None: self._is_llc_isolated = False self._is_mem_isolated = False - self._is_sched_isolated = False + self._is_core_isolated = False def choose_next_isolator(self) -> bool: logger = logging.getLogger(__name__) @@ -30,7 +31,7 @@ def choose_next_isolator(self) -> bool: resource: ResourceType = self.contentious_resource() - if self._is_sched_isolated and self._is_mem_isolated and self._is_llc_isolated: + if self._is_core_isolated and self._is_mem_isolated and self._is_llc_isolated: self._clear_flags() logger.debug('****All isolators are applicable for now!****') @@ -46,10 +47,10 @@ def choose_next_isolator(self) -> bool: logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started') return True - elif not self._is_sched_isolated and resource is ResourceType.MEMORY: + elif not self._is_core_isolated and resource is ResourceType.MEMORY: self._cur_isolator = self._isolator_map[SchedIsolator] - self._is_sched_isolated = True - logger.info(f'Cpuset Isolation for {self._fg_wl} is started') + self._is_core_isolated = True + logger.info(f'Core Isolation for {self._fg_wl} is started') return True else: diff --git a/isolating_controller/isolation/policies/defensive_cpu.py b/isolating_controller/isolation/policies/defensive_cpu.py new file mode 100644 index 0000000..f413d45 --- /dev/null +++ b/isolating_controller/isolation/policies/defensive_cpu.py @@ -0,0 +1,62 @@ +# coding: UTF-8 + +import logging + +from .base 
import IsolationPolicy +from .. import ResourceType +from ..isolators import CacheIsolator, CoreIsolator, IdleIsolator, MemoryIsolator +from ...workload import Workload + + +class DefensiveCPUPolicy(IsolationPolicy): + def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: + super().__init__(fg_wl, bg_wl) + + self._is_llc_isolated = False + self._is_mem_isolated = False + self._is_core_isolated = False + + @property + def new_isolator_needed(self) -> bool: + return isinstance(self._cur_isolator, IdleIsolator) + + def _clear_flags(self) -> None: + self._is_llc_isolated = False + self._is_mem_isolated = False + self._is_core_isolated = False + + def choose_next_isolator(self) -> bool: + logger = logging.getLogger(__name__) + logger.debug('looking for new isolation...') + + resource: ResourceType = self.contentious_resource() + + if resource is ResourceType.CPU: + self._cur_isolator = self._isolator_map[CoreIsolator] + self._cur_isolator._contentious_resource = ResourceType.CPU + logger.info(f'Core Isolation for {self._fg_wl} is started to isolate {ResourceType.CPU.name}s') + return True + + elif not self._is_llc_isolated and resource is ResourceType.CACHE: + self._cur_isolator = self._isolator_map[CacheIsolator] + self._is_llc_isolated = True + logger.info(f'Cache Isolation for {self._fg_wl} is started to isolate {ResourceType.CACHE.name}s') + return True + + elif not self._is_mem_isolated and resource is ResourceType.MEMORY: + self._cur_isolator = self._isolator_map[MemoryIsolator] + self._is_mem_isolated = True + logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started ' + f'to isolate {ResourceType.MEMORY.name} BW') + return True + + elif not self._is_core_isolated and resource is ResourceType.MEMORY: + self._cur_isolator = self._isolator_map[CoreIsolator] + self._is_core_isolated = True + self._cur_isolator._contentious_resource = ResourceType.MEMORY + logger.info(f'Core Isolation for {self._fg_wl} is started to isolate 
{ResourceType.MEMORY.name} BW ') + return True + + else: + logger.debug('A new Isolator has not been selected.') + return False diff --git a/isolating_controller/isolation/policies/diff_with_violation_policy.py b/isolating_controller/isolation/policies/defensive_with_violation.py similarity index 84% rename from isolating_controller/isolation/policies/diff_with_violation_policy.py rename to isolating_controller/isolation/policies/defensive_with_violation.py index 6b457a4..bacea21 100644 --- a/isolating_controller/isolation/policies/diff_with_violation_policy.py +++ b/isolating_controller/isolation/policies/defensive_with_violation.py @@ -1,15 +1,16 @@ # coding: UTF-8 import logging +from typing import ClassVar -from .base_policy import ResourceType -from .diff_policy import DiffPolicy +from .defensive import DefensivePolicy +from .. import ResourceType from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator from ...workload import Workload -class DiffWViolationPolicy(DiffPolicy): - VIOLATION_THRESHOLD = 3 +class DefensiveWViolationPolicy(DefensivePolicy): + VIOLATION_THRESHOLD: ClassVar[int] = 3 def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: super().__init__(fg_wl, bg_wl) @@ -35,7 +36,7 @@ def new_isolator_needed(self) -> bool: self._violation_count += 1 - if self._violation_count >= DiffWViolationPolicy.VIOLATION_THRESHOLD: + if self._violation_count >= DefensiveWViolationPolicy.VIOLATION_THRESHOLD: logger.info('new isolator is required due to violation') self.set_idle_isolator() self._clear_flags() diff --git a/isolating_controller/isolation/policies/greedy_diff_policy.py b/isolating_controller/isolation/policies/greedy.py similarity index 57% rename from isolating_controller/isolation/policies/greedy_diff_policy.py rename to isolating_controller/isolation/policies/greedy.py index 4cd1fad..d04f68f 100644 --- a/isolating_controller/isolation/policies/greedy_diff_policy.py +++ 
b/isolating_controller/isolation/policies/greedy.py @@ -2,12 +2,13 @@ import logging -from .base_policy import IsolationPolicy, ResourceType -from ..isolators import CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from .base import IsolationPolicy +from .. import ResourceType +from ..isolators import AffinityIsolator, CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator from ...workload import Workload -class GreedyDiffPolicy(IsolationPolicy): +class GreedyPolicy(IsolationPolicy): def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: super().__init__(fg_wl, bg_wl) @@ -21,23 +22,30 @@ def choose_next_isolator(self) -> bool: logger = logging.getLogger(__name__) logger.debug('looking for new isolation...') + # if foreground is web server (CPU critical) + if len(self._fg_wl.bound_cores) * 2 < self._fg_wl.number_of_threads: + if AffinityIsolator in self._isolator_map and not self._isolator_map[AffinityIsolator].is_max_level: + self._cur_isolator = self._isolator_map[AffinityIsolator] + logger.info(f'Starting {self._cur_isolator.__class__.__name__}...') + return True + resource: ResourceType = self.contentious_resource() if resource is ResourceType.CACHE: self._cur_isolator = self._isolator_map[CacheIsolator] - logger.info(f'Cache Isolation for {self._fg_wl} is started') + logger.info(f'Starting {self._cur_isolator.__class__.__name__}...') return True elif not self._is_mem_isolated and resource is ResourceType.MEMORY: self._cur_isolator = self._isolator_map[MemoryIsolator] self._is_mem_isolated = True - logger.info(f'Memory Bandwidth Isolation for {self._fg_wl} is started') + logger.info(f'Starting {self._cur_isolator.__class__.__name__}...') return True elif resource is ResourceType.MEMORY: self._cur_isolator = self._isolator_map[SchedIsolator] self._is_mem_isolated = False - logger.info(f'Cpuset Isolation for {self._fg_wl} is started') + logger.info(f'Starting {self._cur_isolator.__class__.__name__}...') return True else: diff --git 
a/isolating_controller/isolation/policies/greedy_with_violation.py b/isolating_controller/isolation/policies/greedy_with_violation.py new file mode 100644 index 0000000..25da150 --- /dev/null +++ b/isolating_controller/isolation/policies/greedy_with_violation.py @@ -0,0 +1,55 @@ +# coding: UTF-8 + +import logging +from typing import ClassVar + +from .greedy import GreedyPolicy +from .. import ResourceType +from ..isolators import AffinityIsolator, CacheIsolator, IdleIsolator, MemoryIsolator, SchedIsolator +from ...workload import Workload + + +class GreedyWViolationPolicy(GreedyPolicy): + VIOLATION_THRESHOLD: ClassVar[int] = 3 + + def __init__(self, fg_wl: Workload, bg_wl: Workload) -> None: + super().__init__(fg_wl, bg_wl) + + self._violation_count: int = 0 + + def _check_violation(self) -> bool: + if isinstance(self._cur_isolator, AffinityIsolator): + return False + + resource: ResourceType = self.contentious_resource() + + return \ + resource is ResourceType.CACHE and not isinstance(self._cur_isolator, CacheIsolator) \ + or resource is ResourceType.MEMORY and not (isinstance(self._cur_isolator, MemoryIsolator) + or isinstance(self._cur_isolator, SchedIsolator)) + + @property + def new_isolator_needed(self) -> bool: + if isinstance(self._cur_isolator, IdleIsolator): + return True + + if self._check_violation(): + logger = logging.getLogger(__name__) + logger.info(f'violation is occurred. 
current isolator type : {self._cur_isolator.__class__.__name__}') + + self._violation_count += 1 + + if self._violation_count >= GreedyWViolationPolicy.VIOLATION_THRESHOLD: + logger.info('new isolator is required due to violation') + self.set_idle_isolator() + self._violation_count = 0 + return True + + return False + + def choose_next_isolator(self) -> bool: + if super().choose_next_isolator(): + self._violation_count = 0 + return True + + return False diff --git a/isolating_controller/isolation/swapper.py b/isolating_controller/isolation/swapper.py new file mode 100644 index 0000000..8ab4b9e --- /dev/null +++ b/isolating_controller/isolation/swapper.py @@ -0,0 +1,141 @@ +# coding: UTF-8 + +import logging +import subprocess +import time +from typing import Dict, Optional, Set, Tuple + +import psutil + +from .policies.base import IsolationPolicy +from ..metric_container.basic_metric import MetricDiff + + +class SwapIsolator: + # FIXME: This threshold needs tests (How small diff is right for swapping workloads?) + # "-0.5" means the IPCs of workloads in a group drop 50% compared to solo-run + _INST_DIFF_THRESHOLD = -1 + _VIOLATION_THRESHOLD = 3 + _INTERVAL = 2000 + + def __init__(self, isolation_groups: Dict[IsolationPolicy, int]) -> None: + """ + :param isolation_groups: Dict. Key is the index of group and Value is the group itself + """ + self._all_groups: Dict[IsolationPolicy, int] = isolation_groups + + self._prev_grp: Set[IsolationPolicy] = set() + self._violation_count: int = 0 + self._last_swap: int = 0 + + def _select_cont_groups(self) -> Optional[Tuple[IsolationPolicy, IsolationPolicy]]: + """ + Most contentious group is the group which shows "the LOWEST aggr. ipc diff" + Least contentious group is the group which shows "the HIGHEST aggr. ipc diff" + + Assumption : Swap Isolator swaps workloads between the most cont. group and the least cont. group + """ + logger = logging.getLogger(__name__) + + contentions: Tuple[Tuple[IsolationPolicy, MetricDiff], ...] 
= tuple( + (group, group.foreground_workload.calc_metric_diff()) + for group in self._all_groups.keys() + ) + + # TODO: more efficient implementation + for idx, (group1, g1_fg_diff) in enumerate(contentions): + for group2, g2_fg_diff in contentions[idx + 1:]: + g1_bg_curr_cores = len(group1.background_workload.cgroup_cpuset.read_cpus()) + g2_bg_curr_cores = len(group2.background_workload.cgroup_cpuset.read_cpus()) + + g1_fg_cont = g1_fg_diff.instruction_ps + g2_fg_cont = g2_fg_diff.instruction_ps + + g1_bg_cont = group1.background_workload.calc_metric_diff().instruction_ps + g2_bg_cont = group2.background_workload.calc_metric_diff().instruction_ps + current = abs(g1_fg_cont + g1_bg_cont) + abs(g2_fg_cont + g2_bg_cont) + + g1_bg_cont = group1 \ + .background_workload \ + .calc_metric_diff(g2_bg_curr_cores / g1_bg_curr_cores) \ + .instruction_ps + g2_bg_cont = group2 \ + .background_workload \ + .calc_metric_diff(g1_bg_curr_cores / g2_bg_curr_cores) \ + .instruction_ps + future = abs(g1_fg_cont + g2_bg_cont) + abs(g2_fg_cont + g1_bg_cont) + + benefit = current - future + logger.debug(f'Calculating swaption benefit. 
' + f'current: {current:>7.4f}, future: {future:>7.4}, benefit: {benefit:>7.4}') + + if benefit > 0.1: + logger.debug(f'{group1} and {group2} is selected as swap candidate') + return group1, group2 + + return None + + def swap_is_needed(self) -> bool: + if time.time() - self._last_swap <= self._INTERVAL / 1_000: + return False + + logger = logging.getLogger(__name__) + groups = self._select_cont_groups() + + if groups is None: + self._prev_grp.clear() + self._violation_count = 0 + logger.debug(f'violation count of swaption is cleared') + return False + + if len(self._prev_grp) is 2 \ + and groups[0] in self._prev_grp \ + and groups[1] in self._prev_grp: + self._violation_count += 1 + logger.debug( + f'violation count of {groups[0].background_workload}, ' + f'{groups[1].background_workload} is {self._violation_count}') + return self._violation_count >= self._VIOLATION_THRESHOLD + + else: + self._prev_grp.clear() + self._prev_grp.add(groups[0]) + self._prev_grp.add(groups[1]) + self._violation_count = 1 + return False + + def do_swap(self) -> None: + logger = logging.getLogger(__name__) + group1, group2 = tuple(self._prev_grp) + logger.info(f'Starting swaption between {group1.background_workload} and {group2.background_workload}...') + + workload1 = group1.background_workload + workload2 = group2.background_workload + + # Enable CPUSET memory migration + workload1.cgroup_cpuset.set_memory_migrate(True) + workload2.cgroup_cpuset.set_memory_migrate(True) + + try: + # Suspend Procs and Enforce Swap Conf. 
+ workload1.pause() + workload2.pause() + + tmp1, tmp2 = workload2.orig_bound_mems, workload1.orig_bound_mems + workload2.orig_bound_mems, workload1.orig_bound_mems = tmp2, tmp1 + tmp1, tmp2 = workload2.orig_bound_cores, workload1.orig_bound_cores + workload2.orig_bound_cores, workload1.orig_bound_cores = tmp2, tmp1 + + group1.background_workload = workload2 + group2.background_workload = workload1 + + except (psutil.NoSuchProcess, subprocess.CalledProcessError, ProcessLookupError) as e: + logger.warning('Error occurred during swaption', e) + + finally: + # Resume Procs + workload1.resume() + workload2.resume() + self._violation_count = 0 + self._prev_grp.clear() + self._last_swap = time.time() diff --git a/isolating_controller/metric_container/basic_metric.py b/isolating_controller/metric_container/basic_metric.py index fcb79d4..d4aecef 100644 --- a/isolating_controller/metric_container/basic_metric.py +++ b/isolating_controller/metric_container/basic_metric.py @@ -1,24 +1,45 @@ # coding: UTF-8 -from time import localtime, strftime +from statistics import mean +from typing import Iterable + +from cpuinfo import cpuinfo + +LLC_SIZE = int(cpuinfo.get_cpu_info()['l3_cache_size'].split()[0]) * 1024 class BasicMetric: - def __init__(self, l2miss, l3miss, inst, cycles, stall_cycles, wall_cycles, intra_coh, inter_coh, llc_size, - local_mem, remote_mem, interval: int): + def __init__(self, l2miss, l3miss, inst, cycles, stall_cycles, wall_cycles, intra_coh, + inter_coh, llc_size, local_mem, remote_mem, interval): self._l2miss = l2miss self._l3miss = l3miss self._instructions = inst - self._wall_cycles = wall_cycles self._cycles = cycles self._stall_cycles = stall_cycles + self._wall_cycles = wall_cycles self._intra_coh = intra_coh self._inter_coh = inter_coh self._llc_size = llc_size self._local_mem = local_mem self._remote_mem = remote_mem self._interval = interval - self._req_date = strftime("%I:%M:%S", localtime()) + + @classmethod + def calc_avg(cls, metrics: 
Iterable['BasicMetric']) -> 'BasicMetric': + return BasicMetric( + mean(metric._l2miss for metric in metrics), + mean(metric._l3miss for metric in metrics), + mean(metric._instructions for metric in metrics), + mean(metric._cycles for metric in metrics), + mean(metric._stall_cycles for metric in metrics), + mean(metric._wall_cycles for metric in metrics), + mean(metric._intra_coh for metric in metrics), + mean(metric._inter_coh for metric in metrics), + mean(metric._llc_size for metric in metrics), + mean(metric._local_mem for metric in metrics), + mean(metric._remote_mem for metric in metrics), + mean(metric._interval for metric in metrics), + ) @property def l2miss(self): @@ -32,6 +53,14 @@ def l3miss(self): def instruction(self): return self._instructions + @property + def instruction_ps(self): + return self._instructions * (1000 / self._interval) + + @property + def wall_cycles(self): + return self._wall_cycles + @property def cycles(self): return self._cycles @@ -56,6 +85,7 @@ def llc_size(self): def local_mem(self) -> float: return self._local_mem + @property def local_mem_ps(self) -> float: return self._local_mem * (1000 / self._interval) @@ -63,31 +93,28 @@ def local_mem_ps(self) -> float: def remote_mem(self): return self._remote_mem + @property def remote_mem_ps(self) -> float: return self._remote_mem * (1000 / self._interval) @property - def req_date(self): - return self._req_date - - @property - def ipc(self): + def ipc(self) -> float: return self._instructions / self._cycles @property - def intra_coh_ratio(self): + def intra_coh_ratio(self) -> float: return self._intra_coh / self._l2miss @property - def inter_coh_ratio(self): + def inter_coh_ratio(self) -> float: return self._inter_coh / self._l2miss @property - def coh_ratio(self): + def coh_ratio(self) -> float: return (self._inter_coh + self._intra_coh) / self._l2miss @property - def l3miss_ratio(self): + def l3miss_ratio(self) -> float: return self._l3miss / self._l2miss @property @@ -95,36 +122,59 
@@ def l3hit_ratio(self) -> float: return 1 - self._l3miss / self._l2miss @property - def l3_intensity(self): - l3_hit_ratio = 1 - self.l3miss_ratio - return self._llc_size * l3_hit_ratio + def l3_util(self) -> float: + return self._llc_size / LLC_SIZE - def __str__(self): - return ', '.join(map(str, ( - self._l2miss, self._l3miss, self._instructions, self._cycles, self._stall_cycles, - self._intra_coh, self._inter_coh, self._llc_size, self._req_date))) + @property + def l3_intensity(self) -> float: + return self.l3_util * self.l3hit_ratio - def __repr__(self): - return self.__str__() + @property + def mem_intensity(self) -> float: + return self.l3_util * self.l3miss_ratio + + def __repr__(self) -> str: + return ', '.join(map(str, ( + self._l2miss, self._l3miss, self._instructions, self._cycles, self._stall_cycles, self._wall_cycles, + self._intra_coh, self._inter_coh, self._llc_size, self._local_mem, self._remote_mem, self._interval))) class MetricDiff: - def __init__(self, curr: BasicMetric, prev: BasicMetric) -> None: + # FIXME: hard coded + _MAX_MEM_BANDWIDTH_PS = 68 * 1024 * 1024 * 1024 + + def __init__(self, curr: BasicMetric, prev: BasicMetric, core_norm: float = 1) -> None: self._l3_hit_ratio = curr.l3hit_ratio - prev.l3hit_ratio - self._local_mem_ps = curr.local_mem_ps() / prev.local_mem_ps() - 1 - self._remote_mem_ps = curr.remote_mem_ps() / prev.remote_mem_ps() - 1 + + if curr.local_mem_ps == 0: + if prev.local_mem_ps == 0: + self._local_mem_ps = 0 + else: + self._local_mem_ps = prev.local_mem_ps / self._MAX_MEM_BANDWIDTH_PS + elif prev.local_mem_ps == 0: + # TODO: is it fair? 
+ self._local_mem_ps = -curr.local_mem_ps / self._MAX_MEM_BANDWIDTH_PS + else: + self._local_mem_ps = curr.local_mem_ps / (prev.local_mem_ps * core_norm) - 1 + + self._instruction_ps = curr.instruction_ps / (prev.instruction_ps * core_norm) - 1 @property - def l3_hit_ratio(self): + def l3_hit_ratio(self) -> float: return self._l3_hit_ratio @property - def local_mem_util_ps(self): + def local_mem_util_ps(self) -> float: return self._local_mem_ps @property - def remote_mem_ps(self): - return self._remote_mem_ps + def instruction_ps(self) -> float: + return self._instruction_ps + + def verify(self) -> bool: + return self._local_mem_ps <= 1 and self._instruction_ps <= 1 def __repr__(self) -> str: - return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, Local Memory access diff: {self._local_mem_ps:>6.03f}' + return f'L3 hit ratio diff: {self._l3_hit_ratio:>6.03f}, ' \ + f'Local Memory access diff: {self._local_mem_ps:>6.03f}, ' \ + f'Instructions per sec. diff: {self._instruction_ps:>6.03f}' diff --git a/isolating_controller/solorun_data/bt.json b/isolating_controller/solorun_data/bt.json new file mode 120000 index 0000000..4707e3c --- /dev/null +++ b/isolating_controller/solorun_data/bt.json @@ -0,0 +1 @@ +../../solorun_data/8core/bt.json \ No newline at end of file diff --git a/isolating_controller/solorun_data/facesim.json b/isolating_controller/solorun_data/facesim.json new file mode 120000 index 0000000..0239831 --- /dev/null +++ b/isolating_controller/solorun_data/facesim.json @@ -0,0 +1 @@ +../../solorun_data/8core/facesim.json \ No newline at end of file diff --git a/isolating_controller/solorun_data/particlefilter.json b/isolating_controller/solorun_data/particlefilter.json new file mode 120000 index 0000000..4c464ab --- /dev/null +++ b/isolating_controller/solorun_data/particlefilter.json @@ -0,0 +1 @@ +../../solorun_data/8core/particlefilter.json \ No newline at end of file diff --git a/isolating_controller/solorun_data/swaptions.json 
b/isolating_controller/solorun_data/swaptions.json new file mode 120000 index 0000000..f3191cd --- /dev/null +++ b/isolating_controller/solorun_data/swaptions.json @@ -0,0 +1 @@ +../../solorun_data/8core/swaptions.json \ No newline at end of file diff --git a/isolating_controller/solorun_data/ua.json b/isolating_controller/solorun_data/ua.json new file mode 120000 index 0000000..10506e3 --- /dev/null +++ b/isolating_controller/solorun_data/ua.json @@ -0,0 +1 @@ +../../solorun_data/8core/ua.json \ No newline at end of file diff --git a/isolating_controller/utils/__init__.py b/isolating_controller/utils/__init__.py index 37479ca..9b40f6a 100644 --- a/isolating_controller/utils/__init__.py +++ b/isolating_controller/utils/__init__.py @@ -1,5 +1,4 @@ # coding: UTF-8 -from .cat import CAT -from .cgroup_cpuset import CgroupCpuset from .dvfs import DVFS +from .resctrl import ResCtrl diff --git a/isolating_controller/utils/cat.py b/isolating_controller/utils/cat.py deleted file mode 100644 index d44d12c..0000000 --- a/isolating_controller/utils/cat.py +++ /dev/null @@ -1,57 +0,0 @@ -# coding: UTF-8 - -import os -import subprocess -from pathlib import Path -from typing import Iterable, Optional - - -def len_of_mask(mask: str) -> int: - cnt = 0 - num = int(mask, 16) - while num is not 0: - cnt += 1 - num >>= 1 - return cnt - - -class CAT: - MOUNT_POINT = Path('/sys/fs/resctrl') - - MIN = int((MOUNT_POINT / 'info' / 'L3' / 'min_cbm_bits').read_text()) - STEP = 1 - MAX = len_of_mask((MOUNT_POINT / 'info' / 'L3' / 'cbm_mask').read_text()) - - @staticmethod - def create_group(name: str) -> None: - subprocess.check_call(args=('sudo', 'mkdir', '-p', str(CAT.MOUNT_POINT / name))) - - @staticmethod - def add_task(name: str, pid: int) -> None: - subprocess.run(args=('sudo', 'tee', '-a', str(CAT.MOUNT_POINT / name / 'tasks')), - input=f'{pid}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - @staticmethod - def remove_group(name: str) -> None: - 
subprocess.check_call(args=('sudo', 'rmdir', str(CAT.MOUNT_POINT / name))) - - @staticmethod - def assign(name: str, *masks: Iterable[str]) -> None: - masks = (f'{i}={mask}' for i, mask in enumerate(masks)) - mask = ';'.join(masks) - subprocess.run(args=('sudo', 'tee', str(CAT.MOUNT_POINT / name / 'schemata')), - input=f'L3:{mask}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - @staticmethod - def gen_mask(start: int, end: Optional[int] = None) -> str: - if end is None or end > CAT.MAX: - end = CAT.MAX - - if start < 0: - raise ValueError('start must be greater than 0') - - return format(((1 << (end - start)) - 1) << (CAT.MAX - end), 'x') - - -if not os.path.ismount(str(CAT.MOUNT_POINT)): - subprocess.check_call(args=('sudo', 'mount', '-t', 'resctrl', 'resctrl', str(CAT.MOUNT_POINT))) diff --git a/isolating_controller/utils/cgroup/__init__.py b/isolating_controller/utils/cgroup/__init__.py new file mode 100644 index 0000000..8e90c79 --- /dev/null +++ b/isolating_controller/utils/cgroup/__init__.py @@ -0,0 +1,5 @@ +# coding: UTF-8 + +from .base import BaseCgroup +from .cpu import Cpu +from .cpuset import CpuSet diff --git a/isolating_controller/utils/cgroup/base.py b/isolating_controller/utils/cgroup/base.py new file mode 100644 index 0000000..649c47f --- /dev/null +++ b/isolating_controller/utils/cgroup/base.py @@ -0,0 +1,32 @@ +# coding: UTF-8 + +import getpass +import grp +import os +import subprocess +from abc import ABCMeta +from typing import ClassVar, Iterable + + +class BaseCgroup(metaclass=ABCMeta): + MOUNT_POINT: ClassVar[str] = '/sys/fs/cgroup' + CONTROLLER: ClassVar[str] = str() + + def __init__(self, group_name: str) -> None: + self._group_name: str = group_name + self._group_path: str = f'{self.CONTROLLER}:{group_name}' + + def create_group(self) -> None: + uname: str = getpass.getuser() + gid: int = os.getegid() + gname: str = grp.getgrgid(gid).gr_name + + subprocess.check_call(args=( + 'sudo', 'cgcreate', '-a', f'{uname}:{gname}', 
'-d', '755', '-f', + '644', '-t', f'{uname}:{gname}', '-s', '644', '-g', self._group_path)) + + def add_tasks(self, pids: Iterable[int]) -> None: + subprocess.check_call(args=('cgclassify', '-g', self._group_path, '--sticky', *map(str, pids))) + + def delete(self) -> None: + subprocess.check_call(args=('sudo', 'cgdelete', '-r', '-g', self._group_path)) diff --git a/isolating_controller/utils/cgroup/cpu.py b/isolating_controller/utils/cgroup/cpu.py new file mode 100644 index 0000000..481f38c --- /dev/null +++ b/isolating_controller/utils/cgroup/cpu.py @@ -0,0 +1,15 @@ +# coding: UTF-8 + + +import subprocess +from typing import ClassVar + +from .base import BaseCgroup + + +class Cpu(BaseCgroup): + CONTROLLER: ClassVar[str] = 'cpu' + + def limit_cpu_quota(self, quota: int, period: int) -> None: + subprocess.check_call(args=('cgset', '-r', f'cpu.cfs_quota_us={quota}', self._group_name)) + subprocess.check_call(args=('cgset', '-r', f'cpu.cfs_period_us={period}', self._group_name)) diff --git a/isolating_controller/utils/cgroup/cpuset.py b/isolating_controller/utils/cgroup/cpuset.py new file mode 100644 index 0000000..17515d4 --- /dev/null +++ b/isolating_controller/utils/cgroup/cpuset.py @@ -0,0 +1,35 @@ +# coding: UTF-8 + + +import subprocess +from typing import ClassVar, Iterable, Set + +from .base import BaseCgroup +from ..hyphen import convert_to_set + + +class CpuSet(BaseCgroup): + CONTROLLER: ClassVar[str] = 'cpuset' + + def assign_cpus(self, core_set: Iterable[int]) -> None: + core_ids = ','.join(map(str, core_set)) + subprocess.check_call(args=('cgset', '-r', f'cpuset.cpus={core_ids}', self._group_name)) + + def assign_mems(self, socket_set: Iterable[int]) -> None: + mem_ids = ','.join(map(str, socket_set)) + subprocess.check_call(args=('cgset', '-r', f'cpuset.mems={mem_ids}', self._group_name)) + + def set_memory_migrate(self, flag: bool) -> None: + subprocess.check_call(args=('cgset', '-r', f'cpuset.memory_migrate={int(flag)}', self._group_name)) + + def 
read_cpus(self) -> Set[int]: + cpus = subprocess.check_output(args=('cgget', '-nvr', 'cpuset.cpus', self._group_name), encoding='ASCII') + if cpus is '': + raise ProcessLookupError() + return convert_to_set(cpus) + + def read_mems(self) -> Set[int]: + mems = subprocess.check_output(args=('cgget', '-nvr', 'cpuset.mems', self._group_name), encoding='ASCII') + if mems is '': + raise ProcessLookupError() + return convert_to_set(mems) diff --git a/isolating_controller/utils/cgroup_cpuset.py b/isolating_controller/utils/cgroup_cpuset.py deleted file mode 100644 index 0877bf1..0000000 --- a/isolating_controller/utils/cgroup_cpuset.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding: UTF-8 - -import subprocess -from typing import Set - -import psutil - - -class CgroupCpuset: - MOUNT_POINT = '/sys/fs/cgroup/cpuset' - - @staticmethod - def create_group(name: str) -> None: - subprocess.check_call(args=('sudo', 'mkdir', '-p', f'{CgroupCpuset.MOUNT_POINT}/{name}')) - - @staticmethod - def add_task(name: str, pid: int) -> None: - p = psutil.Process(pid) - - for thread in p.threads(): - subprocess.run(args=('sudo', 'tee', '-a', f'{CgroupCpuset.MOUNT_POINT}/{name}/tasks'), - input=f'{thread.id}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - for child in p.children(True): - for thread in child.threads(): - subprocess.run(args=('sudo', 'tee', '-a', f'{CgroupCpuset.MOUNT_POINT}/{name}/tasks'), - input=f'{thread.id}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - @staticmethod - def remove_group(name: str) -> None: - subprocess.check_call(args=('sudo', 'rmdir', f'/sys/fs/cgroup/cpuset/{name}')) - - @staticmethod - def assign(group_name: str, core_set: Set[int]) -> None: - subprocess.run(args=('sudo', 'tee', f'/sys/fs/cgroup/cpuset/{group_name}/cpuset.cpus'), - input=','.join(map(str, core_set)), check=True, encoding='ASCII', stdout=subprocess.DEVNULL) - - @staticmethod - def convert_to_set(hyphen_str: str) -> Set[int]: - ret = set() - - for elem in 
hyphen_str.split(','): - group = tuple(map(int, elem.split('-'))) - - if len(group) is 1: - ret.add(group[0]) - elif len(group) is 2: - ret.update(range(group[0], group[1] + 1)) - - return ret diff --git a/isolating_controller/utils/dvfs.py b/isolating_controller/utils/dvfs.py index 8a0593c..221e2c9 100644 --- a/isolating_controller/utils/dvfs.py +++ b/isolating_controller/utils/dvfs.py @@ -2,16 +2,36 @@ import subprocess from pathlib import Path -from typing import Iterable +from typing import ClassVar, Iterable + +from isolating_controller.utils.cgroup import CpuSet class DVFS: - MIN = int(Path('/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq').read_text()) - STEP = 100000 - MAX = int(Path('/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq').read_text()) + MIN: ClassVar[int] = int(Path('/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq').read_text()) + STEP: ClassVar[int] = 100000 + MAX: ClassVar[int] = int(Path('/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq').read_text()) + + def __init__(self, group_name): + self._group_name: str = group_name + self._cur_cgroup = CpuSet(self._group_name) + + def set_freq_cgroup(self, target_freq: int): + """ + Set the frequencies to current cgroup cpusets + :param target_freq: freq. to set to cgroup cpuset + :return: + """ + DVFS.set_freq(target_freq, self._cur_cgroup.read_cpus()) @staticmethod def set_freq(freq: int, cores: Iterable[int]) -> None: + """ + Set the freq. to the specified cores + :param freq: freq. 
to set + :param cores: + :return: + """ for core in cores: subprocess.run(args=('sudo', 'tee', f'/sys/devices/system/cpu/cpu{core}/cpufreq/scaling_max_freq'), check=True, input=f'{freq}\n', encoding='ASCII', stdout=subprocess.DEVNULL) diff --git a/isolating_controller/utils/hyphen.py b/isolating_controller/utils/hyphen.py new file mode 100644 index 0000000..0ac117e --- /dev/null +++ b/isolating_controller/utils/hyphen.py @@ -0,0 +1,22 @@ +# coding: UTF-8 + +from typing import Iterable, Set + + +def convert_to_set(hyphen_str: str) -> Set[int]: + ret = set() + + for elem in hyphen_str.split(','): + group = tuple(map(int, elem.split('-'))) + + if len(group) is 1: + ret.add(group[0]) + elif len(group) is 2: + ret.update(range(group[0], group[1] + 1)) + + return ret + + +def convert_to_hyphen(core_ids: Iterable[int]) -> str: + # TODO + return ','.join(map(str, set(core_ids))) diff --git a/isolating_controller/utils/numa_topology.py b/isolating_controller/utils/numa_topology.py new file mode 100644 index 0000000..c3868c0 --- /dev/null +++ b/isolating_controller/utils/numa_topology.py @@ -0,0 +1,58 @@ +# coding: UTF-8 + +from pathlib import Path +from typing import Dict, Mapping, Set + +from .hyphen import convert_to_set + +_BASE_PATH: Path = Path('/sys/devices/system/node') + + +def get_mem_topo() -> Set[int]: + has_memory_path = _BASE_PATH / 'has_memory' + + with has_memory_path.open() as fp: + line: str = fp.readline() + mem_topo = convert_to_set(line) + + # TODO: get_mem_topo can be enhanced by using real numa memory access latency + + return mem_topo + + +def cur_online_nodes() -> Set[int]: + online_path: Path = _BASE_PATH / 'online' + + with online_path.open() as fp: + line: str = fp.readline() + node_list = convert_to_set(line) + + return node_list + + +def core_belongs_to(socket_id: int) -> Set[int]: + cpulist_path: Path = _BASE_PATH / f'node{socket_id}/cpulist' + + with cpulist_path.open() as fp: + line: str = fp.readline() + return convert_to_set(line) + + +def 
_node_to_core() -> Dict[int, Set[int]]: + node_list = cur_online_nodes() + return dict((socket_id, core_belongs_to(socket_id)) for socket_id in node_list) + + +def _core_to_node() -> Dict[int, int]: + ret_dict: Dict[int, int] = dict() + node_list = cur_online_nodes() + + for socket_id in node_list: + for core_id in core_belongs_to(socket_id): + ret_dict[core_id] = socket_id + + return ret_dict + + +node_to_core: Mapping[int, Set[int]] = _node_to_core() # key: socket id, value: corresponding core ids +core_to_node: Mapping[int, int] = _core_to_node() # key: core id, value: corresponding socket id diff --git a/isolating_controller/utils/resctrl.py b/isolating_controller/utils/resctrl.py new file mode 100644 index 0000000..6c15a0f --- /dev/null +++ b/isolating_controller/utils/resctrl.py @@ -0,0 +1,117 @@ +# coding: UTF-8 + +import re +import subprocess +from pathlib import Path +from typing import ClassVar, List, Pattern, Tuple + + +def len_of_mask(mask: str) -> int: + cnt = 0 + num = int(mask, 16) + while num is not 0: + cnt += 1 + num >>= 1 + return cnt + + +def bits_to_mask(bits: int) -> str: + return f'{bits:x}' + + +class ResCtrl: + MOUNT_POINT: ClassVar[Path] = Path('/sys/fs/resctrl') + MAX_MASK: ClassVar[str] = Path('/sys/fs/resctrl/info/L3/cbm_mask').read_text(encoding='ASCII').strip() + MAX_BITS: ClassVar[int] = len_of_mask((MOUNT_POINT / 'info' / 'L3' / 'cbm_mask').read_text()) + MIN_BITS: ClassVar[int] = int((MOUNT_POINT / 'info' / 'L3' / 'min_cbm_bits').read_text()) + MIN_MASK: ClassVar[str] = bits_to_mask(MIN_BITS) + STEP: ClassVar[int] = 1 + _read_regex: ClassVar[Pattern] = re.compile(r'L3:((\d+=[0-9a-fA-F]+;?)*)', re.MULTILINE) + + def __init__(self, group_name: str) -> None: + self._group_name: str = group_name + self._group_path: Path = ResCtrl.MOUNT_POINT / f'{group_name}' + + @property + def group_name(self): + return self._group_name + + @group_name.setter + def group_name(self, new_name): + self._group_name = new_name + self._group_path: Path = 
ResCtrl.MOUNT_POINT / new_name + + def add_task(self, pid: int) -> None: + subprocess.run(args=('sudo', 'tee', str(self._group_path / 'tasks')), + input=f'{pid}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) + + def assign_llc(self, *masks: str) -> None: + masks = (f'{i}={mask}' for i, mask in enumerate(masks)) + mask = ';'.join(masks) + # subprocess.check_call('ls -ll /sys/fs/resctrl/', shell=True) + subprocess.run(args=('sudo', 'tee', str(self._group_path / 'schemata')), + input=f'L3:{mask}\n', check=True, encoding='ASCII', stdout=subprocess.DEVNULL) + + def read_assigned_llc(self) -> Tuple[int, ...]: + schemata = self._group_path / 'schemata' + if not schemata.is_file(): + raise ProcessLookupError() + + with schemata.open() as fp: + content: str = fp.read().strip() + + l3_schemata = ResCtrl._read_regex.search(content).group(1) + + # example: [('0', '00fff'), ('1', 'fff00')] + pairs: List[Tuple[str, str]] = sorted(tuple(pair.split('=')) for pair in l3_schemata.split(';')) + return tuple(len_of_mask(mask) for socket, mask in pairs) + + @staticmethod + def gen_mask(start: int, end: int = None) -> str: + if end is None or end > ResCtrl.MAX_BITS: + end = ResCtrl.MAX_BITS + + if start < 0: + raise ValueError('start must be greater than 0') + + return format(((1 << (end - start)) - 1) << (ResCtrl.MAX_BITS - end), 'x') + + def remove_group(self) -> None: + subprocess.check_call(args=('sudo', 'rmdir', str(self._group_path))) + + def get_llc_mask(self) -> List[str]: + """ + :return: `socket_masks` which is the elements of list in hex_str + """ + proc = subprocess.Popen(['cat', f'{ResCtrl.MOUNT_POINT}/{self._group_name}/schemata'], + stdout=subprocess.PIPE) + line = proc.communicate()[0].decode().lstrip() + striped_schema_line = line.lstrip('L3:').rstrip('\n').split(';') + socket_masks = list() + for i, item in enumerate(striped_schema_line): + mask = item.lstrip(f'{i}=') + socket_masks.append(mask) + return socket_masks + + @staticmethod + def 
get_llc_bits_from_mask(input_list: List[str]) -> List[int]: + """ + :param input_list: Assuming the elements of list is hex_str such as "0xfffff" + :return: + """ + output_list = list() + for mask in input_list: + hex_str = mask + hex_int = int(hex_str, 16) + bin_tmp = bin(hex_int) + llc_bits = len(bin_tmp.lstrip('0b')) + output_list.append(llc_bits) + return output_list + + def read_llc_bits(self) -> int: + socket_masks = self.get_llc_mask() + llc_bits_list = ResCtrl.get_llc_bits_from_mask(socket_masks) + ret_llc_bits = 0 + for llc_bits in llc_bits_list: + ret_llc_bits += llc_bits + return ret_llc_bits diff --git a/isolating_controller/workload.py b/isolating_controller/workload.py index edcec88..8929de5 100644 --- a/isolating_controller/workload.py +++ b/isolating_controller/workload.py @@ -2,36 +2,69 @@ from collections import deque from itertools import chain -from typing import Deque, Tuple +from typing import Deque, Iterable, Optional, Set, Tuple -import cpuinfo import psutil from .metric_container.basic_metric import BasicMetric, MetricDiff from .solorun_data.datas import data_map - -L3_SIZE = int(cpuinfo.get_cpu_info()['l3_cache_size'].split()[0]) * 1024 +from .utils import DVFS, ResCtrl, numa_topology +from .utils.cgroup import Cpu, CpuSet class Workload: """ - Workload class This class abstracts the process and contains the related metrics to represent its characteristics - ControlThread schedules the groups of `Workload' instances to enforce their scheduling decisions + Controller schedules the groups of `Workload' instances to enforce their scheduling decisions """ - def __init__(self, name: str, pid: int, perf_pid: int, perf_interval: int) -> None: + def __init__(self, name: str, wl_type: str, pid: int, perf_pid: int, perf_interval: int) -> None: self._name = name + self._wl_type = wl_type self._pid = pid self._metrics: Deque[BasicMetric] = deque() self._perf_pid = perf_pid self._perf_interval = perf_interval self._proc_info = psutil.Process(pid) + 
self._perf_info = psutil.Process(perf_pid) + + self._cgroup_cpuset = CpuSet(self.group_name) + self._cgroup_cpu = Cpu(self.group_name) + self._resctrl = ResCtrl(self.group_name) + self._dvfs = DVFS(self.group_name) + + # This variable is used to contain the recent avg. status + self._avg_solorun_data: Optional[BasicMetric] = None + + if wl_type == 'bg': + self._avg_solorun_data = data_map[name] + + self._orig_bound_cores: Tuple[int, ...] = tuple(self._cgroup_cpuset.read_cpus()) + self._orig_bound_mems: Set[int] = self._cgroup_cpuset.read_mems() def __repr__(self) -> str: return f'{self._name} (pid: {self._pid})' + def __hash__(self) -> int: + return self._pid + + @property + def cgroup_cpuset(self) -> CpuSet: + return self._cgroup_cpuset + + @property + def cgroup_cpu(self) -> Cpu: + return self._cgroup_cpu + + @property + def resctrl(self) -> ResCtrl: + return self._resctrl + + @property + def dvfs(self) -> DVFS: + return self._dvfs + @property def name(self) -> str: return self._name @@ -40,17 +73,45 @@ def name(self) -> str: def pid(self) -> int: return self._pid + @property + def wl_type(self) -> str: + return self._wl_type + @property def metrics(self) -> Deque[BasicMetric]: return self._metrics @property - def cpuset(self) -> Tuple[int, ...]: - return tuple(self._proc_info.cpu_affinity()) + def bound_cores(self) -> Tuple[int, ...]: + return tuple(self._cgroup_cpuset.read_cpus()) + + @bound_cores.setter + def bound_cores(self, core_ids: Iterable[int]): + self._cgroup_cpuset.assign_cpus(core_ids) @property - def perf_pid(self) -> int: - return self._perf_pid + def orig_bound_cores(self) -> Tuple[int, ...]: + return self._orig_bound_cores + + @orig_bound_cores.setter + def orig_bound_cores(self, orig_bound_cores: Tuple[int, ...]) -> None: + self._orig_bound_cores = orig_bound_cores + + @property + def bound_mems(self) -> Tuple[int, ...]: + return tuple(self._cgroup_cpuset.read_mems()) + + @bound_mems.setter + def bound_mems(self, affinity: Iterable[int]): + 
self._cgroup_cpuset.assign_mems(affinity) + + @property + def orig_bound_mems(self) -> Set[int]: + return self._orig_bound_mems + + @orig_bound_mems.setter + def orig_bound_mems(self, orig_bound_mems: Set[int]) -> None: + self._orig_bound_mems = orig_bound_mems @property def perf_interval(self): @@ -60,11 +121,28 @@ def perf_interval(self): def is_running(self) -> bool: return self._proc_info.is_running() - def calc_metric_diff(self) -> MetricDiff: - solorun_data = data_map[self.name] - curr_metric: BasicMetric = self._metrics[0] + @property + def group_name(self) -> str: + return f'{self.name}_{self.pid}' - return MetricDiff(curr_metric, solorun_data) + @property + def number_of_threads(self) -> int: + try: + return self._proc_info.num_threads() + except psutil.NoSuchProcess: + return 0 + + @property + def avg_solorun_data(self) -> Optional[BasicMetric]: + return self._avg_solorun_data + + @avg_solorun_data.setter + def avg_solorun_data(self, new_data: BasicMetric) -> None: + self._avg_solorun_data = new_data + + def calc_metric_diff(self, core_norm: float = 1) -> MetricDiff: + curr_metric: BasicMetric = self._metrics[0] + return MetricDiff(curr_metric, self._avg_solorun_data, core_norm) def all_child_tid(self) -> Tuple[int, ...]: try: @@ -74,3 +152,20 @@ def all_child_tid(self) -> Tuple[int, ...]: )) except psutil.NoSuchProcess: return tuple() + + def cur_socket_id(self) -> int: + sockets = frozenset(numa_topology.core_to_node[core_id] for core_id in self.bound_cores) + + # FIXME: hard coded + if len(sockets) is not 1: + raise NotImplementedError(f'Workload spans multiple sockets. 
{sockets}') + else: + return next(iter(sockets)) + + def pause(self) -> None: + self._proc_info.suspend() + self._perf_info.suspend() + + def resume(self) -> None: + self._proc_info.resume() + self._perf_info.resume() diff --git a/pending_queue.py b/pending_queue.py index e5dbdcf..7b81419 100644 --- a/pending_queue.py +++ b/pending_queue.py @@ -1,7 +1,8 @@ # coding: UTF-8 import logging -from typing import Dict, List, Sized, Tuple, Type +from collections import defaultdict +from typing import DefaultDict, Dict, List, Sized, Tuple, Type from isolating_controller.isolation.policies import IsolationPolicy from isolating_controller.workload import Workload @@ -13,6 +14,7 @@ def __init__(self, policy_type: Type[IsolationPolicy]) -> None: self._bg_q: Dict[Tuple[int, ...], Workload] = dict() self._fg_q: Dict[Tuple[int, ...], Workload] = dict() + self._ready_queue: DefaultDict[int, List[Workload]] = defaultdict(list) self._pending_list: List[IsolationPolicy] = list() def __len__(self) -> int: @@ -20,35 +22,26 @@ def __len__(self) -> int: filter(lambda x: len(x.foreground_workload.metrics) > 0 and len(x.background_workload.metrics) > 0, self._pending_list))) - def add_bg(self, workload: Workload) -> None: - logger = logging.getLogger(__name__) - logger.info(f'{workload} is ready for active as Background') + def add(self, workload: Workload) -> None: + logger = logging.getLogger('monitoring.pending_queue') + logger.info(f'{workload} is ready for active') - # FIXME: hard coded - other_cpuset = tuple(map(lambda x: x - 8, workload.cpuset)) - - if other_cpuset in self._fg_q: - new_group = self._policy_type(self._fg_q[other_cpuset], workload) - self._pending_list.append(new_group) - del self._fg_q[other_cpuset] - - else: - self._bg_q[workload.cpuset] = workload - - def add_fg(self, workload: Workload) -> None: - logger = logging.getLogger(__name__) - logger.info(f'{workload} is ready for active as Foreground') + ready_queue = self._ready_queue[workload.cur_socket_id()] + 
ready_queue.append(workload) # FIXME: hard coded - other_cpuset = tuple(map(lambda x: x + 8, workload.cpuset)) - - if other_cpuset in self._bg_q: - new_group = self._policy_type(self._bg_q[other_cpuset], workload) + if len(ready_queue) is 2 and ready_queue[0].wl_type != ready_queue[1].wl_type: + if ready_queue[0].wl_type == 'fg': + fg = ready_queue[0] + bg = ready_queue[1] + else: + fg = ready_queue[1] + bg = ready_queue[0] + + new_group = self._policy_type(fg, bg) self._pending_list.append(new_group) - del self._bg_q[other_cpuset] - else: - self._fg_q[workload.cpuset] = workload + self._ready_queue[workload.cur_socket_id()] = list() def pop(self) -> IsolationPolicy: if len(self) is 0: diff --git a/polling_thread.py b/polling_thread.py new file mode 100644 index 0000000..aab6e5f --- /dev/null +++ b/polling_thread.py @@ -0,0 +1,113 @@ +# coding: UTF-8 + +import functools +import json +import logging +from threading import Thread + +import pika +import psutil +from pika import BasicProperties +from pika.adapters.blocking_connection import BlockingChannel +from pika.spec import Basic + +from isolating_controller.metric_container.basic_metric import BasicMetric +from isolating_controller.workload import Workload +from pending_queue import PendingQueue + + +class Singleton(type): + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) + return cls._instances[cls] + + +class PollingThread(Thread, metaclass=Singleton): + def __init__(self, metric_buf_size: int, pending_queue: PendingQueue) -> None: + super().__init__(daemon=True) + self._metric_buf_size = metric_buf_size + + self._rmq_host = 'localhost' + self._rmq_creation_queue = 'workload_creation' + + self._pending_wl = pending_queue + + def _cbk_wl_creation(self, ch: BlockingChannel, method: Basic.Deliver, _: BasicProperties, body: bytes) -> None: + ch.basic_ack(method.delivery_tag) + + arr = 
body.decode().strip().split(',') + + logger = logging.getLogger('monitoring.workload_creation') + logger.debug(f'{arr} is received from workload_creation queue') + + if len(arr) != 5: + return + + wl_identifier, wl_type, pid, perf_pid, perf_interval = arr + pid = int(pid) + perf_pid = int(perf_pid) + perf_interval = int(perf_interval) + item = wl_identifier.split('_') + wl_name = item[0] + + if not psutil.pid_exists(pid): + return + + workload = Workload(wl_name, wl_type, pid, perf_pid, perf_interval) + if wl_type == 'bg': + logger.info(f'{workload} is background process') + else: + logger.info(f'{workload} is foreground process') + + self._pending_wl.add(workload) + + wl_queue_name = '{}({})'.format(wl_name, pid) + ch.queue_declare(wl_queue_name) + ch.basic_consume(functools.partial(self._cbk_wl_monitor, workload), wl_queue_name) + + def _cbk_wl_monitor(self, workload: Workload, + ch: BlockingChannel, method: Basic.Deliver, _: BasicProperties, body: bytes) -> None: + metric = json.loads(body.decode()) + ch.basic_ack(method.delivery_tag) + + item = BasicMetric(metric['l2miss'], + metric['l3miss'], + metric['instructions'], + metric['cycles'], + metric['stall_cycles'], + metric['wall_cycles'], + metric['intra_coh'], + metric['inter_coh'], + metric['llc_size'], + metric['local_mem'], + metric['remote_mem'], + workload.perf_interval) + + logger = logging.getLogger(f'monitoring.metric.{workload}') + logger.debug(f'{metric} is given from ') + + metric_que = workload.metrics + + if len(metric_que) == self._metric_buf_size: + metric_que.pop() + + metric_que.appendleft(item) + + def run(self) -> None: + connection = pika.BlockingConnection(pika.ConnectionParameters(host=self._rmq_host)) + channel = connection.channel() + + channel.queue_declare(self._rmq_creation_queue) + channel.basic_consume(self._cbk_wl_creation, self._rmq_creation_queue) + + try: + logger = logging.getLogger('monitoring') + logger.debug('starting consuming thread') + channel.start_consuming() + + 
except KeyboardInterrupt: + channel.close() + connection.close() diff --git a/solorun_data/8core/bt.json b/solorun_data/8core/bt.json new file mode 100644 index 0000000..b84f233 --- /dev/null +++ b/solorun_data/8core/bt.json @@ -0,0 +1,15 @@ +{ + "name": "BT", + "runtime": 180.0266306400299, + "l2miss": 209131122.35161108, + "l3miss": 112225709.15771621, + "instructions": 37314385197.06614, + "cycles": 16792362436.009045, + "stall_cycles": 3760861210.672696, + "wall_cycles": 2114139368.920294, + "intra_coh": 21035.449406444317, + "inter_coh": 15877.778405879028, + "llc_size": 41439544.54618474, + "local_mem": 8457671237.173544, + "remote_mem": 69865673.44262296 +} \ No newline at end of file diff --git a/solorun_data/8core/canneal.json b/solorun_data/8core/canneal.json index 00c56bc..9fb74ca 100644 --- a/solorun_data/8core/canneal.json +++ b/solorun_data/8core/canneal.json @@ -1,15 +1,15 @@ { "name": "canneal", - "runtime": 50.875282287597656, - "l2miss": 60792682.894211575, - "l3miss": 41080933.25349302, - "instructions": 2573983428.3233533, - "cycles": 10176539363.932137, - "stall_cycles": 8380277883.153692, - "wall_cycles": 2100813560.0598803, - "intra_coh": 274952.89421157684, - "inter_coh": 0.01996007984031936, + "runtime": 58.566492795944214, + "l2miss": 55642619.19650655, + "l3miss": 36857580.69868996, + "instructions": 2294152843.772926, + "cycles": 7968185474.899564, + "stall_cycles": 6375642091.790394, + "wall_cycles": 2109140231.161572, + "intra_coh": 381280.0349344978, + "inter_coh": 6730.183406113537, "llc_size": 41439544.54618474, - "local_mem": 3492287862.0359282, - "remote_mem": 4319358.72255489 + "local_mem": 2853812912.6288214, + "remote_mem": 7267913.781659389 } \ No newline at end of file diff --git a/solorun_data/8core/facesim.json b/solorun_data/8core/facesim.json new file mode 100644 index 0000000..4751737 --- /dev/null +++ b/solorun_data/8core/facesim.json @@ -0,0 +1,15 @@ +{ + "name": "facesim", + "runtime": 90.43694233894348, + "l2miss": 
166282636.99095023, + "l3miss": 24719981.719457015, + "instructions": 25880380705.06787, + "cycles": 13042011225.690044, + "stall_cycles": 2752212959.6040726, + "wall_cycles": 2112787419.7511313, + "intra_coh": 3078449.954751131, + "inter_coh": 13725.961538461539, + "llc_size": 41439544.54618474, + "local_mem": 1171378898.8235295, + "remote_mem": 13001927.239819003 +} \ No newline at end of file diff --git a/solorun_data/8core/fluidanimate.json b/solorun_data/8core/fluidanimate.json index 4d4f2c4..bb1a841 100644 --- a/solorun_data/8core/fluidanimate.json +++ b/solorun_data/8core/fluidanimate.json @@ -1,15 +1,15 @@ { "name": "fluidanimate", - "runtime": 58.332513093948364, - "l2miss": 79196720.6445993, - "l3miss": 50765289.181184664, - "instructions": 34181753854.285713, - "cycles": 19516734522.456444, - "stall_cycles": 2102575306.5156794, - "wall_cycles": 2100813560.0598803, - "intra_coh": 5085844.651567944, - "inter_coh": 0.3484320557491289, + "runtime": 70.6184606552124, + "l2miss": 66363428.112798266, + "l3miss": 41495381.67751265, + "instructions": 28449764402.603035, + "cycles": 15807868146.637745, + "stall_cycles": 3367753444.584237, + "wall_cycles": 2111054726.59436, + "intra_coh": 4452528.806941432, + "inter_coh": 13572.002892263197, "llc_size": 415121465.087108, - "local_mem": 3349890907.8745646, - "remote_mem": 18919809.337979093 + "local_mem": 2700881194.3890095, + "remote_mem": 44826055.35791756 } \ No newline at end of file diff --git a/solorun_data/8core/freqmine.json b/solorun_data/8core/freqmine.json index 9780603..fe6c887 100644 --- a/solorun_data/8core/freqmine.json +++ b/solorun_data/8core/freqmine.json @@ -1,15 +1,15 @@ { "name": "freqmine", - "runtime": 69.44226408004761, - "l2miss": 68033447.65306123, - "l3miss": 10817418.483965015, - "instructions": 35975762909.854225, - "cycles": 19932432780.072887, - "stall_cycles": 4954574076.209912, - "wall_cycles": 2100459532.9446065, - "intra_coh": 17507614.15451895, - "inter_coh": 0.29154518950437314, 
+ "runtime": 85.67928528785706, + "l2miss": 57477559.75029726, + "l3miss": 9170439.53626635, + "instructions": 29334012536.206898, + "cycles": 16076459416.147444, + "stall_cycles": 3861194938.287753, + "wall_cycles": 2110738194.9702735, + "intra_coh": 14914421.248513674, + "inter_coh": 15639.417360285373, "llc_size": 385291493.877551, - "local_mem": 577093202.0991254, - "remote_mem": 3535695.860058309 + "local_mem": 495354752.15219975, + "remote_mem": 6255921.617122473 } \ No newline at end of file diff --git a/solorun_data/8core/kmeans.json b/solorun_data/8core/kmeans.json index 43c65c5..5f5dce8 100644 --- a/solorun_data/8core/kmeans.json +++ b/solorun_data/8core/kmeans.json @@ -1,15 +1,15 @@ { "name": "kmeans", - "runtime": 31.43491005897522, - "l2miss": 192604611.46579805, - "l3miss": 23967362.54071661, - "instructions": 17425663303.094463, - "cycles": 11884070884.723127, - "stall_cycles": 4724979741.205212, - "wall_cycles": 2101847222.2475572, - "intra_coh": 133296346.05863193, - "inter_coh": 2.2801302931596092, + "runtime": 40.35035014152527, + "l2miss": 185422775.6756757, + "l3miss": 19463647.593307592, + "instructions": 13773966502.368084, + "cycles": 9490958570.579151, + "stall_cycles": 3637396000.0, + "wall_cycles": 2109259512.4581723, + "intra_coh": 127064823.73230374, + "inter_coh": 11076.190476190477, "llc_size": 40324710.4, - "local_mem": 1667139777.4592834, - "remote_mem": 5069967.426710098 + "local_mem": 1297946805.868726, + "remote_mem": 6836998.918918919 } \ No newline at end of file diff --git a/solorun_data/8core/nn.json b/solorun_data/8core/nn.json index 42bab74..df5d6a2 100644 --- a/solorun_data/8core/nn.json +++ b/solorun_data/8core/nn.json @@ -1,15 +1,15 @@ { "name": "nn", - "runtime": 65.96833348274231, - "l2miss": 98489406.00308642, - "l3miss": 8380.293209876543, - "instructions": 10738637686.69753, - "cycles": 20138784439.367287, - "stall_cycles": 12751693133.67284, - "wall_cycles": 2103901838.425926, - "intra_coh": 70078248.8425926, - 
"inter_coh": 1.9290123456790123, + "runtime": 74.64870262145996, + "l2miss": 93579887.43448275, + "l3miss": 1771182.1103448276, + "instructions": 9515542575.682758, + "cycles": 16870611064.193104, + "stall_cycles": 10460715115.931034, + "wall_cycles": 2114596987.3655174, + "intra_coh": 63552416.08275862, + "inter_coh": 22423.144827586206, "llc_size": 40148534.0621118, - "local_mem": 165206344.69135803, - "remote_mem": 724132.3456790124 + "local_mem": 131959673.82068965, + "remote_mem": 14083460.413793104 } \ No newline at end of file diff --git a/solorun_data/8core/particlefilter.json b/solorun_data/8core/particlefilter.json new file mode 100644 index 0000000..2d5a3e9 --- /dev/null +++ b/solorun_data/8core/particlefilter.json @@ -0,0 +1,15 @@ +{ + "name": "particlefilter", + "runtime": 88.62292170524597, + "l2miss": 1644216187.5072298, + "l3miss": 716752.0994794678, + "instructions": 26348741815.98612, + "cycles": 9531916426.674377, + "stall_cycles": 888746471.1162521, + "wall_cycles": 2112264332.689416, + "intra_coh": 25106.81318681319, + "inter_coh": 13258.438403701562, + "llc_size": 41439544.54618474, + "local_mem": 45910071.67148641, + "remote_mem": 1370608.3053788315 +} \ No newline at end of file diff --git a/solorun_data/8core/raytrace.json b/solorun_data/8core/raytrace.json index 29dc9a3..2d1be8d 100644 --- a/solorun_data/8core/raytrace.json +++ b/solorun_data/8core/raytrace.json @@ -1,15 +1,15 @@ { "name": "raytrace", - "runtime": 75.00958156585693, - "l2miss": 37303490.04048583, - "l3miss": 8992488.259109313, - "instructions": 15097455247.354925, - "cycles": 7698034081.48448, - "stall_cycles": 2193381188.205128, - "wall_cycles": 2100733789.3387315, - "intra_coh": 603332.7395411606, - "inter_coh": 7.57085020242915, + "runtime": 96.63842177391052, + "l2miss": 29602739.589689635, + "l3miss": 7303374.339821147, + "instructions": 11771374169.647552, + "cycles": 5870027242.009469, + "stall_cycles": 1570524903.093109, + "wall_cycles": 2107925825.0184112, + 
"intra_coh": 478563.11415044713, + "inter_coh": 6612.183061546555, "llc_size": 329469194.70985156, - "local_mem": 652059320.48583, - "remote_mem": 2034180.8367071524 + "local_mem": 511230443.26144135, + "remote_mem": 2943431.7096265126 } \ No newline at end of file diff --git a/solorun_data/8core/sp.json b/solorun_data/8core/sp.json index 8fc70e0..3f48687 100644 --- a/solorun_data/8core/sp.json +++ b/solorun_data/8core/sp.json @@ -1,15 +1,15 @@ { "name": "SP", - "runtime": 127.34041666984558, - "l2miss": 811554100.6671963, - "l3miss": 395381549.872915, - "instructions": 31966554430.548054, - "cycles": 20707372417.760128, - "stall_cycles": 8246697940.540112, - "wall_cycles": 2103639492.8911834, - "intra_coh": 9351.890389197775, - "inter_coh": 0.023828435266084195, + "runtime": 139.1068513393402, + "l2miss": 732227921.8221735, + "l3miss": 372740335.3311379, + "instructions": 29445354524.156605, + "cycles": 16824258389.937798, + "stall_cycles": 5539390817.672887, + "wall_cycles": 2112598046.6154408, + "intra_coh": 60722.45883644347, + "inter_coh": 15559.180387852177, "llc_size": 40323737.27388535, - "local_mem": 30224151708.975376, - "remote_mem": 124499140.01588562 + "local_mem": 27333130453.56751, + "remote_mem": 225875950.7647274 } \ No newline at end of file diff --git a/solorun_data/8core/streamcluster.json b/solorun_data/8core/streamcluster.json index 299adb9..d8e33d2 100644 --- a/solorun_data/8core/streamcluster.json +++ b/solorun_data/8core/streamcluster.json @@ -1,15 +1,15 @@ { "name": "streamcluster", - "runtime": 100.40241861343384, - "l2miss": 173347568.74529484, - "l3miss": 147510949.77415305, - "instructions": 10874242181.191969, - "cycles": 20151487489.05897, - "stall_cycles": 14274633174.85571, - "wall_cycles": 2101089271.7691345, - "intra_coh": 14622179.72396487, - "inter_coh": 0.12547051442910917, + "runtime": 108.04844522476196, + "l2miss": 168123959.88711193, + "l3miss": 141570986.0583255, + "instructions": 10224582760.649107, + "cycles": 
16460289317.262463, + "stall_cycles": 10945278054.506115, + "wall_cycles": 2110291760.0282218, + "intra_coh": 13866344.04515522, + "inter_coh": 7407.25305738476, "llc_size": 41851667.692307696, - "local_mem": 9555511427.051443, - "remote_mem": 11094249.836888332 + "local_mem": 9034444626.60395, + "remote_mem": 15611501.335841957 } \ No newline at end of file diff --git a/solorun_data/8core/swaptions.json b/solorun_data/8core/swaptions.json new file mode 100644 index 0000000..2e44d8c --- /dev/null +++ b/solorun_data/8core/swaptions.json @@ -0,0 +1,15 @@ +{ + "name": "swaptions", + "runtime": 51.841299295425415, + "l2miss": 7875566.221335992, + "l3miss": 70218.84346959123, + "instructions": 30888370965.343967, + "cycles": 16816697239.282152, + "stall_cycles": 3303835114.4366903, + "wall_cycles": 2116106348.8534398, + "intra_coh": 6450924.366899301, + "inter_coh": 21939.86041874377, + "llc_size": 41439544.54618474, + "local_mem": 602434.6161515453, + "remote_mem": 165963.54935194418 +} \ No newline at end of file diff --git a/solorun_data/8core/ua.json b/solorun_data/8core/ua.json new file mode 100644 index 0000000..b3d611b --- /dev/null +++ b/solorun_data/8core/ua.json @@ -0,0 +1,15 @@ +{ + "name": "UA", + "runtime": 191.63576126098633, + "l2miss": 234905209.9230565, + "l3miss": 161282014.08331123, + "instructions": 20563319157.1186, + "cycles": 15614710154.592731, + "stall_cycles": 5650998669.477315, + "wall_cycles": 2113380014.3857787, + "intra_coh": 354513.00079596706, + "inter_coh": 14184.600689838153, + "llc_size": 41439544.54618474, + "local_mem": 15061261994.75723, + "remote_mem": 95241943.85778722 +} \ No newline at end of file