-
Notifications
You must be signed in to change notification settings - Fork 1
系统监测
系统检测原理及实现
这里主要讲述Watch_Dogs-Client系统中有关系统监测的原理和实现代码
部分思路参考了deepin操作系统的系统监视器原理介绍文章 - 深度系统监视器原理剖析
在许多类 Unix 计算机系统中, procfs 是 进程 文件系统 (file system) 的缩写,包含一个伪文件系统(启动时动态生成的文件系统),用于通过内核访问进程信息。这个文件系统通常被挂载到 /proc 目录。由于 /proc 不是一个真正的文件系统,它也就不占用存储空间,只是占用有限的内存。
通过解析/proc文件系统的数据可以实现大部分所需的系统检测功能
阅读此部分前, 建议先行阅读《深度系统监视器原理剖析》, 以取得更好的理解效果
以下所有的示例代码和实现功能均在 Core/sys_monitor.py 中, 相关单元测试见 Test/test_sys_monitor.py
- /proc 官方文档
- /proc 官方文档(中文)
- wiki - procfs
- System monitor for deepin
- 阿里巴巴的系统监视工具 tsar 中的相关源码 mod_proc.c
CPU的占有率计算公式
- workTime = user + nice + system;
- totalTime = user + nice + system + idle + iowait + irq + softirq + steal;
- cpuPercent = (currentWorkTime - prevWorkTime) / (currentTotalTime - prevTotalTime)
@wrap_process_exceptions
def get_cpu_total_time_by_cores(self):
    """Read the per-core CPU time counters from /proc/stat.

    Returns:
        dict: maps a per-core name such as ``"cpu0"`` to a two-item list
        ``[total_time, work_time]`` where ``total_time`` is
        user + nice + system + idle + iowait + irq + softirq + steal and
        ``work_time`` is user + nice + system. The aggregate ``"cpu"``
        line is skipped.
    """
    cpu_total_times = {}
    with open("/proc/stat", "r") as cpu_stat:
        for line in cpu_stat:
            if not line.startswith("cpu"):
                continue
            fields = line.split()
            cpu_name = fields[0]
            if cpu_name == "cpu":
                # Aggregate line over all cores; only per-core lines are kept.
                continue
            # Only the first eight counters are used. Kernels that expose
            # fewer columns are padded with zeros, and extra trailing columns
            # are ignored, instead of failing on a fixed-width unpack.
            counters = [int(value) for value in fields[1:9]]
            counters.extend([0] * (8 - len(counters)))
            user, nice, system = counters[:3]
            cpu_total_times[cpu_name] = [sum(counters), user + nice + system]
    return cpu_total_times
def calc_cpu_percent_by_cores(self):
    """Compute the utilisation of every CPU core as a percentage.

    The first call (when ``self.prev_cpu_time_by_cores`` is empty) only
    records a baseline sample and reports 0.0 for each core. Later calls
    compare a fresh sample against the stored one and then store the fresh
    sample for the next call.

    Returns:
        dict: core name -> percentage rounded to two decimals.
    """
    current = self.get_cpu_total_time_by_cores()
    previous = self.prev_cpu_time_by_cores
    self.prev_cpu_time_by_cores = current

    if not previous:  # not initialised yet
        return dict((cpu_name, 0.) for cpu_name in current)

    cpu_percent_by_cores = {}
    for cpu_name, (total, work) in current.items():
        if cpu_name not in previous:
            # Core was not present in the previous sample; no delta yet.
            cpu_percent_by_cores[cpu_name] = 0.
            continue
        prev_total, prev_work = previous[cpu_name]
        total_delta = total - prev_total
        if total_delta <= 0:
            # Counters did not advance between the two samples; avoid
            # dividing by zero and report an idle core.
            cpu_percent_by_cores[cpu_name] = 0.
        else:
            cpu_percent_by_cores[cpu_name] = round(
                (work - prev_work) * 100.0 / total_delta, 2)
    return cpu_percent_by_cores
其中 @wrap_process_exceptions
是用于检测是否具有读取权限的装饰器, 下同
计算原理同上, 代码略. 详见 sys_monitor.py#L143-L177
- memoryPercent = (total - available) * 100.0 / total
注意, 当前系统使用的内存是由内存总量 total 减去可用内存 available 的值来计算的, 不能用
memoryPercent = used * 100.0 / total
因为 used 的值不包括一些被内核占用并且永不释放的缓存内存,如果用 used 的方式来计算内存百分比,
会发现最终计算的结果会比实际占用的内存小 15% 左右。
@wrap_process_exceptions
def get_mem_info(self):
    """Read memory figures from /proc/meminfo.

    Fields are looked up by name rather than by line position, because
    kernels that do not export ``MemAvailable`` have a different third line.
    When ``MemAvailable`` is missing it is approximated as
    MemFree + Buffers + Cached.

    Returns:
        list: ``[MemTotal, MemFree, MemAvailable]`` as integers, in the
        unit used by /proc/meminfo.
    """
    wanted = ("MemTotal", "MemFree", "MemAvailable", "Buffers", "Cached")
    values = {}
    with open("/proc/meminfo", "r") as mem_info:
        for line in mem_info:
            key, _, rest = line.partition(":")
            if key in wanted:
                # The value is the first token after the colon; the unit
                # suffix (if any) follows it.
                values[key] = int(rest.split()[0])
                if len(values) == len(wanted):
                    break
    if "MemAvailable" not in values:
        values["MemAvailable"] = (values["MemFree"]
                                  + values.get("Buffers", 0)
                                  + values.get("Cached", 0))
    return [values["MemTotal"], values["MemFree"], values["MemAvailable"]]
def calc_mem_percent(self):
    """Return the share of system memory in use, as a percentage.

    Usage is derived as (total - available) / total using the figures
    returned by ``self.get_mem_info()``; the result is rounded to two
    decimals.
    """
    total, _free, available = self.get_mem_info()
    in_use = total - available
    return round(in_use * 100.0 / total, 2)
/proc文件系统计算网络上传下载速度的时候, 只能获取某一网卡的数据.
各个网卡数据相加才是总体情况
此外, 这个计算过程也要排除虚拟网卡的影响.
获取所有网卡信息的示例代码
@wrap_process_exceptions
def get_all_net_device():
    """List every network interface in /proc/net/dev except loopback.

    Returns:
        list: interface names in file order, without the ``lo`` device.
    """
    # Note: common interface naming
    # lo    - loopback
    # eth0  - wired NIC (the digit is the index; same below)
    # wlan0 - wireless NIC
    # br0   - bridge
    # ens3  - virtual NIC (VPS?)
    # ppp0  - PPP dial-up
    # tpp0  - ...
    devices = []
    with open("/proc/net/dev", "r") as net_dev:
        for line in net_dev:
            name, colon, _ = line.partition(":")
            name = name.strip()
            # Lines without a colon are table headers. Compare the exact
            # name so that interfaces merely ending in "lo" are kept.
            if colon and name != "lo":
                devices.append(name)
    return devices
选择默认网卡的示例代码
def get_default_net_device():
    """Pick the network interface to monitor by default.

    ``eth0`` is returned when it exists. Otherwise the interface with the
    largest combined received + sent byte count is chosen (loopback is
    already excluded by ``get_all_net_device``).

    Returns:
        str: the chosen interface name, or ``''`` when no interface exists.
    """
    devices = get_all_net_device()
    default_net_device = 'eth0'
    if default_net_device in devices:
        return default_net_device

    # NOTE(review): get_net_dev_data is called with a single argument here,
    # exactly as before - confirm how it is bound in the enclosing module.
    busiest_device = ''
    max_byte = -1
    for device_name in devices:
        # Read the counters once and compare totals, not the raw tuple.
        traffic = sum(get_net_dev_data(device_name))
        if traffic > max_byte:
            max_byte = traffic
            busiest_device = device_name
    return busiest_device
- downloadSpeed = (currentDownloadBytes - prevDownloadBytes) / 1024.0 / (currentTime - prevTime)
- uploadSpeed = (currentUploadBytes - prevUploadBytes) / 1024.0 / (currentTime - prevTime)
@wrap_process_exceptions
def get_net_dev_data(self, device):
    """Read the byte counters of one interface from /proc/net/dev.

    Args:
        device: exact interface name, e.g. ``"eth0"``.

    Returns:
        tuple: ``(receive_bytes, send_bytes)``, taken from the first and
        the ninth numeric column of the interface's line, or ``(-1, -1)``
        when the interface is not listed.
    """
    receive_bytes = -1
    send_bytes = -1
    with open("/proc/net/dev", "r") as net_dev:
        for line in net_dev:
            name, colon, stats = line.partition(":")
            # Match the whole name: a substring search would also hit
            # e.g. "veth0" or "eth01" when asked for "eth0".
            if not colon or name.strip() != device:
                continue
            counters = stats.split()
            # Assign rather than accumulate so the -1 "not found" sentinel
            # does not leak into the real counter values.
            receive_bytes = int(counters[0])
            send_bytes = int(counters[8])
            break
    return receive_bytes, send_bytes
@wrap_process_exceptions
def calc_net_speed(self, device_name=None):
    """
    Compute the transfer speed of one network interface.

    The first call (while ``self.prev_net_receive_byte`` is 0) only stores a
    baseline and returns zeros; later calls divide the byte deltas by the
    time elapsed since the previous call.

    :param device_name: interface to measure; when empty the result of
        ``self.get_default_net_device()`` is used
    :return: (upload_speed, download_speed) in bytes / 1024 per unit of
        ``time()`` (KB/s assuming ``time`` is ``time.time``)
    """
    if not device_name:  # no interface specified
        device_name = self.get_default_net_device()

    if self.prev_net_receive_byte == 0:  # not initialised yet
        self.prev_net_receive_byte, self.prev_net_send_byte = self.get_net_dev_data(device_name)
        self.prev_net_time = time()
        return 0., 0.

    current_net_receive_byte, current_net_send_byte = self.get_net_dev_data(device_name)
    current_net_time = time()
    elapsed = current_net_time - self.prev_net_time
    if elapsed <= 0:
        # No measurable interval; keep the old baseline so the next call
        # spans a real interval instead of dividing by zero here.
        return 0., 0.

    download_speed = (current_net_receive_byte - self.prev_net_receive_byte) / 1024.0 / elapsed
    upload_speed = (current_net_send_byte - self.prev_net_send_byte) / 1024.0 / elapsed

    self.prev_net_receive_byte, self.prev_net_send_byte = current_net_receive_byte, current_net_send_byte
    self.prev_net_time = current_net_time
    return upload_speed, download_speed
CPU硬件信息 - /proc/cpuinfo
@wrap_process_exceptions
def get_cpu_info(self):
    """Summarise each processor entry of /proc/cpuinfo as one string.

    A summary is started at every ``processor`` line, extended with the
    values of the ``model name``, ``cpu MHz`` and ``siblings`` lines, and
    emitted when the ``power management`` line is reached.

    Returns:
        list: one summary string per emitted processor entry.
    """
    # (line prefix, text appended after the value)
    pieces = (
        ("model name", " - "),
        ("cpu MHz", "Mhz "),
        ("siblings", " CPUs"),
    )
    summaries = []
    summary = ""
    with open("/proc/cpuinfo", "r") as cpuinfo:
        for line in cpuinfo:
            if line.startswith("processor"):
                summary = ""
            elif line.startswith("power management"):
                summaries.append(summary)
            else:
                for prefix, suffix in pieces:
                    if line.startswith(prefix):
                        summary += line.split(":", 1)[1].strip() + suffix
                        break
    return summaries
系统内核, 版本信息 - /proc/version
@wrap_process_exceptions
def get_sys_info(self):
    """Extract kernel and system strings from /proc/version.

    Returns:
        dict: ``{"kernel": ..., "system": ...}``. ``kernel`` is the text
        before the first ``(``; ``system`` is the content of the fourth
        ``(``-delimited segment up to its ``)``, or ``""`` when the line
        has fewer segments.
    """
    sys_info = {"kernel": "", "system": ""}
    with open("/proc/version", "r") as version:
        sys_info_data = version.readline()
    segments = sys_info_data.split('(')
    sys_info["kernel"] = segments[0].strip()
    # Not every kernel banner contains this many parenthesised groups;
    # leave "system" empty rather than raising IndexError.
    if len(segments) > 3:
        sys_info["system"] = segments[3].split(')')[0].strip()
    return sys_info
系统总内存大小 - /proc/meminfo
@wrap_process_exceptions
def get_sys_total_mem():
    """Return the total memory size from the first line of /proc/meminfo.

    Returns:
        str: the numeric value of the first line, without the unit suffix
        and without surrounding whitespace.
    """
    with open("/proc/meminfo", "r") as mem_info:
        value = mem_info.readline().split(":")[1]
    # Take the first token only: stripping the characters "kB" from the
    # end would leave the space that separates number and unit.
    return value.split()[0]
系统平均负载 - /proc/loadavg
@wrap_process_exceptions
def get_sys_loadavg(self):
    """Read the system load averages from /proc/loadavg.

    Returns:
        dict: the five whitespace-separated fields of the first line, as
        strings, under the keys ``lavg_1``, ``lavg_5``, ``lavg_15``, ``nr``
        and ``last_pid``.
    """
    with open("/proc/loadavg", "r") as loadavg:
        fields = loadavg.readline().split()
    # Unpacking enforces that exactly five fields are present.
    one_min, five_min, fifteen_min, nr, last_pid = fields
    return {
        'lavg_1': one_min,
        'lavg_5': five_min,
        'lavg_15': fifteen_min,
        'nr': nr,
        'last_pid': last_pid,
    }
系统运行时间 - /proc/uptime
@wrap_process_exceptions
def get_sys_uptime(self):
    """Read system uptime and idle time from /proc/uptime.

    Returns:
        dict: ``system_uptime`` (formatted string), ``idle_time`` (float,
        second field of /proc/uptime) and ``free rate`` (idle time divided
        by uptime and by the number of cores, rounded to four decimals).
    """

    def second2time_str(sec):
        """Format a number of seconds as days / hours / minutes / seconds."""
        m, s = divmod(sec, 60)
        h, m = divmod(m, 60)
        d, h = divmod(h, 24)
        return "%d Days %d hours %02d min %02d secs" % (d, h, m, s)

    ut = {}
    # Number of cores = number of per-core entries in /proc/stat.
    cpu_core_num = len(self.get_cpu_total_time_by_cores())
    with open("/proc/uptime", "r") as uptime:
        system_uptime, idle_time = map(float, uptime.readline().split())
    ut["system_uptime"] = second2time_str(int(system_uptime))
    ut["idle_time"] = idle_time
    if system_uptime and cpu_core_num:
        ut["free rate"] = round(idle_time / system_uptime / cpu_core_num, 4)
    else:
        # Nothing to divide by; report 0.0 instead of raising.
        ut["free rate"] = 0.
    return ut
- readKbs = (currentReadChar - prevReadChar) / 2 / 1000
- writeKbs = (currentWriteChar - prevWriteChar) / 2 / 1000
@wrap_process_exceptions
def disk_io_counters(self):
    """Return disk I/O statistics for the disks/partitions of the system.

    Returns:
        dict: name -> tuple ``(reads, writes, rbytes, wbytes, rtime, wtime,
        reads_merged, writes_merged, busy_time)`` parsed from
        /proc/diskstats, with ``rbytes``/``wbytes`` multiplied by the
        sector size. Only names selected from /proc/partitions are
        included.

    Raises:
        ValueError: when a /proc/diskstats line has an unrecognised number
            of fields.
    """

    def get_partitions():
        """Determine the partition / disk names we want to look for."""
        partitions = []
        with open("/proc/partitions") as f:
            lines = f.readlines()[2:]
        for line in reversed(lines):
            _, _, _, name = line.split()
            if name[-1].isdigit():
                # we're dealing with a partition (e.g. 'sda1'); 'sda' will
                # also be around but we want to omit it
                partitions.append(name)
            else:
                if not partitions or not partitions[-1].startswith(name):
                    # we're dealing with a disk entity for which no
                    # partitions have been defined (e.g. 'sda' but
                    # 'sda1' was not around), see:
                    # https://github.com/giampaolo/psutil/issues/338
                    partitions.append(name)
        return partitions

    def get_sector_size(partition):
        """Return the sector size of a partition.

        Falls back to SECTOR_SIZE_FALLBACK when the sysfs entry cannot be
        read or parsed.
        """
        try:
            with open("/sys/block/%s/queue/hw_sector_size" % partition, "rt") as f:
                return int(f.read())
        except (IOError, ValueError):
            # man iostat states that sectors are equivalent with blocks and
            # have a size of 512 bytes since 2.4 kernels.
            return SECTOR_SIZE_FALLBACK

    retdict = {}
    partitions = get_partitions()
    with open("/proc/diskstats") as f:
        lines = f.readlines()
    for line in lines:
        # OK, this is a bit confusing. The format of /proc/diskstats can
        # have several variations.
        # On Linux 2.4 each line has always 15 fields, e.g.:
        # "3 0 8 hda 8 8 8 8 8 8 8 8 8 8 8"
        # On Linux 2.6+ each line *usually* has 14 fields, and the disk
        # name is in another position, like this:
        # "3 0 hda 8 8 8 8 8 8 8 8 8 8 8"
        # Newer kernels append extra columns after those 14 (18 or more
        # fields in total); the leading columns keep their meaning.
        # ...unless (Linux 2.6) the line refers to a partition instead
        # of a disk, in which case the line has less fields (7):
        # "3 1 hda1 8 8 8 8"
        # See:
        # https://www.kernel.org/doc/Documentation/iostats.txt
        # https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
        fields = line.split()
        fields_len = len(fields)
        if fields_len == 15:
            # Linux 2.4
            name = fields[3]
            reads = int(fields[2])
            (reads_merged, rbytes, rtime, writes, writes_merged,
             wbytes, wtime, _, busy_time, _) = map(int, fields[4:14])
        elif fields_len == 14 or fields_len >= 18:
            # Linux 2.6+, line referring to a disk (extra trailing columns
            # on newer kernels are ignored)
            name = fields[2]
            (reads, reads_merged, rbytes, rtime, writes, writes_merged,
             wbytes, wtime, _, busy_time, _) = map(int, fields[3:14])
        elif fields_len == 7:
            # Linux 2.6+, line referring to a partition
            name = fields[2]
            reads, rbytes, writes, wbytes = map(int, fields[3:])
            rtime = wtime = reads_merged = writes_merged = busy_time = 0
        else:
            raise ValueError("not sure how to interpret line %r" % line)

        if name in partitions:
            # Convert sector counts to bytes.
            ssize = get_sector_size(name)
            rbytes *= ssize
            wbytes *= ssize
            retdict[name] = (reads, writes, rbytes, wbytes, rtime, wtime,
                             reads_merged, writes_merged, busy_time)
    return retdict
def get_disk_io(self):
    """Total the disk I/O byte counters over all reported devices.

    Returns:
        tuple: ``(read_bytes, written_bytes)`` - the sums of items 2 and 3
        of every entry returned by ``self.disk_io_counters()``.
    """
    counters = self.disk_io_counters().values()
    total_read = sum(entry[2] for entry in counters)
    total_written = sum(entry[3] for entry in counters)
    return total_read, total_written
@wrap_process_exceptions
def get_disk_stat(style='G'):
    """Report disk usage for every mounted, non-special filesystem.

    Args:
        style: unit of the reported sizes - ``'M'`` (1024**2 bytes),
            ``'T'`` (1024**4 bytes); any other value means 1024**3 bytes.

    Returns:
        list: one tuple per mount point:
        ``(device, fs_type, total, used, used_percent, mount_point)`` with
        sizes rounded to two decimals in the requested unit.
    """
    # statvfs() http://www.runoob.com/python/os-statvfs.html
    # reference pydf - https://github.com/k4rtik/pydf/tree/c59c16df1d1086d03f8948338238bf380431deb9
    # NOTE: remote filesystems (nfs, cifs, sshfs, ...) are not filtered out.

    special_fs_types = ("tmpfs", "devpts", "devtmpfs", "proc", "sysfs",
                        "usbfs", "devfs", "fdescfs", "linprocfs")

    def read_mounts():
        """Map mount point -> (device, fs type) from /proc/mounts.

        Lines with fewer than four fields are ignored; a later line for
        the same mount point replaces an earlier one.
        """
        table = {}
        with open("/proc/mounts", "r") as mounts:
            for entry in mounts:
                parts = entry.split()
                if len(parts) >= 4:
                    table[parts[1]] = (parts[0], parts[2])
        return table

    # Unit divisor (defaults to G).
    if style == 'M':
        unit = 1024.0 ** 2
    elif style == 'T':
        unit = 1024.0 ** 4
    else:
        unit = 1024.0 ** 3

    report = []
    for mount_point, (device, fstype) in read_mounts().items():
        # Skip pseudo filesystems that are not backed by a physical disk.
        if fstype.lower() in special_fs_types:
            continue
        try:
            vfs = statvfs(mount_point)
        except (OSError, IOError):
            continue

        block_size = vfs.f_bsize or vfs.f_frsize
        total = vfs.f_blocks * block_size
        # Skip system-related mount points (size 0).
        if not total:
            continue
        used = total - vfs.f_bfree * block_size

        # Disk status: device, filesystem, total, used, usage %, mount point
        report.append((
            device,
            fstype,
            round(total / unit, 2),
            round(used / unit, 2),
            round(used * 100.0 / total, 2),
            mount_point,
        ))
    return report
如果有任何问题, 请联系邮箱 [email protected]