Skip to content

Commit

Permalink
is_larger / sort-by-rss: use oom_score on zombie main thread, add moc…
Browse files Browse the repository at this point in the history
…k tests

The previous rss estimation was very crude. Also
allows to get rid of meminfo_t for is_larger.

Also add comprehensive unit tests for is_larger.

#309
  • Loading branch information
rfjakob committed Apr 4, 2024
1 parent 9bc52bf commit 2948234
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 34 deletions.
64 changes: 35 additions & 29 deletions kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ int kill_wait(const poll_loop_args_t* args, pid_t pid, int sig)
// than our current `victim`.
// In the process, it fills the `cur` structure. It does so lazily, meaning
// it only fills the fields it needs to make a decision.
bool is_larger(const poll_loop_args_t* args, const meminfo_t* m, const procinfo_t* victim, procinfo_t* cur)
bool is_larger(const poll_loop_args_t* args, const procinfo_t* victim, procinfo_t* cur)
{
if (cur->pid <= 1) {
// Let's not kill init.
Expand Down Expand Up @@ -328,24 +328,31 @@ bool is_larger(const poll_loop_args_t* args, const meminfo_t* m, const procinfo_
}
}

// find process with the largest rss
if (args->sort_by_rss) {
/* find process with the largest rss */

// Zombie main thread or other shenanigans?
if (cur->VmRSSkiB == 0 && cur->badness > 0) {
// Acc. to https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/proc/base.c?h=v6.8#n561
// oom_score goes from 0 to 2000.
cur->VmRSSkiB = (m->MemTotalKiB + m->SwapTotalKiB) * cur->badness / 2000;
warn("%s pid %d: rss=0 but oom_score=%d. Zombie main thread? Estimating rss=%lld\n",
__func__, cur->pid, cur->badness, cur->VmRSSkiB);
}

if (cur->VmRSSkiB < victim->VmRSSkiB) {
return false;
// Case 1: neither victim nor cur have rss=0 (zombie main thread).
// This is the usual case.
if (cur->VmRSSkiB > 0 && victim->VmRSSkiB > 0) {
if (cur->VmRSSkiB < victim->VmRSSkiB) {
return false;
}
if (cur->VmRSSkiB == victim->VmRSSkiB && cur->badness <= victim->badness) {
return false;
}
}

if (cur->VmRSSkiB == victim->VmRSSkiB && cur->badness <= victim->badness) {
return false;
// Case 2: one (or both) have rss=0 (zombie main thread)
else {
if (cur->VmRSSkiB == 0) {
// only print the warning when the zombie is first seen, i.e. as "cur"
warn("%s: pid %d: rss=0 but oom_score=%d. Zombie main thread? Using oom_score for decision.\n",
__func__, cur->pid, cur->badness);
}
if (cur->badness < victim->badness) {
return false;
}
if (cur->badness == victim->badness && cur->VmRSSkiB <= victim->VmRSSkiB) {
return false;
}
}
} else {
/* find process with the largest oom_score */
Expand All @@ -370,41 +377,40 @@ bool is_larger(const poll_loop_args_t* args, const meminfo_t* m, const procinfo_
return false;
}
}
return true;
}

// Looks like we have a new victim. Fill out remaining fields
void fill_informative_fields(procinfo_t* cur)
{
if (strlen(cur->name) == 0) {
int res = get_comm(cur->pid, cur->name, sizeof(cur->name));
if (res < 0) {
debug("pid %d: error reading process name: %s\n", cur->pid, strerror(-res));
return false;
debug("%s: pid %d: error reading process name: %s\n", __func__, cur->pid, strerror(-res));
}
}
/* read cmdline */
{
if (strlen(cur->cmdline) == 0) {
int res = get_cmdline(cur->pid, cur->cmdline, sizeof(cur->cmdline));
if (res < 0) {
debug("pid %d: error reading process cmdline: %s\n", cur->pid, strerror(-res));
return false;
debug("%s: pid %d: error reading process cmdline: %s\n", __func__, cur->pid, strerror(-res));
}
}

return true;
}

// debug_print_procinfo pretty-prints the process information in `cur`.
void debug_print_procinfo(const procinfo_t* cur)
void debug_print_procinfo(procinfo_t* cur)
{
if (!enable_debug) {
return;
}
fill_informative_fields(cur);
debug("pid %5d: badness %3d VmRSS %7lld uid %4d oom_score_adj %4d \"%s\"",
cur->pid, cur->badness, cur->VmRSSkiB, cur->uid, cur->oom_score_adj, cur->name);
}

/*
* Find the process with the largest oom_score or rss(when flag --sort-by-rss is set).
*/
procinfo_t find_largest_process(const poll_loop_args_t* args, const meminfo_t* m)
procinfo_t find_largest_process(const poll_loop_args_t* args)
{
DIR* procdir = opendir(procdir_path);
if (procdir == NULL) {
Expand Down Expand Up @@ -440,7 +446,7 @@ procinfo_t find_largest_process(const poll_loop_args_t* args, const meminfo_t* m
/* omitted fields are set to zero */
};

bool larger = is_larger(args, m, &victim, &cur);
bool larger = is_larger(args, &victim, &cur);

debug_print_procinfo(&cur);

Expand Down
3 changes: 2 additions & 1 deletion kill.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef struct {
} poll_loop_args_t;

void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victim);
procinfo_t find_largest_process(const poll_loop_args_t* args, const meminfo_t* m);
procinfo_t find_largest_process(const poll_loop_args_t* args);
bool is_larger(const poll_loop_args_t* args, const procinfo_t* victim, procinfo_t* cur);

#endif
8 changes: 4 additions & 4 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ double min(double x, double y)
// Dry-run oom kill to make sure that
// (1) it works (meaning /proc is accessible)
// (2) the stack grows to maximum size before calling mlockall()
static void startup_selftests(poll_loop_args_t* args, const meminfo_t* m)
static void startup_selftests(poll_loop_args_t* args)
{
{
debug("%s: dry-running oom kill...\n", __func__);
procinfo_t victim = find_largest_process(args, m);
procinfo_t victim = find_largest_process(args);
kill_process(args, 0, &victim);
}
if (args->notify_ext) {
Expand Down Expand Up @@ -369,7 +369,7 @@ int main(int argc, char* argv[])
fprintf(stderr, " SIGKILL when mem <= " PRIPCT " and swap <= " PRIPCT "\n",
args.mem_kill_percent, args.swap_kill_percent);

startup_selftests(&args, &m);
startup_selftests(&args);

int err = mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT);
// kernels older than 4.4 don't support MCL_ONFAULT. Retry without it.
Expand Down Expand Up @@ -477,7 +477,7 @@ static void poll_loop(const poll_loop_args_t* args)
args->mem_term_percent, args->swap_term_percent);
}
if (sig) {
procinfo_t victim = find_largest_process(args, &m);
procinfo_t victim = find_largest_process(args);
/* The run time of find_largest_process is proportional to the number
* of processes, and takes 2.5ms on my box with a running Gnome desktop (try "make bench").
* This is long enough that the situation may have changed in the meantime,
Expand Down
Binary file added perf.data
Binary file not shown.
Binary file added perf.data.old
Binary file not shown.
78 changes: 78 additions & 0 deletions testsuite_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
Expand All @@ -12,6 +13,9 @@ import (
"time"
)

// #include "meminfo.h"
import "C"

type exitVals struct {
stdout string
stderr string
Expand Down Expand Up @@ -145,3 +149,77 @@ func extractCmdExitCode(err error) int {
log.Panicf("could not decode error %#v", err)
return 0
}

type mockProcProcess struct {
pid int
state string // set to "R" when empty
oom_score int
VmRSSkiB int
comm string
num_threads int // set to 1 when zero
}

func (m *mockProcProcess) toProcinfo_t() (p C.procinfo_t) {
p.pid = C.int(m.pid)
p.badness = C.int(m.oom_score)
p.VmRSSkiB = C.longlong(m.VmRSSkiB)
for i, v := range []byte(m.comm) {
p.name[i] = C.char(v)
}
return p
}

func mockProc(t *testing.T, procs []mockProcProcess) {
mockProcdir, err := ioutil.TempDir("", t.Name())
if err != nil {
t.Fatal(err)
}
procdir_path(mockProcdir)

for _, p := range procs {
if p.state == "" {
p.state = "R"
}
if p.num_threads == 0 {
p.num_threads = 1
}

pidDir := fmt.Sprintf("%s/%d", mockProcdir, int(p.pid))
if err := os.Mkdir(pidDir, 0755); err != nil {
t.Fatal(err)
}
// statm
//
// rss = 2nd field, in pages. The other fields are not used by earlyoom.
rss := p.VmRSSkiB * 1024 / os.Getpagesize()
content := []byte(fmt.Sprintf("1 %d 3 4 5 6 7\n", rss))
if err := ioutil.WriteFile(pidDir+"/statm", content, 0444); err != nil {
t.Fatal(err)
}
// stat
//
// Real /proc/pid/stat string for gnome-shell
template := "549077 (%s) S 547891 549077 549077 0 -1 4194560 245592 104 342 5 108521 28953 0 1 20 0 %d 0 4816953 5260238848 65528 18446744073709551615 94179647238144 94179647245825 140730757359824 0 0 0 0 16781312 17656 0 0 0 17 1 0 0 0 0 0 94179647252976 94179647254904 94179672109056 140730757367876 140730757367897 140730757367897 140730757369827 0\n"
content = []byte(fmt.Sprintf(template, p.comm, p.num_threads))
if err := ioutil.WriteFile(pidDir+"/stat", content, 0444); err != nil {
t.Fatal(err)
}
// oom_score
content = []byte(fmt.Sprintf("%d\n", p.oom_score))
if err := ioutil.WriteFile(pidDir+"/oom_score", content, 0444); err != nil {
t.Fatal(err)
}
// oom_score_adj
if err := ioutil.WriteFile(pidDir+"/oom_score_adj", []byte("0\n"), 0444); err != nil {
t.Fatal(err)
}
// comm
if err := ioutil.WriteFile(pidDir+"/comm", []byte(p.comm+"\n"), 0444); err != nil {
t.Fatal(err)
}
// cmdline
if err := ioutil.WriteFile(pidDir+"/cmdline", []byte("foo\000-bar\000-baz"), 0444); err != nil {
t.Fatal(err)
}
}
}
49 changes: 49 additions & 0 deletions testsuite_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,55 @@ func Test_parse_proc_pid_stat_Mock(t *testing.T) {
}
}

func permute_is_larger(t *testing.T, sort_by_rss bool, procs []mockProcProcess) {
args := poll_loop_args_t(sort_by_rss)
for i := range procs {
for j := range procs {
// If the entry is later in the list, is_larger should return true.
want := j > i
have := is_larger(&args, procs[i], procs[j])
if want != have {
t.Errorf("j%d/pid%d larger than i%d/pid%d? want=%v have=%v", j, procs[j].pid, i, procs[i].pid, want, have)
}
}
}
}

func Test_is_larger(t *testing.T) {
procs := []mockProcProcess{
// smallest
{pid: 100, oom_score: 100, VmRSSkiB: 1234},
{pid: 101, oom_score: 100, VmRSSkiB: 1238},
{pid: 102, oom_score: 101, VmRSSkiB: 4},
{pid: 103, oom_score: 102, VmRSSkiB: 4},
{pid: 104, oom_score: 103, VmRSSkiB: 0, num_threads: 2}, // zombie main thread
// largest
}

mockProc(t, procs)
t.Logf("procdir_path=%q", procdir_path(""))

permute_is_larger(t, false, procs)
}

func Test_is_larger_by_rss(t *testing.T) {
procs := []mockProcProcess{
// smallest
{pid: 100, oom_score: 100, VmRSSkiB: 4},
{pid: 101, oom_score: 100, VmRSSkiB: 8},
{pid: 102, oom_score: 101, VmRSSkiB: 8},
{pid: 103, oom_score: 99, VmRSSkiB: 12},
{pid: 104, oom_score: 102, VmRSSkiB: 0, num_threads: 2}, // zombie main thread
{pid: 105, oom_score: 102, VmRSSkiB: 12},
// largest
}

mockProc(t, procs)
t.Logf("procdir_path=%q", procdir_path(""))

permute_is_larger(t, true, procs)
}

func Benchmark_parse_meminfo(b *testing.B) {
for n := 0; n < b.N; n++ {
parse_meminfo()
Expand Down

0 comments on commit 2948234

Please sign in to comment.