Skip to content

Commit

Permalink
Add triage, which clusters together failures to be kinder to humans.
Browse files Browse the repository at this point in the history
This loads failures from BigQuery, digests them into manageable clusters,
and uploads the resulting JSON to a GCS bucket for interactive browsing.

The frontend shows clusters with occurrence graphs and allows data to be
filtered.
  • Loading branch information
Ryan Hitchman committed Mar 23, 2017
1 parent 12b94b1 commit 7c6ea4c
Show file tree
Hide file tree
Showing 13 changed files with 1,505 additions and 1 deletion.
1 change: 1 addition & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ filegroup(
"//scenarios:all-srcs",
"//testgrid/config:all-srcs",
"//testgrid/jenkins_verify:all-srcs",
"//triage:all-srcs",
"//velodrome:all-srcs",
"//vendor:all-srcs",
"//verify:all-srcs",
Expand Down
17 changes: 17 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,20 @@ git_repository(
load("@io_bazel_rules_go//go:def.bzl", "go_repositories")

go_repositories()

# Bazel rules for Node.js (pubref/rules_node), pinned to the v0.3.3 tag.
git_repository(
name = "org_pubref_rules_node",
tag = "v0.3.3",
remote = "https://github.com/pubref/rules_node.git",
)

load("@org_pubref_rules_node//node:rules.bzl", "node_repositories", "npm_repository")

node_repositories()

# External repository "npm_mocha" that pins the mocha npm package to 3.2.0.
# NOTE(review): presumably this is what the mocha_test rule in triage/BUILD
# runs against -- confirm against the rules_node documentation.
npm_repository(
name = "npm_mocha",
deps = {
"mocha": "3.2.0",
},
)
2 changes: 1 addition & 1 deletion gubernator/static/build_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ describe('build', function() {
expect('handles color+bold',
'foo \x1b[90m\x1b[1mdarkgray\x1b[0m', 'foo <em><span class="ansi-8">darkgray</span></em>');
});
});
});
28 changes: 28 additions & 0 deletions triage/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# mocha_test is provided by the pubref Node.js rules.
load("@org_pubref_rules_node//node:rules.bzl", "mocha_test")

# Python test target; srcs lists both summarize.py and its test file.
py_test(
name = "summarize_test",
srcs = [
"summarize.py",
"summarize_test.py",
],
)

# Mocha test target whose entry point is script_test.js.
mocha_test(
name = "script_test",
main = "script_test.js",
)

# Every file under this package (glob "**"); private to the package.
# Tagged "automanaged".
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)

# Publicly visible wrapper around :package-srcs; the root BUILD file lists
# //triage:all-srcs in its own filegroup.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
47 changes: 47 additions & 0 deletions triage/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8" />
  <link rel="stylesheet" type="text/css" href="style.css">
  <title>Kubernetes Aggregated Test Results</title>
</head>
<body>
  <h1>Kubernetes Failures</h1>
  <!-- Any change inside the form re-reads the options and re-renders the clusters. -->
  <form id="options" onchange="rerender();">
    Sort by
    <!-- Only the first radio carries id="sort"; the script looks it up by id and
         then reads the selected value of the whole "sort" group through the form. -->
    <label><input id="sort" name="sort" type="radio" value="total">total count</label>
    <label><input name="sort" type="radio" value="day" checked>count in last day</label>
    <label><input name="sort" type="radio" value="message">error message</label>
    <br>
    <label style="display:none"><input type="checkbox" id="show-normalize">Display normalized errors</label><br>
    Include results from:
    <label><input type="checkbox" checked id="job-ci">CI</label>
    <label><input type="checkbox" id="job-pr">PR</label>
    <br><br>
    Filter (these are regexes):
    <!-- Labels live inside the cells and point at their inputs with for=...;
         <label>/<br> are not allowed as direct children of <table>. -->
    <table>
      <tr><td><label for="filter-text">Failure text</label></td><td><input type="text" id="filter-text"></td></tr>
      <tr><td><label for="filter-job">Job</label></td><td><input type="text" id="filter-job"></td></tr>
      <tr><td><label for="filter-test">Test</label></td><td><input type="text" id="filter-test"></td></tr>
    </table>
  </form>
  <div id="summary"></div>
  <div id="clusters">
    <h2>Loading... <span id="loading-progress"></span></h2>
  </div>
  <!-- Scripts are placed at the end of <body> so every element above exists
       before load() runs. -->
  <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
  <script src="model.js"></script>
  <script src="render.js"></script>
  <script src="interactive.js"></script>
  <script>load();</script>
  <script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-82843984-4', 'auto');
ga('send', 'pageview');
  </script>
</body>
</html>
255 changes: 255 additions & 0 deletions triage/interactive.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
"use strict";

// Page-wide state shared by the functions in this file.
var builds = null; // Builds instance constructed from the downloaded data in load()
var clustered = null; // filtered clusters; replaced on every rerender() via clusteredAll.refilter(options)
var clusteredAll = null; // all clusters; Clusters instance constructed in load()
var options = null; // user-provided in form or URL; the result of readOptions(), set in rerender()
var lastClusterRendered = 0; // for infinite scrolling; 1-based position in clustered.data of the last cluster renderSubset() rendered

// Backslash-escape every regex metacharacter (and whitespace) in `text` so the
// result can be embedded in a RegExp and match the input literally.
// Technique from http://stackoverflow.com/a/9310752/3694
RegExp.escape = function(text) {
  var metaChars = /[-[\]{}()*+?.,\\^$|#\s]/g;
  // "$&" re-inserts the matched character after the added backslash.
  return text.replace(metaChars, "\\$&");
};

// Load options from form inputs, put them in the URL, and return the options dict.
//
// Returned object:
//   ci, pr                 booleans from the "job-ci" / "job-pr" checkboxes
//   reText, reJob, reTest  RegExp (flags "im") built from the filter inputs,
//                          or null when the input is empty
//   showNormalize          boolean from the "show-normalize" checkbox
//   sort                   value of the selected "sort" radio button
//
// Side effect: history.replaceState() rewrites the query string so it carries
// ci=0, pr=1 and the text/job/test filters when they are set; the current
// hash is preserved. When nothing is set, an existing query string is removed.
function readOptions() {
  const read = id => {
    const el = document.getElementById(id);
    if (el.type === "checkbox") return el.checked;
    // For radios, the form's named collection reports the selected value.
    if (el.type === "radio") return el.form[el.name].value;
    if (el.type === "text") {
      if (!id.startsWith("filter")) return el.value;
      if (el.value === "") return null;
      try {
        return new RegExp(el.value, "im");
      } catch (err) {
        // Not a valid regex: fall back to matching the typed text literally.
        console.error("bad regexp", el.value, err);
        return new RegExp(RegExp.escape(el.value), "im");
      }
    }
    return undefined;
  };

  const opts = {
    ci: read('job-ci'),
    pr: read('job-pr'),
    reText: read('filter-text'),
    reJob: read('filter-job'),
    reTest: read('filter-test'),
    showNormalize: read('show-normalize'),
    sort: read('sort'),
  };

  let url = '';
  if (!opts.ci) url += '&ci=0';
  if (opts.pr) url += '&pr=1';
  const filters = [['text', opts.reText], ['job', opts.reJob], ['test', opts.reTest]];
  for (const [name, re] of filters) {
    if (!re) continue;
    // `source` is the pattern without delimiters or flags. Walk it one escape
    // pair at a time so that only a real "\/" is turned back into "/"; a
    // blind replace would also eat the second half of an escaped backslash
    // that happens to precede a slash (or the end of the pattern).
    const baseRe = re.source.replace(/\\([\s\S])/g,
                                     (pair, ch) => (ch === '/' ? '/' : pair));
    url += '&' + name + '=' + encodeURIComponent(baseRe);
  }
  if (url) {
    if (document.location.hash) {
      url += document.location.hash;
    }
    // Drop the leading "&" of the accumulated parameters.
    history.replaceState(null, "", "?" + url.slice(1));
  } else if (document.location.search) {
    history.replaceState(null, "", document.location.pathname + document.location.hash);
  }

  return opts;
}

// Convert querystring parameters into form inputs.
//
// Recognised parameters: ci and pr (checkboxes, checked only when the value
// is "1") and text, job, test (copied into the matching filter text inputs).
// Missing or empty parameters leave the corresponding input untouched.
function setOptionsFromURL() {
  // URLSearchParams decodes "+" and percent-escapes and, unlike
  // decodeURIComponent, does not throw on a malformed "%" sequence, so a
  // hand-edited URL cannot abort page initialisation. Building a Map from it
  // keeps the last occurrence when a parameter is repeated.
  const qs = new Map(new URLSearchParams(window.location.search));

  const write = (id, value) => {
    if (!value) return;
    const el = document.getElementById(id);
    if (el.type === "checkbox") el.checked = (value === "1");
    if (el.type === "text") el.value = value;
  };
  write('job-ci', qs.get('ci'));
  write('job-pr', qs.get('pr'));
  write('filter-text', qs.get('text'));
  write('filter-job', qs.get('job'));
  write('filter-test', qs.get('test'));
}

// Walk the filtered clusters, skipping the first `start` of them, and render
// each following one until renderCluster() has reported `count` rendered
// clusters in total. Records how far the walk got in lastClusterRendered.
function renderSubset(start, count) {
  const container = document.getElementById('clusters');
  let seen = 0;      // clusters visited so far, including skipped ones
  let rendered = 0;  // running total of what renderCluster() returned
  for (const [key, keyId, text, clusters] of clustered.data) {
    seen += 1;
    if (seen <= start) {
      continue;
    }
    rendered += renderCluster(container, key, keyId, text, clusters);
    lastClusterRendered = seen;
    if (rendered >= count) {
      break;
    }
  }
}

// Rebuild the whole view: re-read the options, refilter the clusters, empty
// the page, write the summary line (plus an overall graph when any cluster
// matched) and render the first `maxCount` clusters (10 when not given).
// Does nothing until the cluster data has been loaded.
function rerender(maxCount) {
  if (!clusteredAll) {
    return;
  }

  options = readOptions();
  clustered = clusteredAll.refilter(options);

  const container = document.getElementById('clusters');
  container.removeChildren();
  // `summary` has no declaration in this file; presumably it resolves to the
  // id="summary" element through the browser's id-named globals.
  summary.removeChildren();

  const pieces = [`\n${clustered.length} clusters of ${clustered.sum} failures`];
  if (clustered.sumRecent > 0) {
    pieces.push(` (${clustered.sumRecent} in last day)`);
  }
  pieces.push(` out of ${builds.runCount} builds from ${builds.getStartTime().toLocaleString()} to ${builds.getEndTime().toLocaleString()}.`);
  summary.innerText = pieces.join('');

  if (clustered.length > 0) {
    const overview = addElement(summary, 'div');
    renderGraph(overview, clustered.allBuilds());
  }

  renderSubset(0, maxCount || 10);

  drawVisibleGraphs();
}

// Keep rendering batches of 50 clusters until an element with id `keyId`
// exists, then scroll to it. Returns immediately when the filtered clusters
// do not know `keyId`, and gives up once every cluster has been rendered.
function renderUntilFound(keyId) {
  if (!clustered.byId[keyId]) {
    return;
  }
  let target = document.getElementById(keyId);
  while (target === null) {
    if (lastClusterRendered >= clustered.length) {
      return;
    }
    renderSubset(lastClusterRendered, 50);
    target = document.getElementById(keyId);
  }
  target.scrollIntoView();

  // expand the graph for the selected failure.
  drawVisibleGraphs();
}

// Handle of the pending debounced drawVisibleGraphs() call, if any.
var drawGraphsTimer = null;

// Scroll listener implementing infinite scrolling: while clusters remain
// unrendered and the bottom of the cluster list is within three viewport
// heights, render ten more. Rendering lazily keeps the first page load fast.
// Each call also (re)schedules a graph drawing pass 50ms later, so graphs are
// only drawn once scrolling pauses.
function scrollHandler() {
  if (!clustered) {
    return;
  }
  if (lastClusterRendered < clustered.length) {
    const listBottom = document.getElementById('clusters').getBoundingClientRect().bottom;
    if (listBottom < 3 * window.innerHeight) {
      renderSubset(lastClusterRendered, 10);
    }
  }
  if (drawGraphsTimer) {
    clearTimeout(drawGraphsTimer);
  }
  drawGraphsTimer = setTimeout(drawVisibleGraphs, 50);
}

// Draw the graph of every `div.graph` placeholder that has no children yet
// and whose top edge lies within kGraphHeight of the viewport (above or
// below). The cluster to draw is taken from the element's data-cluster
// attribute.
function drawVisibleGraphs() {
  const placeholders = document.querySelectorAll('div.graph');
  for (const holder of placeholders) {
    const alreadyRendered = holder.children.length > 0;
    if (!alreadyRendered) {
      const bounds = holder.getBoundingClientRect();
      if (bounds.top + kGraphHeight >= 0 && bounds.top - kGraphHeight < window.innerHeight) {
        renderGraph(holder, clustered.buildsForCluster(holder.dataset.cluster));
      }
    }
  }
}

// Click listener: hand the clicked node to expand(). When expand() reports
// that it handled the node, suppress the default action and return false;
// otherwise leave the event alone.
function clickHandler(evt) {
  if (!expand(evt.target)) {
    return undefined;
  }
  evt.preventDefault();
  return false;
}

// Download a file and invoke callback with the finished request.
// extracted/modified from kubernetes/test-infra/gubernator/static/build.js
//
//   uri        "/bucket/file/path" is fetched from GCS through the JSON API
//              (alt=media); anything not starting with "/" is used as the
//              URL unchanged.
//   callback   called with the XMLHttpRequest once it has loaded. It is
//              called for any completed response; the HTTP status is not
//              checked here.
//   onprogress optional progress-event handler installed on the request.
//
// Throws an Error when a "/..." uri does not have the /bucket/path shape.
// Network-level failures are reported with console.error; callback is not
// invoked for them.
function get(uri, callback, onprogress) {
  let url = uri;
  if (uri[0] === '/') {
    // Matches /bucket/file/path -> [..., "bucket", "file/path"]
    const groups = uri.match(/([^/:]+)\/(.*)/);
    if (groups === null) {
      throw new Error('invalid GCS path (expected /bucket/path): ' + uri);
    }
    const [, bucket, path] = groups;
    // The object name is a single path segment in the API, so "/" inside it
    // must be percent-encoded.
    url = 'https://www.googleapis.com/storage/v1/b/' + bucket + '/o/' +
      encodeURIComponent(path) + '?alt=media';
  }
  const req = new XMLHttpRequest();
  req.open('GET', url);
  req.onload = function() {
    callback(req);
  };
  req.onerror = function() {
    // Without this a failed download is completely silent.
    console.error('request failed', url);
  };
  req.onprogress = onprogress;
  req.send();
}

// One-time initialization of the whole page: apply URL options to the form,
// start loading Google Charts, download the failure data, and install the
// click and scroll listeners.
//
// Once the data arrives it is parsed, `builds` and `clusteredAll` are
// populated, the page is rendered, and, if the URL has a hash, clusters are
// rendered until the one with that id is found.
function load() {
  setOptionsFromURL();
  google.charts.load('current', {'packages': ['corechart', 'line']});
  // Flag set on the charts namespace once the library is ready.
  // NOTE(review): presumably read by the graph rendering code -- confirm.
  google.charts.setOnLoadCallback(() => { google.charts.loaded = true; });

  const setLoading = t => { document.getElementById("loading-progress").innerText = t; };
  // Size in MB, rounded to two decimals.
  const toMB = b => Math.round(b / 1024 / 1024 * 100) / 100;

  get('/k8s-gubernator/triage/failure_data.json',
    req => {
      setLoading(`parsing ${toMB(req.response.length)}MB.`);
      // Everything expensive, JSON.parse included, runs in a later task so
      // the browser gets a chance to show the "parsing" message first.
      setTimeout(() => {
        let data;
        try {
          data = JSON.parse(req.response);
        } catch (err) {
          // Show the failure instead of leaving "Loading..." up forever,
          // then let the error surface in the console as before.
          setLoading(`failed to parse failure data: ${err.message}`);
          throw err;
        }
        builds = new Builds(data.builds);
        clusteredAll = new Clusters(data.clustered);
        rerender();
        if (window.location.hash) {
          renderUntilFound(window.location.hash.slice(1));
        }
      }, 0);
    },
    evt => {
      if (evt.type === "progress") {
        setLoading(`downloaded ${toMB(evt.loaded)}MB`);
      }
    }
  );

  document.addEventListener('click', clickHandler);
  document.addEventListener('scroll', scrollHandler);
}
Loading

0 comments on commit 7c6ea4c

Please sign in to comment.