Skip to content

Commit

Permalink
Merge pull request #712 from Mirantis/ivan4th/diag
Browse files Browse the repository at this point in the history
Virtlet diagnostics
  • Loading branch information
pigmej authored Jul 11, 2018
2 parents 2d3ebd0 + a1c5174 commit 7724a0a
Show file tree
Hide file tree
Showing 51 changed files with 2,338 additions and 284 deletions.
45 changes: 40 additions & 5 deletions cmd/virtlet/virtlet.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/Mirantis/virtlet/pkg/api/virtlet.k8s/v1"
"github.com/Mirantis/virtlet/pkg/cni"
"github.com/Mirantis/virtlet/pkg/config"
"github.com/Mirantis/virtlet/pkg/diag"
"github.com/Mirantis/virtlet/pkg/libvirttools"
"github.com/Mirantis/virtlet/pkg/manager"
"github.com/Mirantis/virtlet/pkg/nsfix"
Expand All @@ -39,12 +40,17 @@ import (
)

const (
wantTapManagerEnv = "WANT_TAP_MANAGER"
nodeNameEnv = "KUBE_NODE_NAME"
wantTapManagerEnv = "WANT_TAP_MANAGER"
nodeNameEnv = "KUBE_NODE_NAME"
diagSocket = "/run/virtlet-diag.sock"
netnsDiagCommand = `if [ -d /var/run/netns ]; then cd /var/run/netns; for ns in *; do echo "*** ${ns} ***"; ip netns exec "${ns}" ip a; ip netns exec "${ns}" ip r; echo; done; fi`
criproxyLogCommand = `nsenter -t 1 -m -u -i journalctl -xe -u criproxy -n 20000 --no-pager || true`
qemuLogDir = "/var/log/libvirt/qemu"
)

var (
dumpConfig = flag.Bool("dump-config", false, "Dump node-specific Virtlet config as a shell script and exit")
dumpDiag = flag.Bool("diag", false, "Dump diagnostics as JSON and exit")
displayVersion = flag.Bool("version", false, "Display version and exit")
versionFormat = flag.String("version-format", "text", "Version format to use (text, short, json, yaml)")
)
Expand All @@ -55,8 +61,8 @@ func configWithDefaults(cfg *v1.VirtletConfig) *v1.VirtletConfig {
return r
}

func runVirtlet(config *v1.VirtletConfig, clientCfg clientcmd.ClientConfig) {
manager := manager.NewVirtletManager(config, nil, clientCfg)
func runVirtlet(config *v1.VirtletConfig, clientCfg clientcmd.ClientConfig, diagSet *diag.Set) {
manager := manager.NewVirtletManager(config, nil, clientCfg, diagSet)
if err := manager.Run(); err != nil {
glog.Errorf("Error: %v", err)
os.Exit(1)
Expand Down Expand Up @@ -106,6 +112,32 @@ func setLogLevel(config *v1.VirtletConfig) {
})
}

// runDiagServer builds the diagnostics set used by the Virtlet process,
// registers the built-in diagnostic sources in it and starts a diag
// server on diagSocket in a background goroutine. The populated set is
// returned so that the caller can pass it on.
func runDiagServer() *diag.Set {
	set := diag.NewDiagSet()

	// addCommand registers a source backed by the output of an
	// external command; ext is the first argument of NewCommandSource
	// ("txt" or "log" below).
	addCommand := func(name, ext string, argv ...string) {
		set.RegisterDiagSource(name, diag.NewCommandSource(ext, argv))
	}
	addCommand("ip-a", "txt", "ip", "a")
	addCommand("ip-r", "txt", "ip", "r")
	addCommand("psaux", "txt", "ps", "aux")
	addCommand("netns", "txt", "/bin/bash", "-c", netnsDiagCommand)
	addCommand("criproxy", "log", "/bin/bash", "-c", criproxyLogCommand)

	set.RegisterDiagSource("libvirt-logs", diag.NewLogDirSource(qemuLogDir))
	set.RegisterDiagSource("stack", diag.StackDumpSource)

	srv := diag.NewServer(set)
	go func() {
		// Serve's result is only logged at verbosity level 1;
		// it is not propagated to the caller.
		serveErr := srv.Serve(diagSocket, nil)
		glog.V(1).Infof("Diag server returned: %v", serveErr)
	}()

	return set
}

// doDiag retrieves the diagnostics via diagSocket and writes their JSON
// form to standard output. It logs an error and exits with status 1 if
// the diagnostics can't be retrieved or can't be written out.
func doDiag() {
	dr, err := diag.RetrieveDiagnostics(diagSocket)
	if err != nil {
		glog.Errorf("Failed to retrieve diagnostics: %v", err)
		os.Exit(1)
	}
	// A failed write (e.g. a closed pipe) leaves the consumer with
	// truncated JSON, so it must not be reported as success.
	if _, err := os.Stdout.Write(dr.ToJSON()); err != nil {
		glog.Errorf("Failed to write diagnostics: %v", err)
		os.Exit(1)
	}
}

func main() {
nsfix.HandleReexec()
clientCfg := utils.BindFlags(flag.CommandLine)
Expand All @@ -131,9 +163,12 @@ func main() {
glog.Errorf("Error writing config: %v", err)
os.Exit(1)
}
case *dumpDiag:
doDiag()
default:
localConfig = configWithDefaults(localConfig)
go runTapManager(localConfig)
runVirtlet(localConfig, clientCfg)
diagSet := runDiagServer()
runVirtlet(localConfig, clientCfg, diagSet)
}
}
2 changes: 1 addition & 1 deletion cmd/virtletctl/virtletctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ func newRootCmd() *cobra.Command {
flag.CommandLine.Parse([]string{})

client := tools.NewRealKubeClient(clientCfg)
cmd.AddCommand(tools.NewDumpMetadataCmd(client))
cmd.AddCommand(tools.NewVirshCmd(client, os.Stdout))
cmd.AddCommand(tools.NewSSHCmd(client, os.Stdout, ""))
cmd.AddCommand(tools.NewVNCCmd(client, os.Stdout, true))
cmd.AddCommand(tools.NewInstallCmd(cmd, "", ""))
cmd.AddCommand(tools.NewGenDocCmd(cmd, os.Stdout))
cmd.AddCommand(tools.NewGenCmd(os.Stdout))
cmd.AddCommand(tools.NewVersionCommand(client, os.Stdout, nil))
cmd.AddCommand(tools.NewDiagCommand(client, os.Stdin, os.Stdout))

for _, c := range cmd.Commands() {
c.PreRunE = func(*cobra.Command, []string) error {
Expand Down
6 changes: 3 additions & 3 deletions deploy/data/virtlet-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ spec:
name: image-name-translations
- name: pods-log
mountPath: /var/log/pods
# needed for diagnostic purposes
- name: libvirt-log
mountPath: /var/log/libvirt
securityContext:
privileged: true
readinessProbe:
Expand Down Expand Up @@ -275,9 +278,6 @@ spec:
- hostPath:
path: /var/log/pods
name: pods-log
- hostPath:
path: /var/run/netns
name: netns-dir
- configMap:
name: virtlet-image-translations
name: image-name-translations
Expand Down
3 changes: 2 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
This directory holds files containing documentation.
This directory contains the Virtlet documentation.

* For a basic example of VM pod definition, see [examples/cirros-vm.yaml](../examples/cirros-vm.yaml)
* [Cloud-init data generation](cloud-init-data-generation.md)
Expand All @@ -11,4 +11,5 @@ This directory holds files containing documentation.
* [Environment variables](environment-variables.md) support
* [Image Handling](images.md)
* [Image Name Translation](image-name-translation.md)
* [Diagnostics](diagnostics.md)
* [Update notes](update-notes.md)
97 changes: 97 additions & 0 deletions docs/diagnostics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Diagnostics

Virtlet provides a set of
[virtletctl diag](virtletctl/virtletctl_diag.md) commands that can
help with troubleshooting. The diagnostics can be invoked either
directly or by means of a
[Sonobuoy](https://github.com/heptio/sonobuoy) plugin.

## Direct invocation

The most basic diagnostics command is [virtletctl diag dump](virtletctl/virtletctl_diag_dump.md):
```
$ virtletctl diag dump out/
$ ls -lR out
total 0
drwxr-xr-x 3 user wheel 96 Jul 11 01:56 nodes
out/nodes:
total 0
drwxr-xr-x 12 user wheel 384 Jul 11 01:56 kube-node-1
out/nodes/kube-node-1:
total 5352
-rwxr-xr-x 1 user wheel 1276000 Jul 11 01:56 criproxy.log
-rwxr-xr-x 1 user wheel 1787 Jul 11 01:56 ip-a.txt
-rwxr-xr-x 1 user wheel 322 Jul 11 01:56 ip-r.txt
drwxr-xr-x 3 user wheel 96 Jul 11 01:56 libvirt-logs
drwxr-xr-x 5 user wheel 160 Jul 11 01:56 libvirt-xml
-rwxr-xr-x 1 user wheel 9964 Jul 11 01:56 metadata.txt
-rwxr-xr-x 1 user wheel 1443 Jul 11 01:56 netns.txt
-rwxr-xr-x 1 user wheel 9217 Jul 11 02:56 psaux.txt
-rwxr-xr-x 1 user wheel 18214 Jul 11 01:56 stack.log
-rwxr-xr-x 1 user wheel 64314 Jul 11 01:56 virtlet-pod-libvirt.log
-rwxr-xr-x 1 user wheel 1349763 Jul 11 01:56 virtlet-pod-virtlet.log
out/nodes/kube-node-1/libvirt-logs:
total 8
-rwxr-xr-x 1 user wheel 2172 Jul 11 01:56 virtlet-1b2261ca-7ed6-cirros-vm.log
out/nodes/kube-node-1/libvirt-xml:
total 24
-rwxr-xr-x 1 user wheel 3511 Jul 11 01:56 domain-virtlet-1b2261ca-7ed6-cirros-vm.xml
-rwxr-xr-x 1 user wheel 445 Jul 11 01:56 pool-volumes.xml
-rwxr-xr-x 1 user wheel 1041 Jul 11 01:56 volume-virtlet_root_1b2261ca-7ed6-58e7-58de-0eef2c9d5320.xml
```

The following files and directories are produced for each Kubernetes
node that runs Virtlet:
* `criproxy.log` - the logs of CRI Proxy's systemd unit
* `ip-a.txt` - the output of `ip a` on the node
* `ip-r.txt` - the output of `ip r` on the node
* `metadata.txt` - the contents of Virtlet's internal metadata db in a text form
* `netns.txt` - the output of `ip a` and `ip r` for each network
namespace that's managed by Virtlet
* `psaux.txt` - the output of `ps aux` command on the node
* `stack.log` - the dump of Go stack of Virtlet process
* `virtlet-pod-libvirt.log` - the log of Virtlet pod's libvirt container
* `virtlet-pod-virtlet.log` - the log of Virtlet pod's virtlet container
* `libvirt-logs` - a directory with libvirt/QEMU logs for each domain
* `libvirt-xml` - the dumps of all the domains, storage pools and storage volumes in libvirt

It's also possible to dump Virtlet diagnostics as JSON to stdout using
`virtletctl diag dump --json`. The JSON file can be subsequently
unpacked into the aforementioned directory structure using
[virtletctl diag unpack](virtletctl/virtletctl_diag_unpack.md).

## Sonobuoy

Virtlet diagnostics can be run as a
[Sonobuoy](https://github.com/heptio/sonobuoy) plugin. Unfortunately,
right now Sonobuoy's plugin support is
[somewhat limited](https://github.com/heptio/sonobuoy/issues/405). Because
of that limitation, a Sonobuoy run must be done in two phases: first
generating the YAML and then using `virtletctl` to patch it (injecting
the Virtlet Sonobuoy plugin):
```
$ cat sonobuoy.json
{
"plugins": [ { "name": "virtlet" } ]
}
$ sonobuoy gen --config sonobuoy.json --e2e-focus nosuchtest |
virtletctl diag sonobuoy |
kubectl apply -f -
$ # wait till sonobuoy run is complete
$ sonobuoy status
PLUGIN STATUS COUNT
virtlet complete 1
Sonobuoy has completed. Use `sonobuoy retrieve` to get results.
$ sonobuoy retrieve
```

The diagnostics results are placed under `plugins/virtlet/results` and
can be unpacked using [virtletctl diag unpack](virtletctl/virtletctl_diag_unpack.md):
```
$ virtletctl diag unpack out/ <sonobuoy_output_dir/plugins/virtlet/results
```
4 changes: 2 additions & 2 deletions docs/virtletctl/virtletctl.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Kubernetes cluster.

### SEE ALSO

* [virtletctl dump-metadata](virtletctl_dump-metadata.md) - Dump Virtlet metadata db
* [virtletctl diag](virtletctl_diag.md) - Virtlet diagnostics
* [virtletctl gen](virtletctl_gen.md) - Generate Kubernetes YAML for Virtlet deployment
* [virtletctl gendoc](virtletctl_gendoc.md) - Generate Markdown documentation for the commands
* [virtletctl install](virtletctl_install.md) - Install virtletctl as a kubectl plugin
Expand All @@ -49,4 +49,4 @@ Kubernetes cluster.
* [virtletctl virsh](virtletctl_virsh.md) - Execute a virsh command
* [virtletctl vnc](virtletctl_vnc.md) - Provide access to the VNC console of a VM pod

###### Auto generated by spf13/cobra on 11-Jun-2018
###### Auto generated by spf13/cobra on 11-Jul-2018
51 changes: 51 additions & 0 deletions docs/virtletctl/virtletctl_diag.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
## virtletctl diag

Virtlet diagnostics

### Synopsis

Retrieve and unpack Virtlet diagnostics information

### Options

```
-h, --help help for diag
```

### Options inherited from parent commands

```
--alsologtostderr log to standard error as well as files
--as string Username to impersonate for the operation
--as-group stringArray Group to impersonate for the operation, this flag can be repeated to specify multiple groups.
--certificate-authority string Path to a cert file for the certificate authority
--client-certificate string Path to a client certificate file for TLS
--client-key string Path to a client key file for TLS
--cluster string The name of the kubeconfig cluster to use
--context string The name of the kubeconfig context to use
--insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure
--kubeconfig string Path to the kubeconfig file to use for CLI requests.
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0)
--log-dir string If non-empty, write log files in this directory
--logtostderr log to standard error instead of files
-n, --namespace string If present, the namespace scope for this CLI request
--password string Password for basic authentication to the API server
--request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0")
-s, --server string The address and port of the Kubernetes API server
--stderrthreshold severity logs at or above this threshold go to stderr (default 2)
--token string Bearer token for authentication to the API server
--user string The name of the kubeconfig user to use
--username string Username for basic authentication to the API server
-v, --v Level log level for V logs
--virtlet-runtime string the name of virtlet runtime used in kubernetes.io/target-runtime annotation (default "virtlet.cloud")
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
```

### SEE ALSO

* [virtletctl](virtletctl.md) - Virtlet control tool
* [virtletctl diag dump](virtletctl_diag_dump.md) - Dump Virtlet diagnostics information
* [virtletctl diag sonobuoy](virtletctl_diag_sonobuoy.md) - Add Virtlet sonobuoy plugin to the sonobuoy output
* [virtletctl diag unpack](virtletctl_diag_unpack.md) - Unpack Virtlet diagnostics information

###### Auto generated by spf13/cobra on 11-Jul-2018
53 changes: 53 additions & 0 deletions docs/virtletctl/virtletctl_diag_dump.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
## virtletctl diag dump

Dump Virtlet diagnostics information

### Synopsis

Pull Virtlet diagnostics information from the nodes and dump it as a directory tree or JSON

```
virtletctl diag dump output_dir [flags]
```

### Options

```
-h, --help help for dump
--json Use JSON output
```

### Options inherited from parent commands

```
--alsologtostderr log to standard error as well as files
--as string Username to impersonate for the operation
--as-group stringArray Group to impersonate for the operation, this flag can be repeated to specify multiple groups.
--certificate-authority string Path to a cert file for the certificate authority
--client-certificate string Path to a client certificate file for TLS
--client-key string Path to a client key file for TLS
--cluster string The name of the kubeconfig cluster to use
--context string The name of the kubeconfig context to use
--insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure
--kubeconfig string Path to the kubeconfig file to use for CLI requests.
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0)
--log-dir string If non-empty, write log files in this directory
--logtostderr log to standard error instead of files
-n, --namespace string If present, the namespace scope for this CLI request
--password string Password for basic authentication to the API server
--request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0")
-s, --server string The address and port of the Kubernetes API server
--stderrthreshold severity logs at or above this threshold go to stderr (default 2)
--token string Bearer token for authentication to the API server
--user string The name of the kubeconfig user to use
--username string Username for basic authentication to the API server
-v, --v Level log level for V logs
--virtlet-runtime string the name of virtlet runtime used in kubernetes.io/target-runtime annotation (default "virtlet.cloud")
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
```

### SEE ALSO

* [virtletctl diag](virtletctl_diag.md) - Virtlet diagnostics

###### Auto generated by spf13/cobra on 11-Jul-2018
Loading

0 comments on commit 7724a0a

Please sign in to comment.