forked from kata-containers/agent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
device.go
536 lines (439 loc) · 14.6 KB
/
device.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
//
// Copyright (c) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"context"
"fmt"
"io/ioutil"
"path"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
pb "github.com/kata-containers/agent/protocols/grpc"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
grpcStatus "google.golang.org/grpc/status"
)
// Driver type identifiers. Five of them (mmioblk, blk, blk-ccw, scsi and
// nvdimm) are used as keys of deviceHandlerList in this file; the others
// are not referenced by the code visible in this file.
const (
driver9pType = "9p"
driverVirtioFSType = "virtio-fs"
driverBlkType = "blk"
driverBlkCCWType = "blk-ccw"
driverMmioBlkType = "mmioblk"
driverSCSIType = "scsi"
driverNvdimmType = "nvdimm"
driverEphemeralType = "ephemeral"
driverLocalType = "local"
// vmRootfs is the path that updateDeviceCgroupForGuestRootfs stats to
// find the device backing the guest root filesystem.
vmRootfs = "/"
)
const (
// pciBusMode is the permission mode handed to ioutil.WriteFile when
// rescanPciBus writes pciBusRescanFile (octal 0220: write permission
// for owner and group only).
pciBusMode = 0220
)
var (
// pciBusRescanFile is the sysfs file written by rescanPciBus.
pciBusRescanFile = sysfsDir + "/bus/pci/rescan"
// systemDevPath is the directory that getDeviceName joins device names
// onto when building the returned path.
systemDevPath = "/dev"
// The function-valued variables below are initialised with the default
// implementations; the callers visible in this file go through these
// variables rather than calling the *Impl functions directly.
// NOTE(review): presumably this indirection exists so tests can swap in
// fakes - confirm against the test files.
getSCSIDevPath = getSCSIDevPathImpl
getPmemDevPath = getPmemDevPathImpl
getPCIDeviceName = getPCIDeviceNameImpl
pciPathToSysfs = pciPathToSysfsImpl
scanSCSIBus = scanSCSIBusImpl
)
// CCW variables
var (
// blkCCWSuffix is joined onto the CCW bus ID by getBlkCCWDevPath to
// build the identifier that is passed to getDeviceName.
blkCCWSuffix = "virtio"
)
// maxDeviceIDValue is the inclusive upper bound that checkCCWBusFormat
// accepts for the second field of a CCW bus ID, which that function
// documents as the subchannel set ID.
const maxDeviceIDValue = 3
// SCSI variables
var (
// Here in "0:0", the first number is the SCSI host number because
// only one SCSI controller has been plugged, while the second number
// is always 0.
// getSCSIDevPathImpl prepends this prefix to the "scsiID:lunID" address.
scsiHostChannel = "0:0:"
// sysClassPrefix is the "class" directory under sysfsDir.
sysClassPrefix = sysfsDir + "/class"
// scsiBlockSuffix is the path component appended when building the
// identifiers looked up for SCSI and pmem devices.
scsiBlockSuffix = "block"
// scsiHostPath is the directory whose entries scanSCSIBusImpl iterates
// over, writing to the "scan" file of each one.
scsiHostPath = filepath.Join(sysClassPrefix, "scsi_host")
)
// devIndexEntry stores, for one device of a container spec, the slots that
// refer to it: its position in the device list and the positions of the
// matching entries in the resources device list.
type devIndexEntry struct {
// idx is the position of the device in spec.Linux.Devices.
idx int
// resourceIdx holds the positions in spec.Linux.Resources.Devices whose
// type, major and minor matched the device when makeDevIndex ran.
resourceIdx []int
}
// devIndex maps a device path taken from the spec (the Path field of an
// entry in spec.Linux.Devices) to its devIndexEntry. updateSpecDeviceList
// looks entries up by device.ContainerPath.
type devIndex map[string]devIndexEntry
// PciPath is a guest-side PCI path. It identifies a PCI device by where it
// sits in the PCI topology.
//
// It has the format "xx/.../yy/zz". Here, zz is the slot of the device on
// its PCI bridge, yy is the slot of the bridge on its parent bridge
// and so forth until xx is the slot of the "most upstream" bridge on
// the root bus. If a device is connected directly to the root bus,
// its pciPath is just "zz".
type PciPath struct {
// path is the slash-separated list of slots described above.
path string
}
// deviceHandler is the signature shared by the per-driver handlers that
// are registered in deviceHandlerList.
type deviceHandler func(ctx context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error
// deviceHandlerList maps a device type string to the handler for that
// type. addDevice looks device.Type up in this map and rejects any type
// that has no entry.
var deviceHandlerList = map[string]deviceHandler{
driverMmioBlkType: virtioMmioBlkDeviceHandler,
driverBlkType: virtioBlkDeviceHandler,
driverBlkCCWType: virtioBlkCCWDeviceHandler,
driverSCSIType: virtioSCSIDeviceHandler,
driverNvdimmType: nvdimmDeviceHandler,
}
// rescanPciBus triggers a PCI bus rescan by writing "1" to
// pciBusRescanFile. It returns whatever error the write produces.
func rescanPciBus() error {
	trigger := []byte("1")
	return ioutil.WriteFile(pciBusRescanFile, trigger, pciBusMode)
}
// pciPathToSysfsImpl translates a guest PCI path into the matching sysfs
// path, expressed relative to the sysfs path of the PCI host bridge.
//
// Every slot of pciPath becomes a "<bus>:<slot>.0" component. The walk
// starts on bus "0000:00"; for each slot except the last one, the bus used
// for the next component is the name of the single entry found in that
// bridge's "pci_bus" directory.
//
// An error is returned when the root bus path cannot be determined, when a
// "pci_bus" directory cannot be read, or when it does not contain exactly
// one entry.
func pciPathToSysfsImpl(pciPath PciPath) (string, error) {
	rootBusPath, err := createRootBusPath()
	if err != nil {
		return "", err
	}

	slots := strings.Split(pciPath.path, "/")
	last := len(slots) - 1
	currentBus := "0000:00"
	sysfsRelPath := ""

	for idx := 0; idx <= last; idx++ {
		// Full PCI address of the device at this level of the path.
		address := fmt.Sprintf("%s:%s.0", currentBus, slots[idx])
		sysfsRelPath = filepath.Join(sysfsRelPath, address)

		// The final device does not have to be a bridge, so there is
		// no downstream bus to look up for it.
		if idx == last {
			break
		}

		// Discover the bus exposed by this bridge.
		bridgeBusPath := filepath.Join(sysfsDir, rootBusPath, sysfsRelPath, "pci_bus")
		entries, err := ioutil.ReadDir(bridgeBusPath)
		if err != nil {
			return "", fmt.Errorf("Error reading %s : %s", bridgeBusPath, err)
		}
		if count := len(entries); count != 1 {
			return "", fmt.Errorf("Expected exactly one PCI bus in %s, got %d instead", bridgeBusPath, count)
		}
		currentBus = entries[0].Name()
	}

	return sysfsRelPath, nil
}
// getDeviceName resolves devID to a device node path under systemDevPath.
//
// It first searches s.sysToDevMap for a key that contains devID as a
// substring. When no such key exists it registers a channel in
// s.deviceWatchers under devID and waits on it for up to hotplugTimeout.
//
// It returns the device name joined onto systemDevPath, or a
// codes.DeadlineExceeded gRPC error when no notification arrives in time.
func getDeviceName(s *sandbox, devID string) (string, error) {
var devName string
var notifyChan chan string
fieldLogger := agentLog.WithField("devID", devID)
// Check if the dev identifier is in PCI device map.
// NOTE(review): the match is a substring test and map iteration order is
// unspecified, so if several keys contain devID the one picked is not
// deterministic.
s.Lock()
for key, value := range s.sysToDevMap {
if strings.Contains(key, devID) {
devName = value
fieldLogger.Infof("Device: %s found in device map", devID)
break
}
}
// If device is not found in the device map, hotplug event has not
// been received yet, create and add channel to the watchers map.
// The key of the watchers map is the device we are interested in.
// Note this is done inside the lock, not to miss any events from the
// global udev listener.
if devName == "" {
// The channel has capacity 1.
// NOTE(review): presumably so the sender never blocks on it - verify
// against the code that writes to deviceWatchers channels.
notifyChan = make(chan string, 1)
s.deviceWatchers[devID] = notifyChan
}
s.Unlock()
if devName == "" {
fieldLogger.Infof("Waiting on channel for device: %s notification", devID)
select {
case devName = <-notifyChan:
case <-time.After(hotplugTimeout):
// Timed out: drop the watcher registered above so the entry does not
// stay in the map, then report the timeout to the caller.
s.Lock()
delete(s.deviceWatchers, devID)
s.Unlock()
return "", grpcStatus.Errorf(codes.DeadlineExceeded,
"Timeout reached after %s waiting for device %s",
hotplugTimeout, devID)
}
}
return filepath.Join(systemDevPath, devName), nil
}
// getPCIDeviceNameImpl returns the device node path of the device located
// at pciPath. It converts the PCI path into its sysfs relative path,
// triggers a PCI bus rescan, and then resolves the device through
// getDeviceName using that sysfs path as identifier.
//
// A failure to convert the path or to rescan the bus is returned as is; the
// rescan failure is additionally logged.
func getPCIDeviceNameImpl(s *sandbox, pciPath PciPath) (string, error) {
	sysfsRelPath, err := pciPathToSysfs(pciPath)
	if err != nil {
		return "", err
	}

	// Rescan the PCI bus in case we need to wait for a new PCI device.
	if rescanErr := rescanPciBus(); rescanErr != nil {
		agentLog.WithField("sysfsRelPath", sysfsRelPath).
			WithError(rescanErr).
			Error("Failed to scan pci bus")
		return "", rescanErr
	}

	return getDeviceName(s, sysfsRelPath)
}
// virtioMmioBlkDeviceHandler handles devices of type driverMmioBlkType.
//
// device.Id should be the predicted device name (vda, vdb, ...), but the
// handler relies on device.VmPath, which already carries the device path.
// An empty VmPath is rejected; otherwise the spec is updated for the device.
func virtioMmioBlkDeviceHandler(_ context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error {
	if vmPath := device.VmPath; len(vmPath) == 0 {
		return fmt.Errorf("Invalid path for virtioMmioBlkDevice")
	}

	return updateSpecDeviceList(device, spec, devIdx)
}
// virtioBlkCCWDeviceHandler handles devices of type driverBlkCCWType.
//
// device.Id is passed to getBlkCCWDevPath as the CCW bus ID. The resolved
// path is stored in the handler's own copy of the device (device is passed
// by value) before the spec is updated. A lookup error is returned as is;
// an empty resolved path yields a codes.InvalidArgument error.
func virtioBlkCCWDeviceHandler(ctx context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error {
	ccwDevPath, err := getBlkCCWDevPath(s, device.Id)
	switch {
	case err != nil:
		return err
	case ccwDevPath == "":
		return grpcStatus.Errorf(codes.InvalidArgument,
			"Storage source is empty")
	}

	device.VmPath = ccwDevPath
	return updateSpecDeviceList(device, spec, devIdx)
}
// virtioBlkDeviceHandler handles devices of type driverBlkType.
//
// device.Id should be a PCI path (see type PciPath). When it is set, the
// device path is resolved from it and replaces device.VmPath in the
// handler's copy of the device. When it is empty, the predicted VmPath
// passed from kata-runtime is used unchanged.
func virtioBlkDeviceHandler(_ context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error {
	// No PCI path: keep the VmPath supplied by the caller.
	if device.Id == "" {
		return updateSpecDeviceList(device, spec, devIdx)
	}

	resolved, err := getPCIDeviceName(s, PciPath{device.Id})
	if err != nil {
		return err
	}
	device.VmPath = resolved

	return updateSpecDeviceList(device, spec, devIdx)
}
// virtioSCSIDeviceHandler handles devices of type driverSCSIType.
//
// device.Id should be the SCSI address of the disk in the format
// "scsiID:lunID". The device path obtained from that address is stored in
// the handler's copy of the device before the spec is updated.
func virtioSCSIDeviceHandler(ctx context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error {
	// Map the SCSI address to a device path.
	scsiDevPath, lookupErr := getSCSIDevPath(s, device.Id)
	if lookupErr != nil {
		return lookupErr
	}

	device.VmPath = scsiDevPath
	return updateSpecDeviceList(device, spec, devIdx)
}
// nvdimmDeviceHandler handles devices of type driverNvdimmType. It performs
// no lookup of its own: the device is handed to updateSpecDeviceList as
// received, so device.VmPath is used as provided by the caller.
func nvdimmDeviceHandler(_ context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error {
	err := updateSpecDeviceList(device, spec, devIdx)
	return err
}
// updateSpecDeviceList rewrites the major/minor numbers of a device in the
// OCI spec with the values observed inside the guest.
//
// The device node at device.VmPath is stat'ed to obtain the guest major and
// minor numbers, which the caller cannot predict. The entry for
// device.ContainerPath is then looked up in devIdx, and both the matching
// element of spec.Linux.Devices and every element of
// spec.Linux.Resources.Devices listed in that entry are updated. The
// resources need updating because they identify the device in the devices
// cgroup.
//
// A codes.Internal error is returned when ContainerPath is empty, when the
// spec carries no devices, or when devIdx has no entry for ContainerPath.
// A failing stat is returned unchanged.
func updateSpecDeviceList(device pb.Device, spec *pb.Spec, devIdx devIndex) error {
	// Without a ContainerPath there is nothing to match against the
	// device list of the OCI spec.
	if device.ContainerPath == "" {
		return grpcStatus.Errorf(codes.Internal,
			"ContainerPath cannot be empty")
	}

	if spec.Linux == nil || len(spec.Linux.Devices) == 0 {
		return grpcStatus.Errorf(codes.Internal,
			"No devices found from the spec, cannot update")
	}

	var st syscall.Stat_t
	if err := syscall.Stat(device.VmPath, &st); err != nil {
		return err
	}

	rdev := st.Rdev
	guestMajor := int64(unix.Major(rdev))
	guestMinor := int64(unix.Minor(rdev))

	agentLog.WithFields(logrus.Fields{
		"device-path":  device.VmPath,
		"device-major": guestMajor,
		"device-minor": guestMinor,
	}).Info("handling block device")

	// Locate the device in the spec.
	entry, found := devIdx[device.ContainerPath]
	if !found {
		return grpcStatus.Errorf(codes.Internal,
			"Should have found a matching device %s in the spec",
			device.ContainerPath)
	}

	// Log the host values before they are overwritten.
	agentLog.WithFields(logrus.Fields{
		"device-path":        device.VmPath,
		"host-device-major":  spec.Linux.Devices[entry.idx].Major,
		"host-device-minor":  spec.Linux.Devices[entry.idx].Minor,
		"guest-device-major": guestMajor,
		"guest-device-minor": guestMinor,
	}).Info("updating block device major/minor into the spec")

	spec.Linux.Devices[entry.idx].Major = guestMajor
	spec.Linux.Devices[entry.idx].Minor = guestMinor

	// Keep the cgroup resources in sync with the device entry.
	for k := 0; k < len(entry.resourceIdx); k++ {
		pos := entry.resourceIdx[k]
		spec.Linux.Resources.Devices[pos].Major = guestMajor
		spec.Linux.Resources.Devices[pos].Minor = guestMinor
	}

	return nil
}
// scanSCSIBusImpl requests a scan for the given SCSI address on every entry
// found under scsiHostPath.
//
// scsiAddr must have the form "SCSIID:LUN". For each host entry the string
// "0 <SCSIID> <LUN>" is written to its "scan" file; the leading 0 is the
// channel, which is always 0 because there is only one SCSI controller.
//
// The host directory is read before the address is validated, so a read
// failure is reported in preference to a malformed address. The first
// failing write aborts the loop and is returned.
func scanSCSIBusImpl(scsiAddr string) error {
	hosts, err := ioutil.ReadDir(scsiHostPath)
	if err != nil {
		return err
	}

	parts := strings.Split(scsiAddr, ":")
	if len(parts) != 2 {
		return grpcStatus.Errorf(codes.Internal,
			"Unexpected format for SCSI Address : %s, expect SCSIID:LUN",
			scsiAddr)
	}
	scsiID, lun := parts[0], parts[1]

	// Channel, SCSI id and LUN, separated by single spaces.
	payload := []byte("0 " + scsiID + " " + lun)

	for i := range hosts {
		scanFile := filepath.Join(scsiHostPath, hosts[i].Name(), "scan")
		if writeErr := ioutil.WriteFile(scanFile, payload, 0200); writeErr != nil {
			return writeErr
		}
	}

	return nil
}
// getSCSIDevPathImpl scans the SCSI bus for scsiAddr and then resolves the
// device path of the corresponding disk through getDeviceName, which waits
// for the disk if it is not known yet. The identifier used for the lookup
// is scsiHostChannel followed by scsiAddr, joined with scsiBlockSuffix.
func getSCSIDevPathImpl(s *sandbox, scsiAddr string) (string, error) {
	err := scanSCSIBus(scsiAddr)
	if err != nil {
		return "", err
	}

	fullAddr := scsiHostChannel + scsiAddr
	return getDeviceName(s, filepath.Join(fullAddr, scsiBlockSuffix))
}
// getPmemDevPathImpl resolves the device path of a pmem device through
// getDeviceName. Only the base name of devPmemPath is used; it is appended
// to "/" and scsiBlockSuffix to form the lookup identifier, for example
// "/block/pmem1".
func getPmemDevPathImpl(s *sandbox, devPmemPath string) (string, error) {
	pmemName := filepath.Base(devPmemPath)
	lookupKey := filepath.Join("/", scsiBlockSuffix, pmemName)
	return getDeviceName(s, lookupKey)
}
// checkCCWBusFormat checks the format for the ccw bus. It needs to be in
// the form 0.<n>.<dddd>, where:
//   - the first field is always 0,
//   - n is the subchannel set ID, a decimal integer from 0 up to
//     maxDeviceIDValue (inclusive),
//   - dddd is the device id, exactly four hexadecimal digits (0000-ffff).
//
// Sign prefixes, base prefixes and "_" digit separators are rejected in
// every field. The function returns nil when bus is well formed and an
// error describing the first problem found otherwise.
//
// See https://www.ibm.com/support/knowledgecenter/en/linuxonibm/com.ibm.linux.z.ldva/ldva_r_XML_Address.html
func checkCCWBusFormat(bus string) error {
	busFormat := strings.Split(bus, ".")
	if len(busFormat) != 3 {
		return fmt.Errorf("Wrong bus format. It needs to be in the form 0.<n>.<dddd>, got %s", bus)
	}

	// ParseUint, unlike ParseInt, does not permit a sign prefix, so inputs
	// such as "-0" or "+0" are rejected here.
	bus0, err := strconv.ParseUint(busFormat[0], 10, 32)
	if err != nil {
		return err
	}
	if bus0 != 0 {
		return fmt.Errorf("Wrong bus format. First digit needs to be 0 instead is %d", bus0)
	}

	// Parsing as unsigned also rules out negative subchannel set IDs, which
	// an upper-bound check alone would let through.
	bus1, err := strconv.ParseUint(busFormat[1], 10, 32)
	if err != nil {
		return err
	}
	if bus1 > maxDeviceIDValue {
		return fmt.Errorf("Wrong bus format. Second digit must not be greater than %d instead is %d", maxDeviceIDValue, bus1)
	}

	if len(busFormat[2]) != 4 {
		return fmt.Errorf("Wrong bus format. Third digit must be in the form <dddd>, got %s", bus)
	}
	// An explicit base of 16 accepts hexadecimal digits only. Base 0 with a
	// "0x" prefix would also accept "_" separators (e.g. "1_23").
	if _, err := strconv.ParseUint(busFormat[2], 16, 16); err != nil {
		return err
	}

	return nil
}
// getBlkCCWDevPath returns the device path of a CCW block device given its
// bus ID. The bus ID is validated with checkCCWBusFormat first; it is then
// joined with blkCCWSuffix and resolved through getDeviceName.
func getBlkCCWDevPath(s *sandbox, bus string) (string, error) {
	err := checkCCWBusFormat(bus)
	if err != nil {
		return "", err
	}

	lookupKey := path.Join(bus, blkCCWSuffix)
	return getDeviceName(s, lookupKey)
}
// addDevices runs addDevice for every non-nil entry of devices, sharing a
// single device index built from spec. Nil entries are skipped. Processing
// stops at the first error, which is returned.
func addDevices(ctx context.Context, devices []*pb.Device, spec *pb.Spec, s *sandbox) error {
	devIdx := makeDevIndex(spec)

	for i := range devices {
		dev := devices[i]
		if dev != nil {
			if err := addDevice(ctx, dev, spec, s, devIdx); err != nil {
				return err
			}
		}
	}

	return nil
}
// makeDevIndex builds an index of the devices listed in spec, keyed by
// device path.
//
// Each entry records the position of the device in spec.Linux.Devices and
// the positions of all elements of spec.Linux.Resources.Devices that have
// the same type, major and minor numbers. The list of resource positions is
// always non-nil, even when nothing matches. An empty index is returned
// when spec, spec.Linux or spec.Linux.Devices is nil.
func makeDevIndex(spec *pb.Spec) devIndex {
	index := devIndex{}

	if spec == nil || spec.Linux == nil || spec.Linux.Devices == nil {
		return index
	}

	hasResources := spec.Linux.Resources != nil && spec.Linux.Resources.Devices != nil

	for pos, dev := range spec.Linux.Devices {
		matches := []int{}
		if hasResources {
			for resPos, res := range spec.Linux.Resources.Devices {
				if res.Type != dev.Type || res.Major != dev.Major || res.Minor != dev.Minor {
					continue
				}
				matches = append(matches, resPos)
			}
		}
		index[dev.Path] = devIndexEntry{idx: pos, resourceIdx: matches}
	}

	return index
}
// addDevice validates device and dispatches it to the handler registered in
// deviceHandlerList for its type. The handler receives a copy of the
// device.
//
// A codes.InvalidArgument error is returned when device or spec is nil,
// when the type is empty, when both Id and VmPath are empty, when
// ContainerPath is empty, or when no handler exists for the type.
// Otherwise the handler's result is returned.
func addDevice(ctx context.Context, device *pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error {
	switch {
	case device == nil:
		return grpcStatus.Error(codes.InvalidArgument, "invalid device")
	case spec == nil:
		return grpcStatus.Error(codes.InvalidArgument, "invalid spec")
	}

	// Logging happens ahead of validation on purpose: it helps when
	// debugging gRPC protocol version differences.
	agentLog.WithFields(logrus.Fields{
		"device-id":             device.Id,
		"device-type":           device.Type,
		"device-vm-path":        device.VmPath,
		"device-container-path": device.ContainerPath,
		"device-options":        device.Options,
	}).Debug()

	switch {
	case device.Type == "":
		return grpcStatus.Errorf(codes.InvalidArgument,
			"invalid type for device %v", device)
	case device.Id == "" && device.VmPath == "":
		return grpcStatus.Errorf(codes.InvalidArgument,
			"invalid ID and VM path for device %v", device)
	case device.ContainerPath == "":
		return grpcStatus.Errorf(codes.InvalidArgument,
			"invalid container path for device %v", device)
	}

	handler, known := deviceHandlerList[device.Type]
	if !known {
		return grpcStatus.Errorf(codes.InvalidArgument,
			"Unknown device type %q", device.Type)
	}

	return handler(ctx, *device, spec, s, devIdx)
}
// updateDeviceCgroupForGuestRootfs appends a rule to the container's device
// cgroup that denies "rw" access to the block device backing vmRootfs. This
// prevents the container from being able to access the VM rootfs.
//
// The rule is built from the major/minor numbers of the device reported by
// stat for vmRootfs. When that stat fails the function returns without
// modifying spec. spec.Linux.Resources is dereferenced without a nil check,
// so callers must pass a spec where it is set.
func updateDeviceCgroupForGuestRootfs(spec *pb.Spec) {
	var rootStat unix.Stat_t
	if err := unix.Stat(vmRootfs, &rootStat); err != nil {
		return
	}

	denyRootfs := pb.LinuxDeviceCgroup{
		Type:   "b",
		Major:  int64(unix.Major(rootStat.Dev)),
		Minor:  int64(unix.Minor(rootStat.Dev)),
		Access: "rw",
		Allow:  false,
	}

	spec.Linux.Resources.Devices = append(spec.Linux.Resources.Devices, denyRootfs)
}