From 061d5a8f5cbd479023e9834408dcd7f38c3e8831 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 17:19:24 +0200 Subject: [PATCH 01/16] add a new dependency for github.com/NVIDIA/nvidia-container-toolkit The new `github.com/NVIDIA/nvidia-container-toolkit` is required because we need to use the `nvcdi` package in order to be able to generate an NVIDIA implementation of a CDI specification. All the devices / mounts discoverability logic is encapsulated in this library. Signed-off-by: Gabriel Mougard --- go.mod | 8 ++++++++ go.sum | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/go.mod b/go.mod index d301a9662158..cee807efb2d4 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/canonical/lxd go 1.22.5 require ( + github.com/NVIDIA/nvidia-container-toolkit v1.16.1 github.com/Rican7/retry v0.3.1 github.com/armon/go-proxyproto v0.1.0 github.com/canonical/go-dqlite v1.22.0 @@ -64,9 +65,13 @@ require ( gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637 gopkg.in/yaml.v2 v2.4.0 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 + tags.cncf.io/container-device-interface v0.8.0 + tags.cncf.io/container-device-interface/specs-go v0.8.0 ) require ( + github.com/NVIDIA/go-nvlib v0.6.0 // indirect + github.com/NVIDIA/go-nvml v0.12.4-0 // indirect github.com/antlr4-go/antlr/v4 v4.13.1 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect @@ -117,6 +122,8 @@ require ( github.com/muhlemmer/httpforwarded v0.1.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/natefinch/wrap v0.2.0 // indirect + github.com/opencontainers/runtime-spec v1.2.0 // indirect + github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect @@ 
-158,4 +165,5 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index 70d8f4673b07..09d2363d8e12 100644 --- a/go.sum +++ b/go.sum @@ -47,6 +47,12 @@ github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8 github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/NVIDIA/go-nvlib v0.6.0 h1:zAMBzCYT9xeyRQo0tb7HJbStkzajD6e5joyaQqJ2OGU= +github.com/NVIDIA/go-nvlib v0.6.0/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY= +github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg= +github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ= +github.com/NVIDIA/nvidia-container-toolkit v1.16.1 h1:PkY6RqYD1wIt1izCvYZ7kr7IitxK8e9+k/prO6b3vD0= +github.com/NVIDIA/nvidia-container-toolkit v1.16.1/go.mod h1:jJXYvHEdqqpDcRXvolaiFCBsgLxvCwmJWSBZM3zQPY8= github.com/Rican7/retry v0.3.0/go.mod h1:CxSDrhAyXmTMeEuRAnArMu1FHu48vtfjLREWqVl7Vw0= github.com/Rican7/retry v0.3.1 h1:scY4IbO8swckzoA/11HgBwaZRJEyY9vaNJshcdhp1Mc= github.com/Rican7/retry v0.3.1/go.mod h1:CxSDrhAyXmTMeEuRAnArMu1FHu48vtfjLREWqVl7Vw0= @@ -63,6 +69,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod 
h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/canonical/go-dqlite v1.22.0 h1:DuJmfcREl4gkQJyvZzjl2GHFZROhbPyfdjDRQXpkOyw= @@ -222,6 +230,7 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -247,6 +256,7 @@ github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm4 github.com/google/renameio v1.0.1 h1:Lh/jXZmvZxb0BBeSY5VKEfidcbcbenKjZFzM/q0fSeU= github.com/google/renameio v1.0.1/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= @@ -492,11 +502,13 @@ github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.5.0 
h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/muhlemmer/gu v0.3.1 h1:7EAqmFrW7n3hETvuAdmFmn4hS8W+z3LgKtrnow+YzNM= github.com/muhlemmer/gu v0.3.1/go.mod h1:YHtHR+gxM+bKEIIs7Hmi9sPT3ZDUvTN/i88wQpZkrdM= github.com/muhlemmer/httpforwarded v0.1.0 h1:x4DLrzXdliq8mprgUMR0olDvHGkou5BJsK/vWUetyzY= @@ -515,6 +527,14 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= +github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= +github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0= 
+github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI= +github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= +github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= github.com/openfga/api/proto v0.0.0-20240807201305-c96ec773cae9 h1:Y0fIAHrYECcf5lpa/o1AbH21bS7rsco/FoH4A4NGlZE= github.com/openfga/api/proto v0.0.0-20240807201305-c96ec773cae9/go.mod h1:gil5LBD8tSdFQbUkCQdnXsoeU9kDJdJgbGdHkgJfcd0= github.com/openfga/language/pkg/go v0.2.0-beta.0 h1:dTvgDkQImfNnH1iDvxnUIbz4INvKr4kS46dI12oAEzM= @@ -579,6 +599,7 @@ github.com/sethvargo/go-retry v0.2.4 h1:T+jHEQy/zKJf5s95UkguisicE0zuF9y7+/vgz08O github.com/sethvargo/go-retry v0.2.4/go.mod h1:1afjQuvh7s4gflMObvjLPaWgluLLyhA1wmVZ6KLpICw= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= @@ -625,12 +646,20 @@ github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8 github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= +github.com/urfave/cli v1.19.1/go.mod 
h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk= github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs= github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -827,6 +856,8 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1110,3 +1141,9 @@ launchpad.net/xmlpath v0.0.0-20130614043138-000000000004/go.mod h1:vqyExLOM3qBx7 rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +tags.cncf.io/container-device-interface v0.8.0 h1:8bCFo/g9WODjWx3m6EYl3GfUG31eKJbaggyBDxEldRc= +tags.cncf.io/container-device-interface v0.8.0/go.mod h1:Apb7N4VdILW0EVdEMRYXIDVRZfNJZ+kmEUss2kRRQ6Y= +tags.cncf.io/container-device-interface/specs-go v0.8.0 h1:QYGFzGxvYK/ZLMrjhvY0RjpUavIn4KcmRmVP/JjdBTA= +tags.cncf.io/container-device-interface/specs-go v0.8.0/go.mod h1:BhJIkjjPh4qpys+qm4DAYtUyryaTDg9zris+AczXyws= From 1b1c20d60efad7aee47b6ffa844f1c7f7aab4869 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 17:20:42 +0200 Subject: [PATCH 02/16] lxd/device/cdi: Introduce base CDI types and `ToCDI` Signed-off-by: Gabriel Mougard --- lxd/device/cdi/id.go | 104 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 lxd/device/cdi/id.go diff --git a/lxd/device/cdi/id.go b/lxd/device/cdi/id.go new file mode 100644 
index 000000000000..7c956cdb9fee --- /dev/null +++ b/lxd/device/cdi/id.go @@ -0,0 +1,104 @@ +package cdi + +import ( + "fmt" + + "tags.cncf.io/container-device-interface/pkg/parser" +) + +// Vendor represents the compatible CDI vendor. +type Vendor string + +const ( + // NVIDIA represents the Nvidia CDI vendor. + NVIDIA Vendor = "nvidia.com" +) + +// ToVendor converts a string to a CDI vendor. +func ToVendor(vendor string) (Vendor, error) { + switch vendor { + case string(NVIDIA): + return NVIDIA, nil + default: + return "", fmt.Errorf("Invalid CDI vendor (%q)", vendor) + } +} + +// Class represents the compatible CDI class. +type Class string + +const ( + // GPU is a single discrete GPU. + GPU Class = "gpu" + // IGPU is an integrated GPU. + IGPU Class = "igpu" + // MIG is a single MIG compatible GPU. + MIG Class = "mig" +) + +// ToClass converts a string to a CDI class. +func ToClass(c string) (Class, error) { + switch c { + case string(GPU): + return GPU, nil + case string(IGPU): + return IGPU, nil + case string(MIG): + return MIG, nil + default: + return "", fmt.Errorf("Invalid CDI class (%q)", c) + } +} + +// ID represents a Container Device Interface (CDI) identifier. +// +// +------------+-------+------------------------------------------+ +// | Vendor | Class | Name | +// +---------------------------------------------------------------+ +// | nvidia.com | gpu | [dev_idx], [dev_uuid] or `all` | +// | | mig | [dev_idx]:[mig_idx], [dev_uuid] or `all` | +// | | igpu | [dev_idx], [dev_uuid] or `all` | +// +------------+-------+------------------------------------------+ +// +// Examples: +// - nvidia.com/gpu=0 +// - nvidia.com/gpu=d1f1c76e-7a72-487e-b121-e6d2e5555dc8 +// - nvidia.com/gpu=all +// - nvidia.com/mig=0:1 +// - nvidia.com/igpu=0 +type ID struct { + Vendor Vendor + Class Class + Name string +} + +// String returns the string representation of the ID. 
+func (id ID) String() string { + return fmt.Sprintf("%s/%s=%s", id.Vendor, id.Class, id.Name) +} + +// Empty returns true if the ID is empty. +func (id ID) Empty() bool { + return id.Vendor == "" && id.Class == "" && id.Name == "" +} + +// ToCDI converts a string identifier to a CDI ID. +func ToCDI(id string) (ID, error) { + vendor, class, name, err := parser.ParseQualifiedName(id) + if err != nil { + // The ID is not a valid CDI qualified name but it could be a valid DRM device ID. + return ID{}, nil + } + + vendorType, err := ToVendor(vendor) + if err != nil { + return ID{}, err + } + + classType, err := ToClass(class) + if err != nil { + return ID{}, err + } + + return ID{Vendor: vendorType, Class: classType, Name: name}, nil +} From 8548e7cfb6d92cce1864f87273c1a2a68dd6abd7 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Fri, 26 Jul 2024 14:09:47 +0200 Subject: [PATCH 03/16] lxd/device/cdi: Add unit tests for the CDI identifier parser Signed-off-by: Gabriel Mougard --- lxd/device/cdi/id_test.go | 117 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 lxd/device/cdi/id_test.go diff --git a/lxd/device/cdi/id_test.go b/lxd/device/cdi/id_test.go new file mode 100644 index 000000000000..658ff04dc156 --- /dev/null +++ b/lxd/device/cdi/id_test.go @@ -0,0 +1,117 @@ +package cdi + +import ( + "reflect" + "testing" +) + +func TestToVendor(t *testing.T) { + tests := []struct { + name string + input string + want Vendor + wantErr bool + }{ + {"Valid Nvidia", "nvidia.com", NVIDIA, false}, + {"Invalid vendor", "amd.com", "", true}, + {"Empty string", "", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ToVendor(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("ToVendor() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if got != tt.want { + t.Errorf("ToVendor() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestToClass(t *testing.T) { + tests := []struct { + 
name string + input string + want Class + wantErr bool + }{ + {"Valid GPU", "gpu", GPU, false}, + {"Valid IGPU", "igpu", IGPU, false}, + {"Valid MIG", "mig", MIG, false}, + {"Invalid class", "cpu", "", true}, + {"Empty string", "", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ToClass(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("ToClass() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if got != tt.want { + t.Errorf("ToClass() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDEmpty(t *testing.T) { + tests := []struct { + name string + id ID + want bool + }{ + {"Empty ID", ID{}, true}, + {"Non-empty ID", ID{Vendor: NVIDIA, Class: GPU, Name: "0"}, false}, + {"Partial ID", ID{Vendor: NVIDIA}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.id.Empty() + if got != tt.want { + t.Errorf("ID.Empty() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestToCDI(t *testing.T) { + tests := []struct { + name string + input string + want ID + wantErr bool + }{ + {"Valid GPU", "nvidia.com/gpu=0", ID{Vendor: NVIDIA, Class: GPU, Name: "0"}, false}, + {"Valid GPU all", "nvidia.com/gpu=all", ID{Vendor: NVIDIA, Class: GPU, Name: "all"}, false}, + {"Valid MIG", "nvidia.com/mig=0:1", ID{Vendor: NVIDIA, Class: MIG, Name: "0:1"}, false}, + {"Valid IGPU", "nvidia.com/igpu=0", ID{Vendor: NVIDIA, Class: IGPU, Name: "0"}, false}, + {"Valid GPU with UUID", "nvidia.com/gpu=GPU-8da9a1ee-3495-a369-a73a-b9d8ffbc1220", ID{Vendor: NVIDIA, Class: GPU, Name: "GPU-8da9a1ee-3495-a369-a73a-b9d8ffbc1220"}, false}, + {"Valid MIG with UUID", "nvidia.com/mig=MIG-8da9a1ee-3495-a369-a73a-b9d8ffbc1220", ID{Vendor: NVIDIA, Class: MIG, Name: "MIG-8da9a1ee-3495-a369-a73a-b9d8ffbc1220"}, false}, + {"Invalid vendor", "amd.com/gpu=0", ID{}, true}, + {"Invalid class", "nvidia.com/cpu=0", ID{}, true}, + {"Valid MIG format (all MIG indexes in device)", "nvidia.com/mig=0", ID{Vendor: NVIDIA, 
Class: MIG, Name: "0"}, false}, + {"Non-CDI format", "not-a-cdi-format", ID{}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ToCDI(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("ToCDI() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ToCDI() = %v, want %v", got, tt.want) + } + }) + } +} From 59fdc8fec8422a3c9c007886000ede2d351c1028 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 20 Jun 2024 13:54:51 +0200 Subject: [PATCH 04/16] lxd/device/cdi: Introduce a `CDILogger` type In order to log the internal discovery operations of the CDI library, we created a `CDILogger` type reusing LXD's existing shared logger, but with slightly modified method prototypes to comply with the CDI logger interface. Signed-off-by: Gabriel Mougard --- lxd/device/cdi/log.go | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 lxd/device/cdi/log.go diff --git a/lxd/device/cdi/log.go b/lxd/device/cdi/log.go new file mode 100644 index 000000000000..1a2f58697c97 --- /dev/null +++ b/lxd/device/cdi/log.go @@ -0,0 +1,52 @@ +package cdi + +import ( + "fmt" + + "github.com/canonical/lxd/shared/logger" +) + +// CDILogger reuses LXD's shared logger to log the internal operations of the CDI spec generator. +type CDILogger struct { + lxdLogger logger.Logger +} + +// NewCDILogger creates a new CDI logger from a LXD logger instance. +func NewCDILogger(l logger.Logger) *CDILogger { + return &CDILogger{lxdLogger: l} +} + +// Info logs a message (with optional context) at the INFO log level. +func (l *CDILogger) Info(args ...any) { + l.lxdLogger.Info(fmt.Sprint(args...)) +} + +// Infof logs at the INFO log level using a standard printf format string.
+func (l *CDILogger) Infof(format string, args ...any) { + l.lxdLogger.Info(fmt.Sprintf(format, args...)) +} + +// Warning logs a message (with optional context) at the WARNING log level. +func (l *CDILogger) Warning(args ...any) { + l.lxdLogger.Warn(fmt.Sprint(args...)) +} + +// Warningf logs at the WARNING log level using a standard printf format string. +func (l *CDILogger) Warningf(format string, args ...any) { + l.lxdLogger.Warn(fmt.Sprintf(format, args...)) +} + +// Errorf logs at the ERROR log level using a standard printf format string. +func (l *CDILogger) Errorf(format string, args ...any) { + l.lxdLogger.Error(fmt.Sprintf(format, args...)) +} + +// Debugf logs at the DEBUG log level using a standard printf format string. +func (l *CDILogger) Debugf(format string, args ...any) { + l.lxdLogger.Debug(fmt.Sprintf(format, args...)) +} + +// Tracef logs at the TRACE log level using a standard printf format string. +func (l *CDILogger) Tracef(format string, args ...any) { + l.lxdLogger.Trace(fmt.Sprintf(format, args...)) +} From ba948c579d2a0ef7f98620852805c8191602cb61 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 17:41:18 +0200 Subject: [PATCH 05/16] lxd/device/cdi: Add the NVIDIA CDI spec generator Signed-off-by: Gabriel Mougard --- lxd/device/cdi/spec.go | 117 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 lxd/device/cdi/spec.go diff --git a/lxd/device/cdi/spec.go b/lxd/device/cdi/spec.go new file mode 100644 index 000000000000..539ffb1f5bc1 --- /dev/null +++ b/lxd/device/cdi/spec.go @@ -0,0 +1,117 @@ +package cdi + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" + "tags.cncf.io/container-device-interface/specs-go" + + "github.com/canonical/lxd/lxd/instance" + "github.com/canonical/lxd/shared" + "github.com/canonical/lxd/shared/logger" +) + +const ( + // defaultNvidiaTegraMountSpecPath is default location of CSV 
files that define the modifications required to the OCI spec. + defaultNvidiaTegraMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d" +) + +// defaultNvidiaTegraCSVFiles returns the default CSV files for the Nvidia Tegra platform. +func defaultNvidiaTegraCSVFiles(rootPath string) []string { + files := []string{ + "devices.csv", + "drivers.csv", + "l4t.csv", + } + + paths := make([]string, 0, len(files)) + for _, file := range files { + paths = append(paths, filepath.Join(rootPath, defaultNvidiaTegraMountSpecPath, file)) + } + + return paths +} + +// generateNvidiaSpec generates a CDI spec for an Nvidia vendor. +func generateNvidiaSpec(cdiID ID, inst instance.Instance) (*specs.Spec, error) { + l := logger.AddContext(logger.Ctx{"instanceName": inst.Name(), "projectName": inst.Project().Name, "cdiID": cdiID.String()}) + mode := nvcdi.ModeAuto + if cdiID.Class == IGPU { + mode = nvcdi.ModeCSV + } + + indexDeviceNamer, err := nvcdi.NewDeviceNamer(nvcdi.DeviceNameStrategyIndex) + if err != nil { + return nil, fmt.Errorf("Failed to create device namer with index strategy: %w", err) + } + + uuidDeviceNamer, err := nvcdi.NewDeviceNamer(nvcdi.DeviceNameStrategyUUID) + if err != nil { + return nil, fmt.Errorf("Failed to create device namer with uuid strategy: %w", err) + } + + nvidiaCTKPath, err := exec.LookPath("nvidia-ctk") + if err != nil { + return nil, fmt.Errorf("Failed to find the nvidia-ctk binary: %w", err) + } + + rootPath := "" + if shared.InSnap() { + rootPath = "/var/lib/snapd/hostfs" + } + + cdilib, err := nvcdi.New( + nvcdi.WithDeviceNamers(indexDeviceNamer, uuidDeviceNamer), + nvcdi.WithLogger(NewCDILogger(l)), + nvcdi.WithDriverRoot(rootPath), + nvcdi.WithDevRoot(rootPath), + nvcdi.WithNVIDIACDIHookPath(nvidiaCTKPath), + nvcdi.WithMode(mode), + nvcdi.WithCSVFiles(defaultNvidiaTegraCSVFiles(rootPath)), + ) + if err != nil { + return nil, fmt.Errorf("Failed to create CDI library: %w", err) + } + + specIface, err := cdilib.GetSpec() + if 
err != nil { + return nil, fmt.Errorf("Failed to get CDI spec interface: %w", err) + } + + spec := specIface.Raw() + if spec == nil { + return nil, fmt.Errorf("CDI spec is nil") + } + + // The spec definition can be quite large so we log it to a file. + specPath := filepath.Join(inst.LogPath(), fmt.Sprintf("nvidia_cdi_spec.%s.log", strings.ReplaceAll(cdiID.String(), "/", "_"))) + specFile, err := os.Create(specPath) + if err != nil { + l.Warn("Failed to create a log file to hold a CDI spec", logger.Ctx{"specPath": specPath, "error": err}) + return spec, nil + } + + defer specFile.Close() + + _, err = specFile.WriteString(logger.Pretty(spec)) + if err != nil { + return nil, fmt.Errorf("Failed to write spec to %q: %v", specPath, err) + } + + l.Debug("CDI spec has been successfully generated", logger.Ctx{"specPath": specPath}) + return spec, nil +} + +// generateSpec generates a CDI spec for the given CDI ID. +func generateSpec(cdiID ID, inst instance.Instance) (*specs.Spec, error) { + switch cdiID.Vendor { + case NVIDIA: + return generateNvidiaSpec(cdiID, inst) + default: + return nil, fmt.Errorf("Unsupported CDI vendor (%q) for the spec generation", cdiID.Vendor) + } +} From 58c7a88fc807d759f3686fe94c1644914a4f0bee Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 17:42:34 +0200 Subject: [PATCH 06/16] lxd/device/cdi: Add base CDI Hook types Signed-off-by: Gabriel Mougard --- lxd/device/cdi/hooks.go | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 lxd/device/cdi/hooks.go diff --git a/lxd/device/cdi/hooks.go b/lxd/device/cdi/hooks.go new file mode 100644 index 000000000000..bfd0e33cf75a --- /dev/null +++ b/lxd/device/cdi/hooks.go @@ -0,0 +1,44 @@ +package cdi + +const ( + // CDIHookDefinitionKey is used to reference a CDI hook definition in a run config as a file path. + // A CDI hook definition is a simple way to represent the symlinks to be created and the folder entries to add to the ld cache. 
+ // This resource file is to be read and processed by LXD's `callhook` program. + CDIHookDefinitionKey = "cdiHookDefinitionKey" + // CDIHooksFileSuffix is the suffix for the file that contains the CDI hooks. + CDIHooksFileSuffix = "_cdi_hooks.json" + // CDIConfigDevicesFileSuffix is the suffix for the file that contains the CDI config devices. + CDIConfigDevicesFileSuffix = "_cdi_config_devices.json" + // CDIUnixPrefix is the prefix used for creating unix char devices + // (e.g. cdi.unix.<device_name>.<encoded_dest_path>). + CDIUnixPrefix = "cdi.unix" + // CDIDiskPrefix is the prefix used for creating bind mounts (or 'disk' devices) + // representing user space files required for a CDI passthrough + // (e.g. cdi.disk.<device_name>.<encoded_dest_path>). + CDIDiskPrefix = "cdi.disk" +) + +// SymlinkEntry represents a symlink entry. +type SymlinkEntry struct { + Target string `json:"target" yaml:"target"` + Link string `json:"link" yaml:"link"` +} + +// Hooks represents all the hook instructions that can be executed by +// `lxd-cdi-hook`. +type Hooks struct { + // ContainerRootFS is the path to the container's root filesystem. + ContainerRootFS string `json:"container_rootfs" yaml:"container_rootfs"` + // LDCacheUpdates is a list of entries to update the ld cache. + LDCacheUpdates []string `json:"ld_cache_updates" yaml:"ld_cache_updates"` + // Symlinks is a list of symlink entries to create. + Symlinks []SymlinkEntry `json:"symlinks" yaml:"symlinks"` +} + +// ConfigDevices represents devices and mounts that need to be configured from a CDI specification. +type ConfigDevices struct { + // UnixCharDevs is a slice of unix-char device configuration. + UnixCharDevs []map[string]string `json:"unix_char_devs" yaml:"unix_char_devs"` + // BindMounts is a slice of mount configuration.
+ BindMounts []map[string]string `json:"bind_mounts" yaml:"bind_mounts"` +} From da314e64d5df91c64806cd4a496d76a8fda2ed09 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 17:44:49 +0200 Subject: [PATCH 07/16] lxd/device/gpu: Augment the capabilities of the `id` GPU option to support CDI naming for `physical` gputype Signed-off-by: Gabriel Mougard --- lxd/device/gpu.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lxd/device/gpu.go b/lxd/device/gpu.go index 96ffdd7b652e..5ce602372eae 100644 --- a/lxd/device/gpu.go +++ b/lxd/device/gpu.go @@ -35,7 +35,19 @@ func gpuValidationRules(requiredFields []string, optionalFields []string) map[st // type: string // shortdesc: Product ID of the parent GPU device "productid": validate.Optional(validate.IsDeviceID), - // lxdmeta:generate(entities=device-gpu-{physical+mdev+mig}; group=device-conf; key=id) + // lxdmeta:generate(entities=device-gpu-physical; group=device-conf; key=id) + // The ID can either be the DRM card ID of the GPU device (container or VM) or a fully-qualified Container Device Interface (CDI) name (container only). + // Here are some examples of fully-qualified CDI names: + // + // - `nvidia.com/gpu=0`: Instructs LXD to operate a discrete GPU (dGPU) pass-through of brand NVIDIA with the first discovered GPU on your system. You can use the `nvidia-smi` tool on your host to find out which identifier to use. + // - `nvidia.com/gpu=1833c8b5-9aa0-5382-b784-68b7e77eb185`: Instructs LXD to operate a discrete GPU (dGPU) pass-through of brand NVIDIA with a given GPU unique identifier. This identifier should also appear with `nvidia-smi -L`. + // - `nvidia.com/igpu=all`: Instructs LXD to pass all the host integrated GPUs (iGPU) of brand NVIDIA. The concept of an index does not currently map to iGPUs. It is possible to list them with the `nvidia-smi -L` command. A special `nvgpu` mention should appear in the generated list to indicate a device to be an iGPU.
+ // - `nvidia.com/gpu=all`: Instructs LXD to pass all the host GPUs of brand NVIDIA through to the container. + // --- + // type: string + // shortdesc: ID of the GPU device + + // lxdmeta:generate(entities=device-gpu-{mdev+mig}; group=device-conf; key=id) + // + // --- + // type: string From 41b88404d3e51719d4ebb98778ab2746739dc354 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Mon, 29 Jul 2024 19:59:50 +0200 Subject: [PATCH 08/16] lxd/device/cdi: Add CDI translation logic (OCI spec -> unix device + mounts) Signed-off-by: Gabriel Mougard --- lxd/device/cdi/configure.go | 339 ++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 lxd/device/cdi/configure.go diff --git a/lxd/device/cdi/configure.go b/lxd/device/cdi/configure.go new file mode 100644 index 000000000000..8308f630e350 --- /dev/null +++ b/lxd/device/cdi/configure.go @@ -0,0 +1,339 @@ +package cdi + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + "tags.cncf.io/container-device-interface/specs-go" + + "github.com/canonical/lxd/lxd/instance" + "github.com/canonical/lxd/shared" + "github.com/canonical/lxd/shared/logger" +) + +// specDevToNativeDev builds a list of unix-char devices to be created from a CDI spec. +func specDevToNativeDev(configDevices *ConfigDevices, d specs.DeviceNode) error { + if d.Path == "" { + return fmt.Errorf("Device path is empty in the CDI device node: %v", d) + } + + hostPath := d.HostPath + if hostPath == "" { + hostPath = d.Path // When the hostPath is empty, the path is the device path in the container.
+ } + + if d.Major == 0 || d.Minor == 0 { + stat := unix.Stat_t{} + err := unix.Stat(hostPath, &stat) + if err != nil { + return err + } + + d.Major = int64(unix.Major(uint64(stat.Rdev))) + d.Minor = int64(unix.Minor(uint64(stat.Rdev))) + } + + configDevices.UnixCharDevs = append(configDevices.UnixCharDevs, map[string]string{"type": "unix-char", "source": hostPath, "path": d.Path, "major": fmt.Sprintf("%d", d.Major), "minor": fmt.Sprintf("%d", d.Minor)}) + return nil +} + +// specMountToNativeDev builds a list of disk mounts to be created from a CDI spec. +func specMountToNativeDev(configDevices *ConfigDevices, cdiID ID, mounts []*specs.Mount) ([]SymlinkEntry, error) { + if len(mounts) == 0 { + return nil, fmt.Errorf("CDI mounts are empty") + } + + indirectSymlinks := make([]SymlinkEntry, 0) + var chosenOpts []string + + rootPath := "" + if shared.InSnap() { + rootPath = "/var/lib/snapd/hostfs" + } + + for _, mount := range mounts { + if mount.HostPath == "" || mount.ContainerPath == "" { + return nil, fmt.Errorf("The hostPath or containerPath is empty in the CDI mount: %v", *mount) + } + + chosenOpts = []string{} + for _, opt := range mount.Options { + if !shared.ValueInSlice(opt, chosenOpts) { + chosenOpts = append(chosenOpts, opt) + } + } + + chosenOptsStr := strings.Join(chosenOpts, ",") + + // mount.HostPath can be a symbolic link, so we need to evaluate it + evaluatedHostPath, err := filepath.EvalSymlinks(mount.HostPath) + if err != nil { + return nil, err + } + + if evaluatedHostPath != mount.HostPath && mount.ContainerPath == strings.TrimPrefix(mount.HostPath, rootPath) { + indirectSymlinks = append(indirectSymlinks, SymlinkEntry{Target: strings.TrimPrefix(evaluatedHostPath, rootPath), Link: mount.ContainerPath}) + mount.ContainerPath = strings.TrimPrefix(evaluatedHostPath, rootPath) + } + + configDevices.BindMounts = append( + configDevices.BindMounts, + map[string]string{ + "type": "disk", + "source": evaluatedHostPath, + "path": mount.ContainerPath, + 
"raw.mount.options": chosenOptsStr, + }, + ) + } + + // If the user desires to run a nested docker container inside a LXD container, + // the Tegra CSV files also need to be mounted so that the NVIDIA docker runtime + // can be auto-enabled as 'csv' mode. + if cdiID.Vendor == NVIDIA && cdiID.Class == IGPU { + tegraCSVFilesCandidates := defaultNvidiaTegraCSVFiles(rootPath) + tegraCSVFiles := make([]string, 0) + for _, candidate := range tegraCSVFilesCandidates { + _, err := os.Stat(candidate) + if err == nil { + tegraCSVFiles = append(tegraCSVFiles, candidate) + } else if os.IsNotExist(err) { + continue + } else { + return nil, err + } + } + + if len(tegraCSVFiles) == 0 { + return nil, fmt.Errorf("No CSV files detected for Tegra iGPU") + } + + for _, tegraFile := range tegraCSVFiles { + configDevices.BindMounts = append( + configDevices.BindMounts, + map[string]string{ + "type": "disk", + "source": tegraFile, + "path": strings.TrimPrefix(tegraFile, rootPath), + "readonly": "true", + }, + ) + } + } + + return indirectSymlinks, nil +} + +// specHookToLXDCDIHook will translate a hook from a CDI spec into an entry in a `Hooks`. +// Some CDI hooks are not relevant for LXD and will be ignored. +func specHookToLXDCDIHook(hook *specs.Hook, hooks *Hooks, l logger.Logger) error { + if hook == nil { + l.Warn("CDI hook is nil") + return nil + } + + rootPath := "" + if shared.InSnap() { + rootPath = "/var/lib/snapd/hostfs" + } + + if len(hook.Args) < 3 { + return fmt.Errorf("Not enough arguments for CDI hook: %v", hook.Args) + } + + processCreateSymlinksHook := func(args []string) error { + // The list of arguments is either + // `--link :: --link :: ...` + // or `--link=:: --link=:: ...` + // and we need to handle both cases as they are both valid. 
+ var targetWithLink string + for i := 0; i < len(args); i += 1 { + if args[i] == "--link" { + continue + } + + if strings.Contains(args[i], "=") { + // We can assume the arg is `--link=::` + splitted := strings.Split(args[i], "=") + if len(splitted) != 2 { + return fmt.Errorf("Invalid symlink arg %q", args[i]) + } + + targetWithLink = splitted[1] + } else { + // We can assume the arg is `::` + targetWithLink = args[i] + } + + entry := strings.Split(targetWithLink, "::") + if len(entry) != 2 { + return fmt.Errorf("Invalid symlink entry %q", targetWithLink) + } + + // `Link` is always an absolute path and `Target` (a `Link` points to a `Target`) is relative + // to the `Link` location in the CDI spec. A resolving operation will be needed to have the absolute + // path of the `Target` + hooks.Symlinks = append(hooks.Symlinks, SymlinkEntry{Target: strings.TrimPrefix(entry[0], rootPath), Link: strings.TrimPrefix(entry[1], rootPath)}) + } + + return nil + } + + processUpdateLdcacheHook := func(args []string) error { + // As above, the list of arguments is either + // `--folder --folder ...` + // or `--folder= --folder= ...` + // and we need to handle both cases as they are both valid. 
+ var folder string + for i := 0; i < len(args); i += 1 { + if args[i] == "--folder" { + continue + } + + if strings.Contains(args[i], "=") { + // We can assume the arg is `--folder=` + splitted := strings.Split(args[i], "=") + if len(splitted) != 2 { + return fmt.Errorf("Invalid CDI folder arg %q", args[i]) + } + + folder = splitted[1] + } else { + // We can assume the arg is `` + folder = args[i] + } + + hooks.LDCacheUpdates = append(hooks.LDCacheUpdates, folder) + } + + return nil + } + + processHooks := map[string]func([]string) error{ + "create-symlinks": processCreateSymlinksHook, + "update-ldcache": processUpdateLdcacheHook, + } + + for i, arg := range hook.Args { + process, supported := processHooks[arg] + if supported { + if len(hook.Args) > i+1 { + // We pass in only the arguments, + // not the hook name which is not relevant in the process functions + return process(hook.Args[i+1:]) + } + } + } + + return nil +} + +// applyContainerEdits updates the configDevices and the hooks with CDI "container edits" +// (edits are user space libraries to mount and char device to pass to the container). +func applyContainerEdits(edits specs.ContainerEdits, configDevices *ConfigDevices, hooks *Hooks, existingMounts []*specs.Mount, l logger.Logger) ([]*specs.Mount, error) { + for _, d := range edits.DeviceNodes { + if d == nil { + l.Warn("One CDI DeviceNode is nil") + continue + } + + err := specDevToNativeDev(configDevices, *d) + if err != nil { + return nil, err + } + } + + for _, hook := range edits.Hooks { + err := specHookToLXDCDIHook(hook, hooks, l) + if err != nil { + return nil, err + } + } + + return append(existingMounts, edits.Mounts...), nil +} + +// GenerateFromCDI does several things: +// +// 1. It generates a CDI specification from a CDI ID and an instance. +// According the the specified 'vendor', 'class' and 'name' (this assembled triplet is called a fully-qualified CDI ID. 
We'll just call it ID in the context of this package), the CDI specification is generated. +// The CDI specification is a JSON-like format. It is divided into two parts: the 'specific device' configuration and the 'general device' configuration. +// - The 'specific device' configuration: this is a list of 'container edits' that can be added to the container runtime. +// According to the CDI ID (vendor, class, name), we only select the 'container edits' that match the CDI ID. +// The 'container edits' are a list of device nodes, hooks and mounts that must be added to the container runtime. +// - The 'general device' configuration: this is a single 'container edits' entry that must be passed to the container runtime in any case. Which unix char devices need to be passed +// (e.g., special GPU memory controller device, etc.)? Which user space libraries need to be mounted (e.g., CUDA libraries for NVIDIA, etc.)? +// Which hooks need to be executed (e.g., symlinks to create, folder entries to add to ldcache, etc.)? +// In our case, these edits will be interpreted either as disk or unix char mounts passed to the container. +// The hooks will be centralized in a single resource file that will be read and executed as a LXC `lxc.hook.mount` hook, +// through LXD's `callhook` command. +// 2. We first process the 'specific device' configuration: we convert this information into a list of devices +// (each one described by a map of device properties, including its path given in the spec). We also collect the specific mounts (but we do not process them yet) and hooks. +// 3. We then process the 'general device' configuration in the same fashion. +// 4. Now we process all the mounts we collected from the spec in order to turn them into disk devices. +// This operation has a side effect: it generates a list of indirect symlinks (see `specMountToNativeDev`). +// 5. Merge all the hooks (direct + indirect) into a single list of hooks.
+func GenerateFromCDI(inst instance.Instance, cdiID ID, l logger.Logger) (*ConfigDevices, *Hooks, error) { + // 1. Generate the CDI specification + spec, err := generateSpec(cdiID, inst) + if err != nil { + return nil, nil, fmt.Errorf("Failed to generate CDI spec: %w", err) + } + + // Initialize the hooks as empty + hooks := &Hooks{ContainerRootFS: inst.RootfsPath()} + mounts := make([]*specs.Mount, 0) + configDevices := &ConfigDevices{UnixCharDevs: make([]map[string]string, 0), BindMounts: make([]map[string]string, 0)} + + // 2. Process the specific device configuration + for _, device := range spec.Devices { + if device.Name == cdiID.Name { + mounts, err = applyContainerEdits(device.ContainerEdits, configDevices, hooks, mounts, l) + if err != nil { + return nil, nil, err + } + + break + } + } + + // 3. Process general device configuration + mounts, err = applyContainerEdits(spec.ContainerEdits, configDevices, hooks, mounts, l) + if err != nil { + return nil, nil, err + } + + // 4. Process the mounts + indirectSymlinks, err := specMountToNativeDev(configDevices, cdiID, mounts) + if err != nil { + return nil, nil, err + } + + // 5. Merge the indirectSymlinks into the list of symlinks to be created by the hooks + hooks.Symlinks = append(hooks.Symlinks, indirectSymlinks...) + return configDevices, hooks, nil +} + +// ReloadConfigDevicesFromDisk reads the paths to the CDI configuration devices file from the disk. +// This is useful in order to cache the CDI configuration devices file so that we don't have to re-generate a CDI spec when stopping the container.
+func ReloadConfigDevicesFromDisk(pathsToConfigDevicesFilePath string) (ConfigDevices, error) { + // Load the config devices file from the disk + pathsToCDIConfigDevicesFile, err := os.Open(pathsToConfigDevicesFilePath) + if err != nil { + return ConfigDevices{}, fmt.Errorf("Failed to open the paths to CDI conf file at %q: %w", pathsToConfigDevicesFilePath, err) + } + + defer pathsToCDIConfigDevicesFile.Close() + + configDevices := &ConfigDevices{} + err = json.NewDecoder(pathsToCDIConfigDevicesFile).Decode(configDevices) + if err != nil { + return ConfigDevices{}, fmt.Errorf("Failed to decode the paths to CDI conf file at %q: %w", pathsToConfigDevicesFilePath, err) + } + + return *configDevices, nil +} From 582127ecfeb23ea9c611790a69786f8c1df2c1fd Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Tue, 6 Aug 2024 18:42:56 +0200 Subject: [PATCH 09/16] lxd/device/gpu_physical: more explicit output values for `deviceNumStringToUint32` Signed-off-by: Gabriel Mougard --- lxd/device/gpu_physical.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lxd/device/gpu_physical.go b/lxd/device/gpu_physical.go index 58302aeeb496..92d859f428d2 100644 --- a/lxd/device/gpu_physical.go +++ b/lxd/device/gpu_physical.go @@ -396,21 +396,21 @@ func (d *gpuPhysical) postStop() error { // deviceNumStringToUint32 converts a device number string (major:minor) into separare major and // minor uint32s. 
-func (d *gpuPhysical) deviceNumStringToUint32(devNum string) (uint32, uint32, error) { +func (d *gpuPhysical) deviceNumStringToUint32(devNum string) (major uint32, minor uint32, err error) { devParts := strings.SplitN(devNum, ":", 2) tmp, err := strconv.ParseUint(devParts[0], 10, 32) if err != nil { return 0, 0, err } - major := uint32(tmp) + major = uint32(tmp) tmp, err = strconv.ParseUint(devParts[1], 10, 32) if err != nil { return 0, 0, err } - minor := uint32(tmp) + minor = uint32(tmp) return major, minor, nil } From e6944c44bacd9080a3fe7c2ddb45788a95a1570c Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Mon, 29 Jul 2024 20:00:34 +0200 Subject: [PATCH 10/16] lxd/device/gpu_physical: Configure a GPU (physical) if CDI detected Signed-off-by: Gabriel Mougard --- lxd/device/gpu_physical.go | 309 ++++++++++++++++++++++++++++++++++++- 1 file changed, 305 insertions(+), 4 deletions(-) diff --git a/lxd/device/gpu_physical.go b/lxd/device/gpu_physical.go index 92d859f428d2..3348a5700cbd 100644 --- a/lxd/device/gpu_physical.go +++ b/lxd/device/gpu_physical.go @@ -1,6 +1,7 @@ package device import ( + "encoding/json" "fmt" "os" "path/filepath" @@ -10,11 +11,14 @@ import ( "golang.org/x/sys/unix" + "github.com/canonical/lxd/lxd/device/cdi" deviceConfig "github.com/canonical/lxd/lxd/device/config" pcidev "github.com/canonical/lxd/lxd/device/pci" + "github.com/canonical/lxd/lxd/idmap" "github.com/canonical/lxd/lxd/instance" "github.com/canonical/lxd/lxd/instance/instancetype" "github.com/canonical/lxd/lxd/resources" + "github.com/canonical/lxd/lxd/storage/filesystem" "github.com/canonical/lxd/lxd/util" "github.com/canonical/lxd/shared" ) @@ -98,10 +102,227 @@ func (d *gpuPhysical) Start() (*deviceConfig.RunConfig, error) { return d.startContainer() } +// startCDIDevices starts all the devices given in a CDI specification: +// * `unix-char` (representing the card and non-card devices) +// * `disk` (representing the mounts)). 
+func (d *gpuPhysical) startCDIDevices(configDevices cdi.ConfigDevices, runConf *deviceConfig.RunConfig) error { + srcFDHandlers := make([]*os.File, 0) + defer func() { + for _, f := range srcFDHandlers { + _ = f.Close() + } + }() + + for _, conf := range configDevices.UnixCharDevs { + if conf["source"] == "" { + return fmt.Errorf("The source of the unix-char device %v used for CDI is empty", conf) + } + + if conf["major"] == "" || conf["minor"] == "" { + return fmt.Errorf("The major or minor of the unix-char device %v used for CDI is empty", conf) + } + + major, err := strconv.ParseUint(conf["major"], 10, 32) + if err != nil { + return fmt.Errorf("Failed to parse major number %q when starting CDI device: %w", conf["major"], err) + } + + minor, err := strconv.ParseUint(conf["minor"], 10, 32) + if err != nil { + return fmt.Errorf("Failed to parse minor number %q when starting CDI device: %w", conf["minor"], err) + } + + // Here putting a `cdi.CDIUnixPrefix` prefix with 'd.name' as a device name will create an directory entry like: + // /devices//.. + // 'unixDeviceSetupCharNum' is already checking for dupe entries so we have no validation to do here. + err = unixDeviceSetupCharNum(d.state, d.inst.DevicesPath(), cdi.CDIUnixPrefix, d.name, conf, uint32(major), uint32(minor), conf["path"], false, runConf) + if err != nil { + return err + } + } + + // Create the devices directory if missing. + if !shared.PathExists(d.inst.DevicesPath()) { + err := os.Mkdir(d.inst.DevicesPath(), 0711) + if err != nil { + return err + } + } + + for _, conf := range configDevices.BindMounts { + if conf["source"] == "" { + return fmt.Errorf("The source of the disk device %v used for CDI is empty", conf) + } + + srcPath := shared.HostPath(conf["source"]) + destPath := conf["path"] + relativeDestPath := strings.TrimPrefix(destPath, "/") + + // This time, the created path will be like: + // /devices//.. 
+ deviceName := filesystem.PathNameEncode(deviceJoinPath(cdi.CDIDiskPrefix, d.name, relativeDestPath)) + devPath := filepath.Join(d.inst.DevicesPath(), deviceName) + + ownerShift := deviceConfig.MountOwnerShiftNone + if idmap.CanIdmapMount(devPath, "") { + ownerShift = deviceConfig.MountOwnerShiftDynamic + } + + options := []string{"bind"} + mntOptions := shared.SplitNTrimSpace(conf["raw.mount.options"], ",", -1, true) + fsName := "none" + + fileInfo, err := os.Stat(srcPath) + if err != nil { + return fmt.Errorf("Failed accessing source path %q: %w", srcPath, err) + } + + fileMode := fileInfo.Mode() + isFile := false + if !fileMode.IsDir() { + isFile = true + } + + f, err := os.OpenFile(srcPath, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("Failed opening source path %q: %w", srcPath, err) + } + + srcPath = fmt.Sprintf("/proc/self/fd/%d", f.Fd()) + srcFDHandlers = append(srcFDHandlers, f) + + // Clean any existing entry. + if shared.PathExists(devPath) { + err := os.Remove(devPath) + if err != nil { + return err + } + } + + // Create the mount point. + if isFile { + f, err := os.Create(devPath) + if err != nil { + return err + } + + srcFDHandlers = append(srcFDHandlers, f) + } else { + err := os.Mkdir(devPath, 0700) + if err != nil { + return err + } + } + + // Mount the fs. + err = DiskMount(srcPath, devPath, false, "", mntOptions, fsName) + if err != nil { + return err + } + + if isFile { + options = append(options, "create=file") + } else { + options = append(options, "create=dir") + } + + runConf.Mounts = append(runConf.Mounts, deviceConfig.MountEntryItem{ + DevName: deviceName, + DevPath: devPath, + TargetPath: relativeDestPath, + FSType: "none", + Opts: options, + OwnerShift: ownerShift, + }) + + runConf.PostHooks = append(runConf.PostHooks, func() error { + err := unix.Unmount(devPath, unix.MNT_DETACH) + if err != nil { + return err + } + + return nil + }) + } + + // Serialize the config devices inside the devices directory. 
+ f, err := os.Create(d.generateCDIConfigDevicesFilePath()) + if err != nil { + return fmt.Errorf("Could not create the CDI config devices file: %w", err) + } + + defer f.Close() + err = json.NewEncoder(f).Encode(configDevices) + if err != nil { + return fmt.Errorf("Could not write to the CDI config devices file: %w", err) + } + + return nil +} + +func (d *gpuPhysical) generateCDIHooksFilePath() string { + return filepath.Join(d.inst.DevicesPath(), fmt.Sprintf("%s%s", d.name, cdi.CDIHooksFileSuffix)) +} + +func (d *gpuPhysical) generateCDIConfigDevicesFilePath() string { + return filepath.Join(d.inst.DevicesPath(), fmt.Sprintf("%s%s", d.name, cdi.CDIConfigDevicesFileSuffix)) +} + // startContainer detects the requested GPU devices and sets up unix-char devices. // Returns RunConfig populated with mount info required to pass the unix-char devices into the container. func (d *gpuPhysical) startContainer() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{} + if d.config["id"] != "" { + // Check if the id of the device match a CDI format. + cdiID, err := cdi.ToCDI(d.config["id"]) + if err != nil { + return nil, err + } + + // The cdiID can be empty if the provided ID doesn't conform to the CDI (Container Device Interface) format, + // and this will not be treated as an error, as we allow the program to continue processing. + // The ID might still be valid in other contexts, such as a DRM card ID. + // This flexibility allows for both CDI-compliant device specifications and legacy device identifiers. + if !cdiID.Empty() { + if cdiID.Class == cdi.MIG { + return nil, fmt.Errorf(`MIG GPU notation detected for a "physical" gputype device. Choose a "mig" gputype device instead.`) + } + + configDevices, hooks, err := cdi.GenerateFromCDI(d.inst, cdiID, d.logger) + if err != nil { + return nil, err + } + + // Start the devices needed by the CDI specification. 
+ err = d.startCDIDevices(*configDevices, &runConf) + if err != nil { + return nil, err + } + + // Persist the hooks to be run on a `lxc.hook.mount` LXC hook. + hooksFile := d.generateCDIHooksFilePath() + f, err := os.Create(hooksFile) + if err != nil { + return nil, fmt.Errorf("Could not create the CDI hooks file: %w", err) + } + + defer f.Close() + err = json.NewEncoder(f).Encode(hooks) + if err != nil { + return nil, fmt.Errorf("Could not write to the CDI hooks file: %w", err) + } + + runConf.GPUDevice = append(runConf.GPUDevice, + []deviceConfig.RunConfigItem{ + {Key: cdi.CDIHookDefinitionKey, Value: filepath.Base(hooksFile)}, + }...) + + return &runConf, nil + } + } + + // If we use a non-CDI approach, we proceeds with the normal GPU detection approach using the provided DRM card id + // or PCI-e bus address. gpus, err := resources.GetGPU() if err != nil { return nil, err @@ -342,6 +563,41 @@ func (d *gpuPhysical) pciDeviceDriverOverrideIOMMU(pciDev pcidev.Device, driverO return nil } +// stopCDIDevices reads the configDevices and remove potential unix device and unmounts disk mounts. +func (d *gpuPhysical) stopCDIDevices(configDevices cdi.ConfigDevices, runConf *deviceConfig.RunConfig) error { + // Remove ALL the underlying unix-char dev entries created when the CDI device started. + err := unixDeviceRemove(d.inst.DevicesPath(), cdi.CDIUnixPrefix, d.name, "", runConf) + if err != nil { + return err + } + + for _, conf := range configDevices.BindMounts { + relativeDestPath := strings.TrimPrefix(conf["path"], "/") + devPath := filepath.Join(d.inst.DevicesPath(), filesystem.PathNameEncode(deviceJoinPath(cdi.CDIDiskPrefix, d.name, relativeDestPath))) + runConf.PostHooks = append(runConf.PostHooks, func() error { + // Clean any existing device mount entry. Should occur first before custom volume unmounts. + err := DiskMountClear(devPath) + if err != nil { + return err + } + + return nil + }) + + // The disk device doesn't exist do nothing. 
+ if !shared.PathExists(devPath) { + continue // Skip only this mount: the remaining bind mounts must still be unmounted and cleared. + } + + // Request an unmount of the device inside the instance. + runConf.Mounts = append(runConf.Mounts, deviceConfig.MountEntryItem{ + TargetPath: relativeDestPath, + }) + } + + return nil +} + // Stop is run when the device is removed from the instance. func (d *gpuPhysical) Stop() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{ @@ -349,7 +605,28 @@ func (d *gpuPhysical) Stop() (*deviceConfig.RunConfig, error) { } if d.inst.Type() == instancetype.Container { - err := unixDeviceRemove(d.inst.DevicesPath(), "unix", d.name, "", &runConf) + cdiID, err := cdi.ToCDI(d.config["id"]) + if err != nil { + return nil, err + } + + if !cdiID.Empty() { + // This is more efficient than GenerateFromCDI as we don't need to re-generate a CDI specification to parse it again. + configDevices, err := cdi.ReloadConfigDevicesFromDisk(d.generateCDIConfigDevicesFilePath()) + if err != nil { + return nil, err + } + + err = d.stopCDIDevices(configDevices, &runConf) + if err != nil { + return nil, err + } + + return &runConf, nil + } + + // In case of an 'id' not being CDI-compliant (e.g, a legacy DRM card id), we remove unix devices only as usual. + err = unixDeviceRemove(d.inst.DevicesPath(), "unix", d.name, "", &runConf) if err != nil { return nil, err } @@ -371,11 +648,35 @@ func (d *gpuPhysical) postStop() error { v := d.volatileGet() if d.inst.Type() == instancetype.Container { - // Remove host files for this device.
- err := unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), "unix", d.name, "") + cdiID, err := cdi.ToCDI(d.config["id"]) if err != nil { - return fmt.Errorf("Failed to delete files for device '%s': %w", d.name, err) + return err } + + if !cdiID.Empty() { + err = unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), cdi.CDIUnixPrefix, d.name, "") + if err != nil { + return fmt.Errorf("Failed to delete files for CDI device '%s': %w", d.name, err) + } + + // Also remove the JSON files that were used to store the CDI related information. + err = os.Remove(d.generateCDIHooksFilePath()) + if err != nil { + return fmt.Errorf("Failed to delete CDI hooks file for device %q: %w", d.name, err) + } + + err = os.Remove(d.generateCDIConfigDevicesFilePath()) + if err != nil { + return fmt.Errorf("Failed to delete CDI paths to conf file for device %q: %w", d.name, err) + } + } else { + err = unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), "unix", d.name, "") + if err != nil { + return fmt.Errorf("Failed to delete files for device %q: %w", d.name, err) + } + } + + return err } // If VM physical pass through, unbind from vfio-pci and bind back to host driver. 
From 37b4cd360cefe49644afde039356468eba8e8a28 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Wed, 7 Aug 2024 16:11:45 +0200 Subject: [PATCH 11/16] lxd/instance/drivers: Make `removeUnixDevices` a driver_common function Signed-off-by: Gabriel Mougard --- lxd/instance/drivers/driver_common.go | 32 +++++++++++++++++++++++++++ lxd/instance/drivers/driver_lxc.go | 30 ------------------------- lxd/instance/drivers/driver_qemu.go | 29 ------------------------ 3 files changed, 32 insertions(+), 59 deletions(-) diff --git a/lxd/instance/drivers/driver_common.go b/lxd/instance/drivers/driver_common.go index 540357150f3f..8476e564c5ea 100644 --- a/lxd/instance/drivers/driver_common.go +++ b/lxd/instance/drivers/driver_common.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "os" "path/filepath" "sort" "strconv" @@ -1670,3 +1671,34 @@ func (d *common) deleteSnapshots(deleteFunc func(snapInst instance.Instance) err return nil } + +// removeUnixDevices reads the devices path and remove all unix devices. 
+func (d *common) removeUnixDevices() error { + // Check that we indeed have devices to remove + if !shared.PathExists(d.DevicesPath()) { + return nil + } + + // Load the directory listing + dents, err := os.ReadDir(d.DevicesPath()) + if err != nil { + return err + } + + // Go through all the unix devices + for _, f := range dents { + // Skip non-Unix devices + if !strings.HasPrefix(f.Name(), "forkmknod.unix.") && !strings.HasPrefix(f.Name(), "unix.") && !strings.HasPrefix(f.Name(), device.IBDevPrefix) { + continue + } + + // Remove the entry + devicePath := filepath.Join(d.DevicesPath(), f.Name()) + err := os.Remove(devicePath) + if err != nil { + d.logger.Error("Failed removing unix device", logger.Ctx{"err": err, "path": devicePath}) + } + } + + return nil +} diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index b6a582d79951..588d89584446 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -7859,36 +7859,6 @@ func (d *lxc) InsertSeccompUnixDevice(prefix string, m deviceConfig.Device, pid return d.insertMountLXD(devPath, tgtPath, "none", unix.MS_BIND, pid, idmap.IdmapStorageNone) } -func (d *lxc) removeUnixDevices() error { - // Check that we indeed have devices to remove - if !shared.PathExists(d.DevicesPath()) { - return nil - } - - // Load the directory listing - dents, err := os.ReadDir(d.DevicesPath()) - if err != nil { - return err - } - - // Go through all the unix devices - for _, f := range dents { - // Skip non-Unix devices - if !strings.HasPrefix(f.Name(), "forkmknod.unix.") && !strings.HasPrefix(f.Name(), "unix.") && !strings.HasPrefix(f.Name(), "infiniband.unix.") { - continue - } - - // Remove the entry - devicePath := filepath.Join(d.DevicesPath(), f.Name()) - err := os.Remove(devicePath) - if err != nil { - d.logger.Error("Failed removing unix device", logger.Ctx{"err": err, "path": devicePath}) - } - } - - return nil -} - // FillNetworkDevice takes a nic or infiniband 
device type and enriches it with automatically // generated name and hwaddr properties if these are missing from the device. func (d *lxc) FillNetworkDevice(name string, m deviceConfig.Device) (deviceConfig.Device, error) { diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 13180586f140..c96630458e8d 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -5940,35 +5940,6 @@ func (d *qemu) updateMemoryLimit(newLimit string) error { return fmt.Errorf("Failed setting memory to %dMiB (currently %dMiB) as it was taking too long", newSizeMB, curSizeMB) } -func (d *qemu) removeUnixDevices() error { - // Check that we indeed have devices to remove. - if !shared.PathExists(d.DevicesPath()) { - return nil - } - - // Load the directory listing. - dents, err := os.ReadDir(d.DevicesPath()) - if err != nil { - return err - } - - for _, f := range dents { - // Skip non-Unix devices. - if !strings.HasPrefix(f.Name(), "forkmknod.unix.") && !strings.HasPrefix(f.Name(), "unix.") && !strings.HasPrefix(f.Name(), "infiniband.unix.") { - continue - } - - // Remove the entry - devicePath := filepath.Join(d.DevicesPath(), f.Name()) - err := os.Remove(devicePath) - if err != nil { - d.logger.Error("Failed removing unix device", logger.Ctx{"err": err, "path": devicePath}) - } - } - - return nil -} - func (d *qemu) removeDiskDevices() error { // Check that we indeed have devices to remove. 
if !shared.PathExists(d.DevicesPath()) { From 650879b1ba98e7ba6c6e2d44c0d52dba717c95d2 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 17:50:33 +0200 Subject: [PATCH 12/16] lxd/instance/drivers/driver_lxc: Configure LXC to use LXD's `startmountns` callhook if CDI devices have been configured Signed-off-by: Gabriel Mougard --- lxd/instance/drivers/driver_lxc.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index 588d89584446..f5e07f3b17ae 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -41,6 +41,7 @@ import ( "github.com/canonical/lxd/lxd/db/cluster" "github.com/canonical/lxd/lxd/db/operationtype" "github.com/canonical/lxd/lxd/device" + "github.com/canonical/lxd/lxd/device/cdi" deviceConfig "github.com/canonical/lxd/lxd/device/config" "github.com/canonical/lxd/lxd/device/nictype" "github.com/canonical/lxd/lxd/idmap" @@ -2053,6 +2054,7 @@ func (d *lxc) startCommon() (string, []func() error, error) { // Create the devices nicID := -1 nvidiaDevices := []string{} + cdiConfigFiles := []string{} sortedDevices := d.expandedDevices.Sorted() startDevices := make([]device.Device, 0, len(sortedDevices)) @@ -2223,6 +2225,10 @@ func (d *lxc) startCommon() (string, []func() error, error) { if entry.Key == device.GPUNvidiaDeviceKey { nvidiaDevices = append(nvidiaDevices, entry.Value) } + + if entry.Key == cdi.CDIHookDefinitionKey { + cdiConfigFiles = append(cdiConfigFiles, entry.Value) + } } } } @@ -2235,6 +2241,13 @@ func (d *lxc) startCommon() (string, []func() error, error) { } } + if len(cdiConfigFiles) > 0 { + err = lxcSetConfigItem(cc, "lxc.hook.mount", fmt.Sprintf("%s callhook %s %s %s startmountns --devicesRootFolder %s %s", d.state.OS.ExecPath, shared.VarPath(""), strconv.Quote(d.Project().Name), strconv.Quote(d.Name()), d.DevicesPath(), strings.Join(cdiConfigFiles, " "))) + if err != nil { + return "", nil, 
fmt.Errorf("Unable to set the startmountns callhook to process CDI hooks files (%q) for instance %q in project %q: %w", strings.Join(cdiConfigFiles, ","), d.Name(), d.Project().Name, err) + } + } + // Load the LXC raw config. err = d.loadRawLXCConfig(cc) if err != nil { From 1ce19732220e4a1a1d8f3b01f510041a8677c734 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Sun, 28 Jul 2024 18:27:11 +0200 Subject: [PATCH 13/16] lxd: Setup the `startmountns` callhook command to react to `lxc.hook.mount` for executing the CDI hooks Signed-off-by: Gabriel Mougard --- lxd/main_callhook.go | 200 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 2 deletions(-) diff --git a/lxd/main_callhook.go b/lxd/main_callhook.go index 8c3de078b949..e0b5e955e67c 100644 --- a/lxd/main_callhook.go +++ b/lxd/main_callhook.go @@ -1,21 +1,29 @@ package main import ( + "bufio" + "encoding/json" + "errors" "fmt" "net/url" "os" + "os/exec" "path/filepath" + "strings" "time" "github.com/spf13/cobra" "github.com/canonical/lxd/client" + "github.com/canonical/lxd/lxd/device/cdi" ) type cmdCallhook struct { - global *cmdGlobal + global *cmdGlobal + devicesRootFolder string } +// Command returns a cobra command for `lxd callhook`. func (c *cmdCallhook) Command() *cobra.Command { cmd := &cobra.Command{} cmd.Use = "callhook [| ] " @@ -24,14 +32,175 @@ func (c *cmdCallhook) Command() *cobra.Command { Call container lifecycle hook in LXD This internal command notifies LXD about a container lifecycle event - (start, stopns, stop, restart) and blocks until LXD has processed it. + (start, startmountns, stopns, stop, restart) and blocks until LXD has processed it. ` cmd.RunE = c.Run cmd.Hidden = true + // devicesRootFolder is used to specify where to look for CDI config device files. 
+ cmd.Flags().StringVar(&c.devicesRootFolder, "devicesRootFolder", "", "Root folder for CDI devices") + return cmd } +// resolveTargetRelativeToLink converts a target relative to a link path into an absolute path. +func resolveTargetRelativeToLink(link string, target string) (string, error) { + if !filepath.IsAbs(link) { + return "", fmt.Errorf("The link must be an absolute path: %q (target: %q)", link, target) + } + + if filepath.IsAbs(target) { + return target, nil + } + + linkDir := filepath.Dir(link) + absTarget := filepath.Join(linkDir, target) + cleanPath := filepath.Clean(absTarget) + absPath, err := filepath.Abs(cleanPath) + if err != nil { + return "", err + } + + return absPath, nil +} + +// customCDILinkerConfFile is the name of the linker conf file we will write to +// inside the container. The `00-lxdcdi` prefix is chosen to ensure that these libraries have +// a higher precedence than other libraries on the system. +var customCDILinkerConfFile = "00-lxdcdi.conf" + +// applyCDIHooksToContainer is called before the container has started but after the container namespace has been setup, +// and is used whenever CDI devices are added to a container and where symlinks and linker cache entries need to be created. +// These entries are listed in a 'CDI hooks file' located at `hooksFilePath`. 
+func applyCDIHooksToContainer(devicesRootFolder string, hooksFilePath string) error { + hookFile, err := os.Open(filepath.Join(devicesRootFolder, hooksFilePath)) + if err != nil { + return fmt.Errorf("Failed to open the CDI hooks file at %q: %w", hooksFilePath, err) + } + + defer hookFile.Close() + + hooks := &cdi.Hooks{} + err = json.NewDecoder(hookFile).Decode(hooks) + if err != nil { + return fmt.Errorf("Failed to decode the CDI hooks file at %q: %w\n", hooksFilePath, err) + } + + fmt.Println("CDI Hooks file loaded:") + prettyHooks, err := json.MarshalIndent(hooks, "", " ") + if err != nil { + return err + } + + containerRootFSMount := os.Getenv("LXC_ROOTFS_MOUNT") + if containerRootFSMount == "" { + return fmt.Errorf("LXC_ROOTFS_MOUNT is empty") + } + + fmt.Println(string(prettyHooks)) + + // Creating the symlinks + for _, symlink := range hooks.Symlinks { + // Resolve hook link from target + absTarget, err := resolveTargetRelativeToLink(symlink.Link, symlink.Target) + if err != nil { + return fmt.Errorf("Failed to resolve a CDI symlink: %w\n", err) + } + + // Try to create the directory if it doesn't exist + err = os.MkdirAll(filepath.Dir(filepath.Join(containerRootFSMount, symlink.Link)), 0755) + if err != nil { + return fmt.Errorf("Failed to create the directory for the CDI symlink: %w\n", err) + } + + // Create the symlink + err = os.Symlink(absTarget, filepath.Join(containerRootFSMount, symlink.Link)) + if err != nil { + if !os.IsExist(err) { + return fmt.Errorf("Failed to create the CDI symlink: %w\n", err) + } + + fmt.Printf("Symlink not created because link %q already exists for target %q\n", symlink.Link, absTarget) + } + } + + // Updating the linker cache + l := len(hooks.LDCacheUpdates) + if l > 0 { + ldConfFilePath := fmt.Sprintf("%s/etc/ld.so.conf.d/%s", containerRootFSMount, customCDILinkerConfFile) + _, err = os.Stat(ldConfFilePath) + if err == nil { + // The file already exists. 
Read it first, analyze its entries + // and add the ones that are not already there. + ldConfFile, err := os.OpenFile(ldConfFilePath, os.O_APPEND|os.O_RDWR, 0644) + if err != nil { + return fmt.Errorf("Failed to open the ld.so.conf file at %q: %w\n", ldConfFilePath, err) + } + + existingLinkerEntries := make(map[string]bool) + scanner := bufio.NewScanner(ldConfFile) + for scanner.Scan() { + existingLinkerEntries[strings.TrimSpace(scanner.Text())] = true + } + + fmt.Printf("Existing linker entries: %v\n", existingLinkerEntries) + for _, update := range hooks.LDCacheUpdates { + if !existingLinkerEntries[update] { + fmt.Printf("Adding linker entry: %s\n", update) + _, err = fmt.Fprintln(ldConfFile, update) + if err != nil { + ldConfFile.Close() + return fmt.Errorf("Failed to write to the linker conf file at %q: %w\n", ldConfFilePath, err) + } + + existingLinkerEntries[update] = true + } + } + + ldConfFile.Close() + } else if errors.Is(err, os.ErrNotExist) { + // The file does not exist. We simply create it with our entries. 
+ ldConfFile, err := os.OpenFile(ldConfFilePath, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("Failed to create the linker conf file at %q: %w\n", ldConfFilePath, err) + } + + for _, update := range hooks.LDCacheUpdates { + fmt.Printf("Adding linker entry: %s\n", update) + _, err = fmt.Fprintln(ldConfFile, update) + if err != nil { + ldConfFile.Close() + return fmt.Errorf("Failed to write to the linker conf file at %q: %w\n", ldConfFilePath, err) + } + } + + ldConfFile.Close() + } else { + return fmt.Errorf("Could not stat the linker conf file to add CDI linker entries at %q: %w\n", ldConfFilePath, err) + } + } + + // Then remove the linker cache and regenerate it + linkerCachePath := filepath.Join(containerRootFSMount, "/etc/ld.so.cache") + err = os.Remove(linkerCachePath) + if err != nil { + if !os.IsNotExist(err) { + return fmt.Errorf("Failed to remove the ld.so.cache file: %w\n", err) + } + + fmt.Printf("Linker cache not found in %q, skipping removal\n", linkerCachePath) + } + + // Run `ldconfig` on the HOST (but targeting the container rootFS) to reduce the risk of running untrusted code in the container. + err = exec.Command("/sbin/ldconfig", "-r", containerRootFSMount).Run() + if err != nil { + return fmt.Errorf("Failed to run ldconfig in the container rootfs: %w\n", err) + } + + return nil +} + +// Run executes the `lxd callhook` command. func (c *cmdCallhook) Run(cmd *cobra.Command, args []string) error { // Quick checks. if len(args) < 2 { @@ -49,6 +218,7 @@ func (c *cmdCallhook) Run(cmd *cobra.Command, args []string) error { var projectName string var instanceRef string var hook string + var cdiHooksFiles []string // Used for startmountns hook only. 
if len(args) == 3 { instanceRef = args[1] @@ -57,6 +227,12 @@ func (c *cmdCallhook) Run(cmd *cobra.Command, args []string) error { projectName = args[1] instanceRef = args[2] hook = args[3] + } else if len(args) >= 5 { + projectName = args[1] + instanceRef = args[2] + hook = args[3] + cdiHooksFiles = make([]string, len(args[4:])) + copy(cdiHooksFiles, args[4:]) } target := "" @@ -66,6 +242,26 @@ func (c *cmdCallhook) Run(cmd *cobra.Command, args []string) error { return fmt.Errorf("This must be run as root") } + if hook == "startmountns" { + if len(cdiHooksFiles) == 0 { + return fmt.Errorf("Missing required CDI hooks files argument") + } + + if c.devicesRootFolder == "" { + return fmt.Errorf("Missing required --devicesRootFolder flag") + } + + var err error + for _, cdiHooksFile := range cdiHooksFiles { + err = applyCDIHooksToContainer(c.devicesRootFolder, cdiHooksFile) + if err != nil { + return err + } + } + + return nil + } + // Connect to LXD. socket := os.Getenv("LXD_SOCKET") if socket == "" { From db5ee3e3c82f48d9ca3202ad5975c283379b461d Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Wed, 7 Aug 2024 13:36:38 +0200 Subject: [PATCH 14/16] lxd: Remove useless `break` statement in main_callhook command In a select statement, only one case is executed when a channel operation succeeds so once a case is selected and its associated code block is executed, the select statement automatically terminates (there's no fall-through behavior like in a switch statement). So when the response case is selected (i.e., a value is received from the channel), the code inside that case is executed, and then the select statement naturally completes. The break statement here doesn't break out of any additional loop or switch statement; it's just breaking out of the select, which would happen anyway. 
Signed-off-by: Gabriel Mougard --- lxd/main_callhook.go | 1 - 1 file changed, 1 deletion(-) diff --git a/lxd/main_callhook.go b/lxd/main_callhook.go index e0b5e955e67c..c9a8a3ef712a 100644 --- a/lxd/main_callhook.go +++ b/lxd/main_callhook.go @@ -311,7 +311,6 @@ func (c *cmdCallhook) Run(cmd *cobra.Command, args []string) error { return err } - break case <-time.After(30 * time.Second): return fmt.Errorf("Hook didn't finish within 30s") } From 37302501f315e5488f2dbb3c82ec73006be5b7d6 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Thu, 6 Jun 2024 18:46:54 +0200 Subject: [PATCH 15/16] lxd-metadata: update metadata Signed-off-by: Gabriel Mougard --- doc/metadata.txt | 8 +++++++- lxd/metadata/configuration.json | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/metadata.txt b/doc/metadata.txt index a17e5b63f031..0ab9c296d8fa 100644 --- a/doc/metadata.txt +++ b/doc/metadata.txt @@ -275,9 +275,15 @@ You can omit the `MIG-` prefix when specifying this option. ``` ```{config:option} id device-gpu-physical-device-conf -:shortdesc: "DRM card ID of the GPU device" +:shortdesc: "ID of the GPU device" :type: "string" +The ID can either be the DRM card ID of the GPU device (container or VM) or a fully-qualified Container Device Interface (CDI) name (container only). +Here are some examples of fully-qualified CDI names: +- `nvidia.com/gpu=0`: Instructs LXD to operate a discrete GPU (dGPU) pass-through of brand NVIDIA with the first discovered GPU on your system. You can use the `nvidia-smi` tool on your host to find out which identifier to use. +- `nvidia.com/gpu=1833c8b5-9aa0-5382-b784-68b7e77eb185`: Instructs LXD to operate a discrete GPU (dGPU) pass-through of brand NVIDIA with a given GPU unique identifier. This identifier should also appear with `nvidia-smi -L`. +- `nvidia.com/igpu=all`: Instructs LXD to pass all the host integrated GPUs (iGPU) of brand NVIDIA. The concept of an index does not currently map to iGPUs. 
It is possible to list them with the `nvidia-smi -L` command. A special `nvgpu` mention should appear in the generated list to indicate a device to be an iGPU. +- `nvidia.com/gpu=all`: Instructs LXD to pass all the host GPUs of brand NVIDIA through to the container. ``` ```{config:option} mode device-gpu-physical-device-conf diff --git a/lxd/metadata/configuration.json b/lxd/metadata/configuration.json index 878454680d28..08e7027199e9 100644 --- a/lxd/metadata/configuration.json +++ b/lxd/metadata/configuration.json @@ -315,8 +315,8 @@ }, { "id": { - "longdesc": "", - "shortdesc": "DRM card ID of the GPU device", + "longdesc": "The ID can either be the DRM card ID of the GPU device (container or VM) or a fully-qualified Container Device Interface (CDI) name (container only).\nHere are some examples of fully-qualified CDI names:\n\n- `nvidia.com/gpu=0`: Instructs LXD to operate a discrete GPU (dGPU) pass-through of brand NVIDIA with the first discovered GPU on your system. You can use the `nvidia-smi` tool on your host to find out which identifier to use.\n- `nvidia.com/gpu=1833c8b5-9aa0-5382-b784-68b7e77eb185`: Instructs LXD to operate a discrete GPU (dGPU) pass-through of brand NVIDIA with a given GPU unique identifier. This identifier should also appear with `nvidia-smi -L`.\n- `nvidia.com/igpu=all`: Instructs LXD to pass all the host integrated GPUs (iGPU) of brand NVIDIA. The concept of an index does not currently map to iGPUs. It is possible to list them with the `nvidia-smi -L` command. 
A special `nvgpu` mention should appear in the generated list to indicate a device to be an iGPU.\n- `nvidia.com/gpu=all`: Instructs LXD to pass all the host GPUs of brand NVIDIA through to the container.", + "shortdesc": "ID of the GPU device", "type": "string" } }, From 2421c341e07f5219c45c130de3959f814fbcf733 Mon Sep 17 00:00:00 2001 From: Gabriel Mougard Date: Fri, 21 Jun 2024 11:29:18 +0200 Subject: [PATCH 16/16] doc: Add documentation on how to add a GPU with CDI mode Signed-off-by: Gabriel Mougard --- doc/.custom_wordlist.txt | 12 ++ .../container_gpu_passthrough_with_docker.md | 152 ++++++++++++++++++ doc/instances.md | 10 ++ doc/reference/devices_gpu.md | 16 ++ 4 files changed, 190 insertions(+) create mode 100644 doc/howto/container_gpu_passthrough_with_docker.md diff --git a/doc/.custom_wordlist.txt b/doc/.custom_wordlist.txt index 6935b12e4187..6055125ae8c8 100644 --- a/doc/.custom_wordlist.txt +++ b/doc/.custom_wordlist.txt @@ -25,6 +25,7 @@ BPF Btrfs bugfix bugfixes +CDI CentOS Ceph CephFS @@ -46,6 +47,7 @@ CSM CSV CUDA dataset +dGPU DCO dereferenced DHCP @@ -96,6 +98,8 @@ idmap idmapped idmaps IdP +iGPU +iGPUs incrementing InfiniBand init @@ -139,6 +143,7 @@ MicroCloud MII MinIO MITM +MNIST MTU Mullvad multicast @@ -152,7 +157,10 @@ NIC NICs NUMA NVMe +NVML NVRAM +NVIDIA +OCI OData OIDC OpenFGA @@ -207,6 +215,7 @@ SATA scalable scriptlet SDC +SDK SDN SDS SDT @@ -223,6 +232,7 @@ SKBPRIO SLAAC SMTP Snapcraft +SoC Solaris SPAs SPL @@ -256,6 +266,8 @@ sysfs syslog Tbit TCP +TensorRT +Tegra TiB Tibit TinyPNG diff --git a/doc/howto/container_gpu_passthrough_with_docker.md b/doc/howto/container_gpu_passthrough_with_docker.md new file mode 100644 index 000000000000..313bee27cdf3 --- /dev/null +++ b/doc/howto/container_gpu_passthrough_with_docker.md @@ -0,0 +1,152 @@ +(container-gpu-passthrough-with-docker)= +# How to pass an NVIDIA GPU to a container + +If you have an NVIDIA GPU (either discrete (dGPU) or integrated (iGPU)) and you want to pass the runtime 
libraries and configuration installed on your host to your container, you should add a {ref}`LXD GPU device <devices-gpu>`. +Consider the following scenario: + +Your host is an NVIDIA single board computer that has a Tegra SoC with an iGPU, and you have the Tegra SDK installed on the host. You want to create a LXD container and run an application inside the container using the iGPU as a compute backend. You want to run this application inside a Docker container (or another OCI-compliant runtime). +To achieve this, complete the following steps: + +1. Running a Docker container inside a LXD container can potentially consume a lot of disk space if the outer container is not well configured. Here are two options you can use to optimize the consumed disk space: + + - Either you create a BTRFS storage pool to back the LXD container so that the Docker image later used does not use the VFS storage driver, which is very space inefficient, then you initialize the LXD container with {config:option}`instance-security:security.nesting` enabled (needed for running a Docker container inside a LXD container) and using the BTRFS storage pool: + + lxc storage create p1 btrfs size=15GiB + lxc init ubuntu:24.04 t1 --config security.nesting=true -s p1 + + - Or you use the `overlayFS` storage driver in Docker but you need to specify the following syscall interceptions, still with the {config:option}`instance-security:security.nesting` enabled: + + lxc init ubuntu:24.04 t1 --config security.nesting=true --config security.syscalls.intercept.mknod=true --config security.syscalls.intercept.setxattr=true + +1.
Add the GPU device to your container: + + - If you want to do an iGPU pass-through: + + lxc config device add t1 igpu0 gpu gputype=physical id=nvidia.com/igpu=0 + + - If you want to do a dGPU pass-through: + + lxc config device add t1 gpu0 gpu gputype=physical id=nvidia.com/gpu=0 + +After adding the device, let's try to run a basic [MNIST](https://en.wikipedia.org/wiki/MNIST_database) inference job inside our LXD container. + +1. Create a `cloud-init` script that installs the Docker runtime, the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit), and a script to run a test [TensorRT](https://github.com/NVIDIA/TensorRT) workload: + + #cloud-config + package_update: true + write_files: + # `run_tensorrt.sh` compiles the sample TensorRT applications and runs the `sample_onnx_mnist` program, which loads an ONNX model into the TensorRT inference server and executes a digit recognition job. + - path: /root/run_tensorrt.sh + permissions: "0755" + owner: root:root + content: | + #!/bin/bash + echo "OS release,Kernel version" + (.
/etc/os-release; echo "${PRETTY_NAME}"; uname -r) | paste -s -d, + echo + nvidia-smi -q + echo + exec bash -o pipefail -c " + cd /workspace/tensorrt/samples + make -j4 + cd /workspace/tensorrt/bin + ./sample_onnx_mnist + retstatus=\${PIPESTATUS[0]} + echo \"Test exited with status code: \${retstatus}\" >&2 + exit \${retstatus} + " + runcmd: + # Install Docker to run the AI workload + - curl -fsSL https://get.docker.com -o install-docker.sh + - sh install-docker.sh --version 24.0 + # The following installs the NVIDIA container toolkit + # as explained in the official doc website: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt + - curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg + --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg + - curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed -e 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' -e '/experimental/ s/^#//g' | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + # Now that a new apt source/key was added, update the package definitions. + - apt-get update + # Install NVIDIA container toolkit + - DEBIAN_FRONTEND=noninteractive apt-get install -y nvidia-container-toolkit + # Ultimately, we need to tell Docker, our container runtime, to use `nvidia-ctk` as a runtime. + - nvidia-ctk runtime configure --runtime=docker + --config=/etc/docker/daemon.json + - systemctl restart docker + +1. Apply this `cloud-init` setup to your instance: + + lxc config set t1 cloud-init.user-data - < cloud-init.yml + +1. Start the instance: + + lxc start t1 + +1. Wait for the `cloud-init` process to finish: + + lxc exec t1 -- cloud-init status --wait + +1. Once `cloud-init` is finished, open a shell in the instance: + + lxc exec t1 -- bash + +1.
Edit the NVIDIA container runtime to avoid using `cgroups`: + + sudo nvidia-ctk config --in-place --set nvidia-container-cli.no-cgroups + +1. If you use an iGPU and your NVIDIA container runtime is not automatically enabled with CSV mode (needed for NVIDIA Tegra board), enable it manually: + + sudo nvidia-ctk config --in-place --set nvidia-container-runtime.mode=csv + +1. Now, run the inference workload with Docker: + + - If you set up a dGPU pass-through: + + docker run --gpus all --runtime nvidia --rm -v $(pwd):/sh_input nvcr.io/nvidia/tensorrt:24.02-py3 bash /sh_input/run_tensorrt.sh + + - If you set up an iGPU pass-through: + + docker run --gpus all --runtime nvidia --rm -v $(pwd):/sh_input nvcr.io/nvidia/tensorrt:24.02-py3-igpu bash /sh_input/run_tensorrt.sh + + In the end you should see something like: + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@= ++++#++=*@@@@@ + @@@@@@@@#. *@@@@@ + @@@@@@@@= *@@@@@ + @@@@@@@@. .. ...****%@@@@@ + @@@@@@@@: .%@@#@@@@@@@@@@@@@ + @@@@@@@% -@@@@@@@@@@@@@@@@@ + @@@@@@@% -@@*@@@*@@@@@@@@@@ + @@@@@@@# :#- ::. ::=@@@@@@@ + @@@@@@@- -@@@@@@ + @@@@@@%. *@@@@@ + @@@@@@# :==*+== *@@@@@ + @@@@@@%---%%@@@@@@@. *@@@@@ + @@@@@@@@@@@@@@@@@@@+ *@@@@@ + @@@@@@@@@@@@@@@@@@@= *@@@@@ + @@@@@@@@@@@@@@@@@@* *@@@@@ + @@@@@%+%@@@@@@@@%. 
.%@@@@@ + @@@@@* .******= -@@@@@@@ + @@@@@* .#@@@@@@@ + @@@@@* =%@@@@@@@@ + @@@@@@%#+++= =@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @@@@@@@@@@@@@@@@@@@@@@@@@@@@ + + [07/31/2024-13:19:21] [I] Output: + [07/31/2024-13:19:21] [I] Prob 0 0.0000 Class 0: + [07/31/2024-13:19:21] [I] Prob 1 0.0000 Class 1: + [07/31/2024-13:19:21] [I] Prob 2 0.0000 Class 2: + [07/31/2024-13:19:21] [I] Prob 3 0.0000 Class 3: + [07/31/2024-13:19:21] [I] Prob 4 0.0000 Class 4: + [07/31/2024-13:19:21] [I] Prob 5 1.0000 Class 5: ********** + [07/31/2024-13:19:21] [I] Prob 6 0.0000 Class 6: + [07/31/2024-13:19:21] [I] Prob 7 0.0000 Class 7: + [07/31/2024-13:19:21] [I] Prob 8 0.0000 Class 8: + [07/31/2024-13:19:21] [I] Prob 9 0.0000 Class 9: + [07/31/2024-13:19:21] [I] + &&&& PASSED TensorRT.sample_onnx_mnist [TensorRT v8603] # ./sample_onnx_mnist diff --git a/doc/instances.md b/doc/instances.md index 402281d3cf31..f729aa0eb72c 100644 --- a/doc/instances.md +++ b/doc/instances.md @@ -57,6 +57,16 @@ How to import instances: :diataxis:Migrate from LXC ``` +```{only} diataxis +How to pass an NVIDIA GPU to a container with a Docker workload: +``` + +```{filtered-toctree} +:titlesonly: + +:diataxis:Pass NVIDIA GPUs +``` + ## Related topics ```{only} diataxis diff --git a/doc/reference/devices_gpu.md b/doc/reference/devices_gpu.md index c044dcc56236..4337ff091053 100644 --- a/doc/reference/devices_gpu.md +++ b/doc/reference/devices_gpu.md @@ -53,6 +53,22 @@ Add a specific GPU from the host system as a `physical` GPU device to an instanc See {ref}`instances-configure-devices` for more information. 
+#### CDI mode + +Add a specific GPU from the host system as a `physical` GPU device to an instance using the [Container Device Interface](https://github.com/cncf-tags/container-device-interface) (CDI) notation through a fully-qualified CDI name: + + lxc config device add <instance_name> <device_name> gpu gputype=physical id=<fully-qualified_CDI_name> + +For example, add the first available NVIDIA discrete GPU on your system: + + lxc config device add <instance_name> <device_name> gpu gputype=physical id=nvidia.com/gpu=0 + +If your machine has an NVIDIA iGPU (integrated GPU) located at index 0, you can add it like this: + + lxc config device add <instance_name> <device_name> gpu gputype=physical id=nvidia.com/igpu=0 + +For a complete example on how to use a GPU CDI pass-through, see {ref}`container-gpu-passthrough-with-docker`. + (gpu-mdev)= ## `gputype`: `mdev`