Skip to content

Commit

Permalink
feat: adding SkyPilot as another target option
Browse files Browse the repository at this point in the history
Signed-off-by: aavarghese <[email protected]>
  • Loading branch information
aavarghese committed Aug 9, 2024
1 parent d3e8eb1 commit 9833f13
Show file tree
Hide file tree
Showing 19 changed files with 389 additions and 51 deletions.
6 changes: 6 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ require (
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/patternmatcher v0.6.0 // indirect
github.com/moby/sys/sequential v0.5.0 // indirect
github.com/moby/sys/user v0.1.0 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/oklog/ulid v1.3.1 // indirect
go.mongodb.org/mongo-driver v1.16.0 // indirect
)
Expand All @@ -52,6 +57,7 @@ require (
github.com/Microsoft/hcsshim v0.12.5 // indirect
github.com/Unknwon/goconfig v1.0.0 // indirect
github.com/abbot/go-http-auth v0.4.0 // indirect
github.com/ahmetb/go-dexec v0.0.0-20240429060713-f13091af5825
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
Expand Down
11 changes: 11 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ github.com/Unknwon/goconfig v1.0.0 h1:9IAu/BYbSLQi8puFjUQApZTxIHqSwrj5d8vpP8vTq4
github.com/Unknwon/goconfig v1.0.0/go.mod h1:wngxua9XCNjvHjDiTiV26DaKDT+0c63QR6H5hjVUUxw=
github.com/abbot/go-http-auth v0.4.0 h1:QjmvZ5gSC7jm3Zg54DqWE/T5m1t2AfDu6QlXJT0EVT0=
github.com/abbot/go-http-auth v0.4.0/go.mod h1:Cz6ARTIzApMJDzh5bRMSUou6UMSp0IEXg9km/ci7TJM=
github.com/ahmetb/go-dexec v0.0.0-20240429060713-f13091af5825 h1:TnbGurKPZ6wv4noePGiuT4XBbkwgHEiYveB0FUKgojc=
github.com/ahmetb/go-dexec v0.0.0-20240429060713-f13091af5825/go.mod h1:w2DmE+EPHh6+osqQz68ZX3J/tWT3I1ytom+K890LHi4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
Expand Down Expand Up @@ -117,6 +119,8 @@ github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8=
github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1 h1:ZClxb8laGDf5arXfYcAtECDFgAgHklGI8CxgjHnXKJ4=
github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
Expand Down Expand Up @@ -329,12 +333,18 @@ github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zx
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/mittwald/go-helm-client v0.12.11 h1:fjJysS2pyEXUsgHP9OAQyFIZB67sU6uo+w4vM6JePJM=
github.com/mittwald/go-helm-client v0.12.11/go.mod h1:HA3eMOaUhqa4EXUfj94f6L0v4aUEKHuVV977hVl1KWU=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g=
github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc=
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg=
github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
Expand All @@ -346,6 +356,7 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0=
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
Expand Down
7 changes: 7 additions & 0 deletions pkg/be/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"lunchpail.io/pkg/be/kubernetes"
"lunchpail.io/pkg/be/platform"
"lunchpail.io/pkg/be/runs"
"lunchpail.io/pkg/be/skypilot"
"lunchpail.io/pkg/compilation"
"lunchpail.io/pkg/ir"
)
Expand Down Expand Up @@ -43,6 +44,12 @@ func New(backend platform.Platform, aopts compilation.Options) (Backend, error)
} else {
be = ibm
}
case platform.SkyPilot:
if sp, err := skypilot.New(aopts); err != nil {
return nil, err
} else {
be = sp
}
default:
return nil, fmt.Errorf("Unsupported backend %v", backend)
}
Expand Down
7 changes: 5 additions & 2 deletions pkg/be/ibmcloud/api.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package ibmcloud

import "github.com/IBM/vpc-go-sdk/vpcv1"
import (
"github.com/IBM/vpc-go-sdk/vpcv1"
"lunchpail.io/pkg/be/platform"
)

type Backend struct {
config ibmConfig
config platform.IbmConfig
vpcService *vpcv1.VpcV1
sshKeyType string
sshPublicKey string
Expand Down
3 changes: 2 additions & 1 deletion pkg/be/ibmcloud/authenticate.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import (

"github.com/IBM/go-sdk-core/v5/core"
"github.com/IBM/vpc-go-sdk/vpcv1"
"lunchpail.io/pkg/be/platform"
)

func Authenticator(apiKey string, config ibmConfig) (*vpcv1.VpcV1, error) {
func Authenticator(apiKey string, config platform.IbmConfig) (*vpcv1.VpcV1, error) {
var auth core.Authenticator
var method = "apikey"
if apiKey == "" && config.IAMToken != "" {
Expand Down
34 changes: 3 additions & 31 deletions pkg/be/ibmcloud/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ package ibmcloud

import (
"context"
"errors"
"fmt"
"math"
"net/http"
"os/exec"
"strconv"
Expand All @@ -13,6 +11,7 @@ import (

"github.com/IBM/go-sdk-core/v5/core"
"github.com/IBM/vpc-go-sdk/vpcv1"
"lunchpail.io/pkg/be/platform"
"lunchpail.io/pkg/compilation"
"lunchpail.io/pkg/ir/llir"
comp "lunchpail.io/pkg/lunchpail"
Expand Down Expand Up @@ -322,7 +321,7 @@ func createAndInitVM(vpcService *vpcv1.VpcV1, name string, ir llir.LLIR, resourc
}

//Compute number of VSIs to be provisioned and job parallelism for each VSI
parallelism, numInstances, err := computeParallelismAndInstanceCount(vpcService, profile, workerCount)
parallelism, numInstances, err := platform.ComputeParallelismAndInstanceCount(vpcService, profile, workerCount)
if err != nil {
return fmt.Errorf("failed to compute number of instances and job parallelism: %v", err)
}
Expand Down Expand Up @@ -382,7 +381,7 @@ func (backend Backend) SetAction(aopts compilation.Options, ir llir.LLIR, runnam
} else if action == Create {
zone := aopts.Zone //command line zone value
if zone == "" { //random zone value using config
randomZone, err := getRandomizedZone(backend.config, backend.vpcService) //Todo: spread among random zones with a subnet in each zone
randomZone, err := platform.GetRandomizedZone(backend.config, backend.vpcService) //Todo: spread among random zones with a subnet in each zone
if err != nil {
return err
}
Expand All @@ -394,30 +393,3 @@ func (backend Backend) SetAction(aopts compilation.Options, ir llir.LLIR, runnam
}
return nil
}

func computeParallelismAndInstanceCount(vpcService *vpcv1.VpcV1, profile string, workers int32) (parallelism int64, instanceCount int, err error) {
//TODO: 1. Mapping table from size specified by application and user to IBM's profile table
//2. Build comparison table for multiple cloud providers
prof, response, err := vpcService.GetInstanceProfile(
&vpcv1.GetInstanceProfileOptions{
Name: &profile,
})
if err != nil {
return parallelism, instanceCount, fmt.Errorf("failed to retrieve instance profile: %v and the response is: %s", err, response)
}

if prof != nil {
vcpuCount, ok := prof.VcpuCount.(*vpcv1.InstanceProfileVcpu)
if !ok {
return parallelism, instanceCount, errors.New("failed to get VcpuCount from instance profile")
}

parallelism = *vcpuCount.Value
if workers < int32(parallelism) {
parallelism = int64(workers)
}
instanceCount = max(1, int(math.Ceil(float64(workers)/float64(parallelism))))
}

return parallelism, instanceCount, nil
}
5 changes: 3 additions & 2 deletions pkg/be/ibmcloud/new.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
package ibmcloud

import (
"lunchpail.io/pkg/be/platform"
"lunchpail.io/pkg/compilation"
)

func New(aopts compilation.Options) (Backend, error) {
config := loadConfigWithCommandLineOverrides(aopts)
keytype, key, err := loadPublicKey(config, aopts)
config := platform.LoadConfigWithCommandLineOverrides(aopts)
keytype, key, err := platform.LoadPublicKey(config, aopts)

vpcService, err := Authenticator(aopts.ApiKey, config)
if err != nil {
Expand Down
45 changes: 37 additions & 8 deletions pkg/be/ibmcloud/discovery.go → pkg/be/platform/discovery.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package ibmcloud
package platform

import (
"encoding/json"
"errors"
"fmt"
"math"
"math/rand/v2"
"os"
"path/filepath"
Expand All @@ -18,16 +20,16 @@ type resourceGroup struct {
Name string `json:"Name"`
}

type ibmConfig struct {
type IbmConfig struct {
IAMToken string `json:"IAMToken"`
ResourceGroup resourceGroup `json:"ResourceGroup"`
Region string `json:"Region"`
// IAMRefreshToken string `json:"IAMRefreshToken"`
}

// Retrieve bearer token and other login info from ibmcloud's standard config file
func loadConfig() (ibmConfig, error) {
var config ibmConfig
func LoadConfig() (IbmConfig, error) {
var config IbmConfig

homedir, err := os.UserHomeDir()
if err != nil {
Expand All @@ -47,9 +49,9 @@ func loadConfig() (ibmConfig, error) {
}

// Replace the config file values with user specificed values from command line
func loadConfigWithCommandLineOverrides(aopts compilation.Options) ibmConfig {
func LoadConfigWithCommandLineOverrides(aopts compilation.Options) IbmConfig {
// intentionally ignoring error, as we have fallbacks if we couldn't find or load the config
config, _ := loadConfig()
config, _ := LoadConfig()

if aopts.ResourceGroupID != "" {
config.ResourceGroup.GUID = aopts.ResourceGroupID
Expand All @@ -59,7 +61,7 @@ func loadConfigWithCommandLineOverrides(aopts compilation.Options) ibmConfig {
}

// Use region from ibmcloud's standard config file to get a randomized zone within that region
func getRandomizedZone(config ibmConfig, vpcService *vpcv1.VpcV1) (string, error) {
func GetRandomizedZone(config IbmConfig, vpcService *vpcv1.VpcV1) (string, error) {
if config.Region != "" {
zones, response, err := vpcService.ListRegionZones(&vpcv1.ListRegionZonesOptions{
RegionName: &config.Region,
Expand All @@ -75,7 +77,7 @@ func getRandomizedZone(config ibmConfig, vpcService *vpcv1.VpcV1) (string, error

// Retrieve public key from user's ssh dir, if exists
// Looks for two ssh key types: “rsa” and “ed25519" (ibmcloud supported)
func loadPublicKey(config ibmConfig, aopts compilation.Options) (string, string, error) {
func LoadPublicKey(config IbmConfig, aopts compilation.Options) (string, string, error) {
homedir, err := os.UserHomeDir()
if err != nil {
return "", "", err
Expand All @@ -98,3 +100,30 @@ func loadPublicKey(config ibmConfig, aopts compilation.Options) (string, string,

return "", "", nil
}

func ComputeParallelismAndInstanceCount(vpcService *vpcv1.VpcV1, profile string, workers int32) (parallelism int64, instanceCount int, err error) {
//TODO: 1. Mapping table from size specified by application and user to IBM's profile table
//2. Build comparison table for multiple cloud providers
prof, response, err := vpcService.GetInstanceProfile(
&vpcv1.GetInstanceProfileOptions{
Name: &profile,
})
if err != nil {
return parallelism, instanceCount, fmt.Errorf("failed to retrieve instance profile: %v and the response is: %s", err, response)
}

if prof != nil {
vcpuCount, ok := prof.VcpuCount.(*vpcv1.InstanceProfileVcpu)
if !ok {
return parallelism, instanceCount, errors.New("failed to get VcpuCount from instance profile")
}

parallelism = *vcpuCount.Value
if workers < int32(parallelism) {
parallelism = int64(workers)
}
instanceCount = max(1, int(math.Ceil(float64(workers)/float64(parallelism))))
}

return parallelism, instanceCount, nil
}
11 changes: 11 additions & 0 deletions pkg/be/skypilot/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package skypilot

import (
"github.com/IBM/vpc-go-sdk/vpcv1"
"lunchpail.io/pkg/be/platform"
)

type Backend struct {
config platform.IbmConfig
vpcService *vpcv1.VpcV1
}
71 changes: 71 additions & 0 deletions pkg/be/skypilot/authenticate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package skypilot

import (
"errors"
"fmt"
"os"
"os/exec"
"path"

"github.com/IBM/go-sdk-core/v5/core"
"github.com/IBM/vpc-go-sdk/vpcv1"
"lunchpail.io/pkg/be/platform"
)

func Authenticator(apiKey string, config platform.IbmConfig) (*vpcv1.VpcV1, error) {
var auth core.Authenticator
var method = "apikey"
if apiKey == "" && config.IAMToken != "" {
bearerAuth, err := core.NewBearerTokenAuthenticator(config.IAMToken)
if err != nil {
return nil, err
}
method = "bearer token"
auth = bearerAuth
} else if apiKey != "" {
auth = &core.IamAuthenticator{
ApiKey: apiKey,
}

} else {
return nil, fmt.Errorf("Either use 'ibmcloud login' or rerun with an '--api-key' option")
}

// Instantiate the service with an API key based IAM authenticator
vpcService, err := vpcv1.NewVpcV1(&vpcv1.VpcV1Options{
Authenticator: auth,
})
if err != nil {
return nil, errors.New("Error creating VPC Service with apikey" + apiKey)
}
fmt.Printf("Accessing the VPC service via %s\n", method)

//To access IBM’s VPC service, store the apikey and resource group in $HOME/.ibm/credentials.yaml
credsPath := os.Getenv("HOME") + "/.ibm/credentials.yaml"
err = os.MkdirAll(path.Dir(credsPath), 0755)
if err != nil {
return nil, err
}
f, err := os.Create(credsPath)
if err != nil {
return nil, err
}

defer f.Close()
d := []string{"iam_api_key: " + apiKey, "resource_group_id: " + config.ResourceGroup.GUID}

for _, v := range d {
_, err = fmt.Fprintln(f, v)
if err != nil {
return nil, err
}
}

cmd := exec.Command("/bin/bash", "-c", "env DOCKER_HOST=unix:///var/run/docker.sock docker run -td --rm --name sky -v ${HOME}/.sky:/root/.sky:rw -v $HOME/.ibm:/root/.ibm:rw berkeleyskypilot/skypilot")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("internal Error starting docker container: %v", err)
}
return vpcService, nil
}
26 changes: 26 additions & 0 deletions pkg/be/skypilot/delete.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package skypilot

import (
"fmt"
"os"
"os/exec"
)

func stopOrDownSkyCluster(name string, down bool) error {
cmd := exec.Command("/bin/bash", "-c", "env DOCKER_HOST=unix:///var/run/docker.sock docker exec sky sky stop --yes "+name)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("Internal Error running SkyPilot stop cmd: %v", err)
}

if down {
cmd = exec.Command("/bin/bash", "-c", "env DOCKER_HOST=unix:///var/run/docker.sock docker exec sky sky down --yes "+name+" ; docker stop sky")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("Internal Error running SkyPilot down cmd: %v", err)
}
}
return nil
}
Loading

0 comments on commit 9833f13

Please sign in to comment.