From 4e90c72e2d154877ef40599f5e87315e37934974 Mon Sep 17 00:00:00 2001 From: Felix Breidenstein Date: Tue, 19 Oct 2021 12:23:11 +0200 Subject: [PATCH] Implement local filecaching --- .gitignore | 1 + README.md | 55 +++++++++++++-- filewrapper.go | 41 +++++++++++ go.mod | 5 +- go.sum | 6 ++ proxy.go | 183 ++++++++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 275 insertions(+), 16 deletions(-) create mode 100644 filewrapper.go diff --git a/.gitignore b/.gitignore index 7056b1d..2263092 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ s3-http-proxy dist/ +cache/ diff --git a/README.md b/README.md index 1796a72..eb1fa22 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,62 @@ Little proxy to access an private S3 bucket via HTTP. -## Usage + +## Use case +When your application stores its assets in an S3 bucket and you use e.g. +CloudFront, you can improve performance by configuring the bucket as an origin +and pointing a custom behaviour like '/assets' to the bucket. This way, the assets +get served directly from the bucket without shoving the request through your +application stack. This also works perfectly for private buckets because +CloudFront can use an OAI (Origin Access Identity) to get permissions. If you +can't use CloudFront (for whatever reason) and just have a good old +webserver/reverse proxy such as nginx in front of your application but still +want to serve assets directly from the bucket, you are out of luck because +nginx can't deal with IAM credentials to access a private bucket (and you don't +want to enable public access on your bucket!). 
Because we had this scenario for +a few customers, we wrote this tool which you can run behind a +webserver/reverse proxy and then configure a location block for '/assets' which +routes the request to this tool, and get nearly the same behaviour as in the +setup with CloudFront (obviously it's not a full-blown CDN but you still get +'direct' file access to the bucket without going through your app stack). + + +## Minimal usage example ``` export S3PROXY_BUCKET="nameofmybucket" -export S3PROXY_REGION="us-central-1" -export S3PROXY_PORT="3000" -./proxy +./s3-http-proxy ``` ## Usage with Docker ``` docker run -e S3PROXY_BUCKET=nameofmybucket -p 3000:3000 --rm -it codemonauts/s3-http-proxy ``` + +## Configuration +All configuration happens via environment variables. + +| Name | Required | Default | Description | +| --------------- | :------: | -------------- | ------------------------------------------------------ | +| S3PROXY_BUCKET | x | - | Name of the bucket | +| S3PROXY_REGION | | "eu-central-1" | Region of the bucket | +| S3PROXY_PORT | | "3000" | Listening port of the application | +| S3PROXY_CACHE | | "" | Set this to a path if you want the files to be cached | +| S3PROXY_LOGGING | | "WARN" | Log level ("ERROR","WARN","INFO","DEBUG") | + + +## Caching +This proxy can locally cache all files from S3 to disk for better performance. To +enable caching just set *S3PROXY_CACHE* to a valid path (relative and absolute +paths both work). The tool will then only issue a HeadObject request to the bucket when it already has +the file in its cache, to check whether the file is still up to date +(comparison of the LastModified timestamp). If the file has changed in the bucket +after we downloaded it, it will be downloaded afresh from the bucket and +replaced on disk before a response is sent. 
+ +If you don't need this invalidation check for your files, you can also directly +point your webserver to the cache directory of the plugin, because the files get +saved to disk with the same folder structure as in S3 so they can directly be +read and delivered by a webserver. + + +With ❤ by [codemonauts](https://codemonauts.com) \ No newline at end of file diff --git a/filewrapper.go b/filewrapper.go new file mode 100644 index 0000000..fc35065 --- /dev/null +++ b/filewrapper.go @@ -0,0 +1,41 @@ +package main + +import ( + "io" + "os" + + "github.com/aws/aws-sdk-go/service/s3" +) + +// FileWrapper wraps either a local file or an reponse from S3 +// It either contains a pointer to a local file and the reponse from a HeadObject request +// or both of these are nil and it only contains an GetObject request +type FileWrapper struct { + File *os.File + GetOutput *s3.GetObjectOutput + HeadOutput *s3.HeadObjectOutput +} + +func (obj *FileWrapper) GetContent() io.Reader { + if obj.File != nil { + return obj.File + } else { + return obj.GetOutput.Body + } +} + +func (obj *FileWrapper) GetContentType() string { + if obj.GetOutput != nil { + return *obj.GetOutput.ContentType + } else { + return *obj.HeadOutput.ContentType + } +} + +func (obj *FileWrapper) GetMetadata() map[string]*string { + if obj.GetOutput != nil { + return obj.GetOutput.Metadata + } else { + return obj.HeadOutput.Metadata + } +} diff --git a/go.mod b/go.mod index 80dc3c6..d273db4 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module github.com/codemonauts/s3-http-proxy go 1.16 -require github.com/aws/aws-sdk-go v1.38.61 +require ( + github.com/aws/aws-sdk-go v1.38.61 + github.com/sirupsen/logrus v1.8.1 // indirect +) diff --git a/go.sum b/go.sum index 0a9058c..3b28f2c 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,7 @@ github.com/aws/aws-sdk-go v1.38.61 h1:wizuqQZe0K4iYJ+Slrs0aSQ4P94FAwqBUHwk46Iz5U github.com/aws/aws-sdk-go v1.38.61/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= 
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= @@ -9,7 +10,10 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfC github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -17,6 +21,8 @@ golang.org/x/net v0.0.0-20201110031124-69a78807bb2b h1:uwuIcX0g4Yl1NC5XAz37xsr2l golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= diff --git a/proxy.go b/proxy.go index 9e9d292..4a3eb40 100644 --- a/proxy.go +++ b/proxy.go @@ -1,22 +1,156 @@ package main import ( + "errors" "fmt" "io" - "log" "net/http" "os" + "path/filepath" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3" + log "github.com/sirupsen/logrus" ) var ( s3Service *s3.S3 bucketName string + cachePath string ) +// getFile checks if we have a local copy otherwise downloads from S3 +func getFile(key string) (FileWrapper, error) { + if cachePath != "" { + log.Debug("Trying to get file from cache") + obj, err := getFileFromCache(key) + + // Directly return file from Cache if we didn't got an error + if err == nil { + log.Info("Returning cached file") + return obj, nil + } else { + log.Debug(err) + } + } + + obj, err := getFileFromBucket(key) + if err != nil { + return FileWrapper{}, err + } + + log.Debug("Returning file from Bucket") + return obj, nil + +} + +func getFileFromCache(key string) (FileWrapper, error) { + filePath := filepath.Join(cachePath, key) + + if fileStat, err := os.Stat(filePath); err == nil { + // file in cache. 
check expire + headRequest, err := s3Service.HeadObject(&s3.HeadObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + }) + + if err != nil { + // We have a local file, but HeadObject returned an error, so we can + // assume that the file no longer exists in the bucket + os.Remove(filePath) + log.Debug("Deleting local file") + return FileWrapper{}, err + } + + if fileStat.ModTime().Before(*headRequest.LastModified) { + // Our file is older than the one in the bucket + os.Remove(filePath) + return FileWrapper{}, errors.New("file not up to date") + } + + fh, err := os.Open(filePath) + if err != nil { + // Couldn't open cached file + return FileWrapper{}, err + } + + return FileWrapper{ + File: fh, + HeadOutput: headRequest, + GetOutput: nil, + }, nil + + } else { + // File not in cache or otherwise not accessible + return FileWrapper{}, err + } +} + +func getFileFromBucket(key string) (FileWrapper, error) { + log.Info("Getting file from Bucket") + + obj, err := s3Service.GetObject(&s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + }) + + if err != nil { + log.Errorf("Error while getting %q from S3: %s\n", key, err.Error()) + return FileWrapper{}, err + } + + s3File := FileWrapper{ + File: nil, + HeadOutput: nil, + GetOutput: obj, + } + + if cachePath != "" { + path, err := saveFileToCache(key, obj) + if err != nil { + // We couldn't save the file to the cache but still return the Get response from S3 + log.Error(err) + return s3File, nil + } + + fh, _ := os.Open(path) + return FileWrapper{ + File: fh, + HeadOutput: nil, + GetOutput: obj, + }, nil + + } + + return s3File, nil +} + +// createWithFolders creates the full nested directory structure and then creates the requested file +func createWithFolders(p string) (*os.File, error) { + if err := os.MkdirAll(filepath.Dir(p), 0770); err != nil { + return nil, err + } + return os.Create(p) +} + +func saveFileToCache(key string, obj *s3.GetObjectOutput) (string, error) { + 
log.Debug("Saving file to cache") + filePath := filepath.Join(cachePath, key) + + outFile, err := createWithFolders(filePath) + if err != nil { + log.Error("Couldn't create cache dir") + return "", err + } + defer outFile.Close() + + io.Copy(outFile, obj.Body) + + return filePath, nil + +} + func handler(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() @@ -27,24 +161,30 @@ func handler(w http.ResponseWriter, r *http.Request) { return } - input := &s3.GetObjectInput{ - Bucket: aws.String(bucketName), - Key: aws.String(key), - } - obj, err := s3Service.GetObject(input) + log.WithFields(log.Fields{ + "key": key, + }).Info("Got a request") + + obj, err := getFile(key) if err != nil { - log.Printf("Error while getting %q: %s\n", key, err.Error()) w.WriteHeader(http.StatusForbidden) w.Write([]byte("Forbidden")) return } - defer obj.Body.Close() + // Set correct ContentType + w.Header().Set("Content-Type", obj.GetContentType()) - w.Header().Set("Content-Type", *obj.ContentType) + // Check for additional metadata + metadata := obj.GetMetadata() + if len(metadata) > 0 { + for k, v := range metadata { + w.Header().Set(k, *v) + } + } // Directly copy all bytes from the S3 object into the HTTP reponse - io.Copy(w, obj.Body) + io.Copy(w, obj.GetContent()) } func envOrDefault(name string, defaultValue string) string { @@ -59,11 +199,32 @@ func main() { region := envOrDefault("S3PROXY_REGION", "eu-central-1") port := envOrDefault("S3PROXY_PORT", "3000") bucketName = envOrDefault("S3PROXY_BUCKET", "") + cachePath = envOrDefault("S3PROXY_CACHE", "") + logLevel := envOrDefault("S3PROXY_LOGGING", "WARN") + + l, err := log.ParseLevel(logLevel) + if err != nil { + log.Error("Unknown loglevel provided. 
Defaulting to WARN") + log.SetLevel(log.WarnLevel) + } else { + log.SetLevel(l) + } if bucketName == "" { log.Fatal("You need to provide S3PROXY_BUCKET") } + if cachePath != "" { + // Check if we have write access to the cache directory + testPath := filepath.Join(cachePath, ".testfile") + file, err := createWithFolders(testPath) + if err != nil { + log.Fatal("No write access to the cache dir") + } + defer file.Close() + + } + sess := session.Must(session.NewSession(&aws.Config{ Region: aws.String(region), })) @@ -71,6 +232,6 @@ func main() { http.HandleFunc("/", handler) - log.Printf("Listening on :%s \n", port) + log.Info("Listening on :%s \n", port) log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", port), nil)) }