Skip to content

Commit

Permalink
Implement local filecaching
Browse files Browse the repository at this point in the history
  • Loading branch information
Felix Breidenstein committed Oct 19, 2021
1 parent 9a48dc3 commit 4e90c72
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
s3-http-proxy
dist/
cache/
55 changes: 51 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,62 @@

Little proxy to access a private S3 bucket via HTTP.

## Usage

## Usecase
When your application stores its assets in an S3 bucket and you use e.g.
CloudFront, you can improve performance by configuring the bucket as an origin
and pointing a custom behaviour like '/assets' to the bucket. This way, the assets
get served directly from the bucket without shoving the request through your
application stack. This also works perfectly for private buckets because
CloudFront can use an OAI (Origin Access Identity) to get permissions. If you
can't (for whatever reason) use CloudFront and just have a good old
webserver/reverse proxy like e.g. nginx in front of your application but still
want to serve assets directly from the bucket, you are out of luck because
nginx can't deal with IAM credentials to access a private bucket (and you don't
want to enable public access on your bucket!). Because we had this scenario for
a few customers, we wrote this tool, which you can run behind a
webserver/reverse proxy. Configure a location block for '/assets' that
routes requests to this tool, and you get nearly the same behaviour as in the
setup with CloudFront (obviously it's not a full-blown CDN, but you still get
'direct' file access to the bucket without going through your app stack).


## Minimal usage example
```
export S3PROXY_BUCKET="nameofmybucket"
export S3PROXY_REGION="eu-central-1"
export S3PROXY_PORT="3000"
./proxy
./s3-http-proxy
```

## Usage with Docker
```
docker run -e S3PROXY_BUCKET=nameofmybucket -p 3000:3000 --rm -it codemonauts/s3-http-proxy
```

## Configuration
All configuration happens via environment variables.

| Name | Required | Default | Description |
| --------------- | :------: | -------------- | ------------------------------------------------------ |
| S3PROXY_BUCKET | x | - | Name of the bucket |
| S3PROXY_REGION | | "eu-central-1" | Region of the bucket |
| S3PROXY_PORT | | "3000" | Listening port of the application |
| S3PROXY_CACHE | | "" | Set this to a path if you want the files to be cached |
| S3PROXY_LOGGING | | "WARN" | Loglevel ("ERROR","WARN","INFO","DEBUG") |


## Caching
This proxy can locally cache all files from S3 on disk for better performance. To
enable caching, just set *S3PROXY_CACHE* to a valid path (relative and absolute
paths both work). When the tool already has a file in its cache, it only sends a
HeadObject request to the bucket to check whether the cached copy is still up to
date (comparison of the LastModified timestamp). If the file has changed in the
bucket after we downloaded it, it is downloaded again from the bucket and
replaced on disk before a response is sent.

If you don't need this invalidation check for your files, you can also directly
point your webserver to the cache directory of the plugin, because the files get
saved to disk with the same folder structure as in S3 so they can directly be
read and delivered by a webserver.


With ❤ by [codemonauts](https://codemonauts.com)
41 changes: 41 additions & 0 deletions filewrapper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package main

import (
"io"
"os"

"github.com/aws/aws-sdk-go/service/s3"
)

// FileWrapper wraps either a local file or a response from S3.
//
// Three field combinations are produced by this program:
//   - cache hit: File and HeadOutput are set, GetOutput is nil;
//   - fresh download without caching (or when writing the cache failed):
//     only GetOutput is set;
//   - fresh download that was also written to the cache: File and GetOutput
//     are set, HeadOutput is nil.
type FileWrapper struct {
// File is the open handle to the cached copy on disk, or nil when the
// content has to be read from GetOutput.Body.
File *os.File
// GetOutput is the GetObject response; nil when the file came from the cache.
GetOutput *s3.GetObjectOutput
// HeadOutput is the HeadObject response used to validate a cached file;
// nil whenever the object was freshly downloaded.
HeadOutput *s3.HeadObjectOutput
}

// GetContent returns the reader that yields the object's bytes: the local
// file when one is attached, otherwise the body of the GetObject response.
func (obj *FileWrapper) GetContent() io.Reader {
	if obj.File == nil {
		// No local copy, so stream straight from the S3 response.
		return obj.GetOutput.Body
	}
	return obj.File
}

// GetContentType returns the Content-Type reported by S3 for the wrapped
// object. The GetObject response is preferred; the HeadObject response is
// used when no GetObject response (or no type on it) is available.
//
// Both the response structs and their ContentType fields are pointers, so
// every level is checked before dereferencing. When no type is known at all,
// "application/octet-stream" is returned instead of panicking.
func (obj *FileWrapper) GetContentType() string {
	const fallback = "application/octet-stream"

	switch {
	case obj.GetOutput != nil && obj.GetOutput.ContentType != nil:
		return *obj.GetOutput.ContentType
	case obj.HeadOutput != nil && obj.HeadOutput.ContentType != nil:
		return *obj.HeadOutput.ContentType
	default:
		return fallback
	}
}

// GetMetadata returns the user-defined metadata attached to the object,
// taken from the GetObject response when present and from the HeadObject
// response otherwise.
func (obj *FileWrapper) GetMetadata() map[string]*string {
	if obj.GetOutput == nil {
		return obj.HeadOutput.Metadata
	}
	return obj.GetOutput.Metadata
}
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module github.com/codemonauts/s3-http-proxy

go 1.16

require github.com/aws/aws-sdk-go v1.38.61
require (
github.com/aws/aws-sdk-go v1.38.61
github.com/sirupsen/logrus v1.8.1 // indirect
)
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,27 @@ github.com/aws/aws-sdk-go v1.38.61 h1:wizuqQZe0K4iYJ+Slrs0aSQ4P94FAwqBUHwk46Iz5U
github.com/aws/aws-sdk-go v1.38.61/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b h1:uwuIcX0g4Yl1NC5XAz37xsr2lTtcqevgzYNVt49waME=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
Expand Down
183 changes: 172 additions & 11 deletions proxy.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,156 @@
package main

import (
"errors"
"fmt"
"io"
"log"
"net/http"
"os"
"path/filepath"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
log "github.com/sirupsen/logrus"
)

// Package-level configuration, assigned once in main and read by the
// request path afterwards.
var (
// s3Service is the S3 client used for every HeadObject/GetObject call.
s3Service *s3.S3
// bucketName is the bucket all keys are resolved against (S3PROXY_BUCKET).
bucketName string
// cachePath is the root directory of the on-disk cache (S3PROXY_CACHE);
// an empty string disables caching.
cachePath string
)

// getFile resolves key to its content. With caching enabled it first tries
// the local cache; on any cache miss or cache error (logged at debug level)
// it falls back to downloading the object from the bucket.
func getFile(key string) (FileWrapper, error) {
	cachingEnabled := cachePath != ""
	if cachingEnabled {
		log.Debug("Trying to get file from cache")
		cached, cacheErr := getFileFromCache(key)

		// A nil error means the cached copy exists and is still current.
		if cacheErr == nil {
			log.Info("Returning cached file")
			return cached, nil
		}
		log.Debug(cacheErr)
	}

	fetched, fetchErr := getFileFromBucket(key)
	if fetchErr != nil {
		return FileWrapper{}, fetchErr
	}

	log.Debug("Returning file from Bucket")
	return fetched, nil
}

// getFileFromCache returns the locally cached copy of key if one exists and
// is not older than the object in the bucket.
//
// A non-nil error means the cache cannot serve the request (not cached, stale,
// unreadable, or the bucket could not be queried); the caller is expected to
// fall back to downloading the object. Stale entries, and entries whose
// HeadObject request failed, are removed from disk.
func getFileFromCache(key string) (FileWrapper, error) {
	filePath := filepath.Join(cachePath, key)

	fileStat, err := os.Stat(filePath)
	if err != nil {
		// File not in cache or otherwise not accessible
		return FileWrapper{}, err
	}
	if fileStat.IsDir() {
		// S3 allows both "a" and "a/b" as keys, so the path for "a" can be a
		// directory in the cache. A directory cannot be streamed as content.
		return FileWrapper{}, fmt.Errorf("cache entry %q is a directory", filePath)
	}

	// File in cache: ask the bucket for the current modification time.
	headRequest, err := s3Service.HeadObject(&s3.HeadObjectInput{
		Bucket: aws.String(bucketName),
		Key:    aws.String(key),
	})
	if err != nil {
		// We have a local file, but HeadObject returned an error, so we
		// assume that the file no longer exists in the bucket.
		// NOTE(review): this also fires on transient network/permission
		// errors, which purges a still-valid cache entry.
		_ = os.Remove(filePath) // best effort; the entry is not served either way
		log.Debug("Deleting local file")
		return FileWrapper{}, err
	}

	// LastModified is a pointer; without it the entry cannot be validated and
	// is treated like a stale one.
	if headRequest.LastModified == nil || fileStat.ModTime().Before(*headRequest.LastModified) {
		// Our file is older than the one in the bucket
		_ = os.Remove(filePath) // best effort; a fresh download overwrites it anyway
		return FileWrapper{}, errors.New("file not up to date")
	}

	fh, err := os.Open(filePath)
	if err != nil {
		// Couldn't open cached file
		return FileWrapper{}, err
	}

	return FileWrapper{
		File:       fh,
		HeadOutput: headRequest,
		GetOutput:  nil,
	}, nil
}

// getFileFromBucket downloads key from the bucket.
//
// Without caching, the returned FileWrapper streams directly from the
// GetObject response. With caching, the object is first written to the cache
// directory and the wrapper serves the content from that file, while the
// GetObject response is kept for content type and metadata. If writing the
// cache fails, the S3 response is returned so the request can still be
// answered.
func getFileFromBucket(key string) (FileWrapper, error) {
	log.Info("Getting file from Bucket")

	obj, err := s3Service.GetObject(&s3.GetObjectInput{
		Bucket: aws.String(bucketName),
		Key:    aws.String(key),
	})
	if err != nil {
		log.Errorf("Error while getting %q from S3: %s\n", key, err.Error())
		return FileWrapper{}, err
	}

	s3File := FileWrapper{
		File:       nil,
		HeadOutput: nil,
		GetOutput:  obj,
	}

	if cachePath == "" {
		return s3File, nil
	}

	path, err := saveFileToCache(key, obj)
	if err != nil {
		// We couldn't save the file to the cache but still return the Get response from S3
		log.Error(err)
		return s3File, nil
	}

	// The content now lives on disk and the body has been read, so release
	// the underlying HTTP connection instead of leaking it.
	if closeErr := obj.Body.Close(); closeErr != nil {
		log.Debug(closeErr)
	}

	fh, err := os.Open(path)
	if err != nil {
		// The S3 body is already drained, so there is nothing left to serve;
		// report the failure rather than answering with an empty body.
		return FileWrapper{}, fmt.Errorf("opening cached copy of %q: %w", key, err)
	}

	return FileWrapper{
		File:       fh,
		HeadOutput: nil,
		GetOutput:  obj,
	}, nil
}

// createWithFolders makes sure every parent directory of p exists (created
// with mode 0770 where missing) and then creates or truncates the file p
// itself, returning the open handle.
func createWithFolders(p string) (*os.File, error) {
	parent := filepath.Dir(p)
	err := os.MkdirAll(parent, 0770)
	if err != nil {
		return nil, err
	}

	file, err := os.Create(p)
	return file, err
}

// saveFileToCache writes the body of obj to the cache directory under the
// same relative path as key and returns the path of the written file.
//
// The body of obj is consumed by this call. If the copy or the final close
// fails, the incomplete file is removed again so a truncated download can
// never be served from the cache, and the error is returned.
func saveFileToCache(key string, obj *s3.GetObjectOutput) (string, error) {
	log.Debug("Saving file to cache")
	filePath := filepath.Join(cachePath, key)

	outFile, err := createWithFolders(filePath)
	if err != nil {
		log.Error("Couldn't create cache dir")
		return "", err
	}

	if _, err := io.Copy(outFile, obj.Body); err != nil {
		// Cleanup is best effort; the copy error is the one worth reporting.
		_ = outFile.Close()
		_ = os.Remove(filePath)
		return "", fmt.Errorf("writing %q to cache: %w", key, err)
	}

	// Close explicitly: a failed close can mean buffered data never reached
	// the disk, which would leave a corrupt cache entry behind.
	if err := outFile.Close(); err != nil {
		_ = os.Remove(filePath)
		return "", fmt.Errorf("closing cache file for %q: %w", key, err)
	}

	return filePath, nil
}

func handler(w http.ResponseWriter, r *http.Request) {
defer r.Body.Close()

Expand All @@ -27,24 +161,30 @@ func handler(w http.ResponseWriter, r *http.Request) {
return
}

input := &s3.GetObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(key),
}
obj, err := s3Service.GetObject(input)
log.WithFields(log.Fields{
"key": key,
}).Info("Got a request")

obj, err := getFile(key)
if err != nil {
log.Printf("Error while getting %q: %s\n", key, err.Error())
w.WriteHeader(http.StatusForbidden)
w.Write([]byte("Forbidden"))
return
}

defer obj.Body.Close()
// Set correct ContentType
w.Header().Set("Content-Type", obj.GetContentType())

w.Header().Set("Content-Type", *obj.ContentType)
// Check for additional metadata
metadata := obj.GetMetadata()
if len(metadata) > 0 {
for k, v := range metadata {
w.Header().Set(k, *v)
}
}

// Directly copy all bytes from the S3 object into the HTTP reponse
io.Copy(w, obj.Body)
io.Copy(w, obj.GetContent())
}

func envOrDefault(name string, defaultValue string) string {
Expand All @@ -59,18 +199,39 @@ func main() {
region := envOrDefault("S3PROXY_REGION", "eu-central-1")
port := envOrDefault("S3PROXY_PORT", "3000")
bucketName = envOrDefault("S3PROXY_BUCKET", "")
cachePath = envOrDefault("S3PROXY_CACHE", "")
logLevel := envOrDefault("S3PROXY_LOGGING", "WARN")

l, err := log.ParseLevel(logLevel)
if err != nil {
log.Error("Unknown loglevel provided. Defaulting to WARN")
log.SetLevel(log.WarnLevel)
} else {
log.SetLevel(l)
}

if bucketName == "" {
log.Fatal("You need to provide S3PROXY_BUCKET")
}

if cachePath != "" {
// Check if we have write access to the cache directory
testPath := filepath.Join(cachePath, ".testfile")
file, err := createWithFolders(testPath)
if err != nil {
log.Fatal("No write access to the cache dir")
}
defer file.Close()

}

sess := session.Must(session.NewSession(&aws.Config{
Region: aws.String(region),
}))
s3Service = s3.New(sess)

http.HandleFunc("/", handler)

log.Printf("Listening on :%s \n", port)
log.Info("Listening on :%s \n", port)
log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", port), nil))
}

0 comments on commit 4e90c72

Please sign in to comment.