Commit 50489f2

chore: increase memory efficiency
ibuildthecloud committed Nov 2, 2024
1 parent 886f944 commit 50489f2
Showing 4 changed files with 48 additions and 52 deletions.
pkg/cache/cache.go (1 change: 1 addition & 0 deletions)

@@ -92,6 +92,7 @@ func (c *Client) CacheDir() string {

 func (c *Client) cacheKey(key any) (string, error) {
 	hash := sha256.New()
+	hash.Write([]byte("v2"))
 	if err := json.NewEncoder(hash).Encode(key); err != nil {
 		return "", err
 	}
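Writing "v2" into the hash salts every cache key, so entries written by earlier builds (which cached raw stream chunks) can never be read back as the new aggregated format. A minimal sketch of the versioned-key idea — this standalone cacheKey helper is illustrative, not the repo's exact function:

```go
// Sketch: version-salted, content-addressed cache keys.
// Bumping the version string invalidates all previously stored entries,
// which is the safe way to change what a cache stores.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
)

func cacheKey(version string, key any) (string, error) {
	hash := sha256.New()
	hash.Write([]byte(version)) // mixed into the digest before the payload
	if err := json.NewEncoder(hash).Encode(key); err != nil {
		return "", err
	}
	return hex.EncodeToString(hash.Sum(nil)), nil
}

func main() {
	v1, _ := cacheKey("v1", map[string]string{"model": "gpt-4o"})
	v2, _ := cacheKey("v2", map[string]string{"model": "gpt-4o"})
	fmt.Println(v1 != v2) // true: old entries simply miss
}
```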
pkg/engine/http.go (2 changes: 1 addition & 1 deletion)

@@ -76,7 +76,7 @@ func (e *Engine) runHTTP(ctx context.Context, prg *types.Program, tool types.Too
 	}

 	for _, env := range e.Env {
-		if strings.HasPrefix(env, "GPTSCRIPT_") {
+		if strings.HasPrefix(env, "GPTSCRIPT_WORKSPACE_") {
 			req.Header.Add("X-GPTScript-Env", env)
 		}
 	}
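Tightening the prefix from GPTSCRIPT_ to GPTSCRIPT_WORKSPACE_ means only workspace-related variables are forwarded to HTTP tools in the X-GPTScript-Env header; other GPTSCRIPT_-prefixed settings no longer ride along. A quick sketch of the filter's effect (the sample variables are made up for illustration):

```go
// Sketch: effect of narrowing the forwarded-env prefix.
package main

import (
	"fmt"
	"strings"
)

func main() {
	env := []string{
		"GPTSCRIPT_WORKSPACE_DIR=/tmp/ws",           // still forwarded
		"GPTSCRIPT_INTERNAL_OPENAI_STREAMING=false", // previously forwarded, now dropped
		"PATH=/usr/bin",                             // never forwarded
	}
	for _, e := range env {
		if strings.HasPrefix(e, "GPTSCRIPT_WORKSPACE_") {
			fmt.Println("X-GPTScript-Env:", e)
		}
	}
}
```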
pkg/openai/client.go (96 changes: 46 additions & 50 deletions)

@@ -9,6 +9,7 @@ import (
"slices"
"sort"
"strings"
"time"

openai "github.com/gptscript-ai/chat-completion-client"
"github.com/gptscript-ai/gptscript/pkg/cache"
@@ -212,15 +213,15 @@ func (c *Client) seed(request openai.ChatCompletionRequest) int {
 	return hash.Seed(newRequest)
 }

-func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result []openai.ChatCompletionStreamResponse, _ bool, _ error) {
+func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result types.CompletionMessage, _ bool, _ error) {
 	if !messageRequest.GetCache() {
-		return nil, false, nil
+		return types.CompletionMessage{}, false, nil
 	}
 	found, err := c.cache.Get(ctx, c.cacheKey(request), &result)
 	if err != nil {
-		return nil, false, err
+		return types.CompletionMessage{}, false, err
 	} else if !found {
-		return nil, false, nil
+		return types.CompletionMessage{}, false, nil
 	}
 	return result, true, nil
 }
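This signature change is the core of the memory work: the cache now stores one folded types.CompletionMessage instead of the full []openai.ChatCompletionStreamResponse chunk history, and the callers below stop re-aggregating. The pattern, sketched with stand-in Chunk/Message types (not the real ones):

```go
// Sketch: fold stream chunks into one message as they arrive, instead of
// retaining the whole chunk slice. Memory is O(final message), not O(chunks).
package main

import "fmt"

type Chunk struct{ Delta string }     // stand-in for openai.ChatCompletionStreamResponse
type Message struct{ Content string } // stand-in for types.CompletionMessage

func appendChunk(msg Message, c Chunk) Message {
	msg.Content += c.Delta
	return msg
}

func main() {
	var msg Message
	for _, c := range []Chunk{{"Hel"}, {"lo, "}, {"world"}} {
		msg = appendChunk(msg, c) // each chunk is garbage-collectable after this
	}
	fmt.Println(msg.Content) // Hello, world
}
```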
@@ -396,33 +397,27 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 			IncludeUsage: true,
 		}
 	}
-	response, ok, err := c.fromCache(ctx, messageRequest, request)
+	result, ok, err := c.fromCache(ctx, messageRequest, request)
 	if err != nil {
 		return nil, err
 	} else if !ok {
-		response, err = c.call(ctx, request, id, status)
+		result, err = c.call(ctx, request, id, status)

 		// If we got back a context length exceeded error, keep retrying and shrinking the message history until we pass.
 		var apiError *openai.APIError
 		if errors.As(err, &apiError) && apiError.Code == "context_length_exceeded" && messageRequest.Chat {
 			// Decrease maxTokens by 10% to make garbage collection more aggressive.
 			// The retry loop will further decrease maxTokens if needed.
 			maxTokens := decreaseTenPercent(messageRequest.MaxTokens)
-			response, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
+			result, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
 		}

 		if err != nil {
 			return nil, err
 		}
 	} else {
 		cacheResponse = true
 	}

-	result := types.CompletionMessage{}
-	for _, response := range response {
-		result = appendMessage(result, response)
-	}
-
 	for i, content := range result.Content {
 		if content.ToolCall != nil && content.ToolCall.ID == "" {
 			content.ToolCall.ID = "call_" + hash.ID(content.ToolCall.Function.Name, content.ToolCall.Function.Arguments)[:8]
@@ -440,7 +435,6 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques

 	status <- types.CompletionStatus{
 		CompletionID: id,
-		Chunks:       response,
 		Response:     result,
 		Usage:        result.Usage,
 		Cached:       cacheResponse,
@@ -449,9 +443,9 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 	return &result, nil
 }

-func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) ([]openai.ChatCompletionStreamResponse, error) {
+func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) (types.CompletionMessage, error) {
 	var (
-		response []openai.ChatCompletionStreamResponse
+		response types.CompletionMessage
 		err      error
 	)

@@ -469,10 +463,10 @@ func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatC
 			maxTokens = decreaseTenPercent(maxTokens)
 			continue
 		}
-		return nil, err
+		return types.CompletionMessage{}, err
 	}

-	return nil, err
+	return types.CompletionMessage{}, err
 }

 func appendMessage(msg types.CompletionMessage, response openai.ChatCompletionStreamResponse) types.CompletionMessage {
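As the comments in the Call hunk say, on a context_length_exceeded error the client keeps retrying with a token budget cut by 10% each round. A self-contained sketch of that shrink-and-retry shape (errContextLength and the callback are hypothetical stand-ins for the OpenAI API error and c.call):

```go
// Sketch: shrink the token budget by 10% after each context-length failure.
package main

import (
	"errors"
	"fmt"
)

var errContextLength = errors.New("context_length_exceeded") // stand-in error

func retryWithSmallerContext(maxTokens int, callOnce func(budget int) error) error {
	for maxTokens > 0 {
		err := callOnce(maxTokens)
		if !errors.Is(err, errContextLength) {
			return err // success, or an error retrying cannot fix
		}
		maxTokens = maxTokens * 9 / 10 // decrease ten percent
	}
	return fmt.Errorf("request never fit within the context window")
}

func main() {
	err := retryWithSmallerContext(1000, func(budget int) error {
		if budget > 800 {
			return errContextLength // pretend the history is still too long
		}
		return nil
	})
	fmt.Println(err) // <nil>: succeeded once the budget fell to 810
}
```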
@@ -548,7 +542,7 @@ func override(left, right string) string {
 	return left
 }

-func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (responses []openai.ChatCompletionStreamResponse, _ error) {
+func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (types.CompletionMessage, error) {
 	streamResponse := os.Getenv("GPTSCRIPT_INTERNAL_OPENAI_STREAMING") != "false"

 	partial <- types.CompletionStatus{
@@ -565,56 +559,58 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest,
 		request.StreamOptions = nil
 		resp, err := c.c.CreateChatCompletion(ctx, request)
 		if err != nil {
-			return nil, err
+			return types.CompletionMessage{}, err
 		}
-		return []openai.ChatCompletionStreamResponse{
-			{
-				ID:      resp.ID,
-				Object:  resp.Object,
-				Created: resp.Created,
-				Model:   resp.Model,
-				Usage:   resp.Usage,
-				Choices: []openai.ChatCompletionStreamChoice{
-					{
-						Index: resp.Choices[0].Index,
-						Delta: openai.ChatCompletionStreamChoiceDelta{
-							Content:      resp.Choices[0].Message.Content,
-							Role:         resp.Choices[0].Message.Role,
-							FunctionCall: resp.Choices[0].Message.FunctionCall,
-							ToolCalls:    resp.Choices[0].Message.ToolCalls,
-						},
-						FinishReason: resp.Choices[0].FinishReason,
+		return appendMessage(types.CompletionMessage{}, openai.ChatCompletionStreamResponse{
+			ID:      resp.ID,
+			Object:  resp.Object,
+			Created: resp.Created,
+			Model:   resp.Model,
+			Usage:   resp.Usage,
+			Choices: []openai.ChatCompletionStreamChoice{
+				{
+					Index: resp.Choices[0].Index,
+					Delta: openai.ChatCompletionStreamChoiceDelta{
+						Content:      resp.Choices[0].Message.Content,
+						Role:         resp.Choices[0].Message.Role,
+						FunctionCall: resp.Choices[0].Message.FunctionCall,
+						ToolCalls:    resp.Choices[0].Message.ToolCalls,
+					},
+					FinishReason: resp.Choices[0].FinishReason,
 				},
 			},
-		}, nil
+		}), nil
 	}

 	stream, err := c.c.CreateChatCompletionStream(ctx, request)
 	if err != nil {
-		return nil, err
+		return types.CompletionMessage{}, err
 	}
 	defer stream.Close()

-	var partialMessage types.CompletionMessage
+	var (
+		partialMessage types.CompletionMessage
+		start          = time.Now()
+		last           []string
+	)
 	for {
 		response, err := stream.Recv()
 		if err == io.EOF {
-			return responses, c.cache.Store(ctx, c.cacheKey(request), responses)
+			return partialMessage, c.cache.Store(ctx, c.cacheKey(request), partialMessage)
 		} else if err != nil {
-			return nil, err
-		}
-		if len(response.Choices) > 0 {
-			slog.Debug("stream", "content", response.Choices[0].Delta.Content)
+			return types.CompletionMessage{}, err
 		}
+		partialMessage = appendMessage(partialMessage, response)
 		if partial != nil {
-			partialMessage = appendMessage(partialMessage, response)
-			partial <- types.CompletionStatus{
-				CompletionID:    transactionID,
-				PartialResponse: &partialMessage,
+			if time.Since(start) > 500*time.Millisecond {
+				last = last[:0]
+				partial <- types.CompletionStatus{
+					CompletionID:    transactionID,
+					PartialResponse: &partialMessage,
+				}
+				start = time.Now()
 			}
 		}
-		responses = append(responses, response)
 	}
 }
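The new time.Since(start) > 500*time.Millisecond guard debounces progress reporting: every chunk is still folded into partialMessage, but the status channel receives a snapshot at most about twice per second, cutting channel traffic and downstream copying. The same throttle in isolation (loop counts and timings are illustrative):

```go
// Sketch: time-based throttling of progress snapshots on a channel.
package main

import (
	"fmt"
	"time"
)

func main() {
	updates := make(chan int, 16)
	start := time.Now()
	accumulated := 0

	for i := 0; i < 50; i++ {
		accumulated++ // every "chunk" is folded in...
		if time.Since(start) > 500*time.Millisecond {
			updates <- accumulated // ...but snapshots are rate-limited
			start = time.Now()
		}
		time.Sleep(25 * time.Millisecond) // pretend chunks arrive steadily
	}
	close(updates)
	for n := range updates {
		fmt.Println("snapshot at", n) // ~2 snapshots instead of 50 sends
	}
}
```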

pkg/types/completion.go (1 change: 0 additions & 1 deletion)

@@ -82,7 +82,6 @@ type CompletionStatus struct {
 	Response        any
 	Usage           Usage
 	Cached          bool
-	Chunks          any
 	PartialResponse *CompletionMessage
 }
