Skip to content

Commit

Permalink
[Inference API] Expand RateLimiter docs (elastic#117156)
Browse files Browse the repository at this point in the history
  • Loading branch information
timgrein authored Nov 20, 2024
1 parent 21c4431 commit e5209f9
Showing 1 changed file with 12 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
*
* By setting the accumulated tokens limit to a value greater than zero, it effectively allows bursts of traffic. If the accumulated
* tokens limit is set to zero, it will force the acquiring thread to wait on each call.
*
* Example:
* Time unit: Second
* Tokens to produce per time unit: 10
* Limit for tokens in bucket: 100
*
* Tokens in bucket after n seconds (n sec -> tokens in bucket):
* 1 sec -> 10 tokens, 2 sec -> 20 tokens, ..., 10 sec -> 100 tokens (bucket full), ..., 200 sec -> 100 tokens (no increase in tokens)
*/
public class RateLimiter {

Expand Down Expand Up @@ -76,6 +84,7 @@ public final synchronized void setRate(double newAccumulatedTokensLimit, double
throw new IllegalArgumentException(Strings.format("Tokens per time unit must be less than or equal to %s", Double.MAX_VALUE));
}

// If the new token limit is smaller than what we've accumulated already we need to drop tokens to meet the new token limit
accumulatedTokens = Math.min(accumulatedTokens, newAccumulatedTokensLimit);

accumulatedTokensLimit = newAccumulatedTokensLimit;
Expand All @@ -88,7 +97,8 @@ public final synchronized void setRate(double newAccumulatedTokensLimit, double
}

/**
* Causes the thread to wait until the tokens are available
* Causes the thread to wait until the tokens are available.
* This reserves tokens in advance, leading to a reduction of accumulated tokens.
* @param tokens the number of items of work that should be throttled, typically you'd pass a value of 1 here
* @throws InterruptedException if the thread is interrupted while waiting for the tokens to become available
*/
Expand Down Expand Up @@ -130,6 +140,7 @@ private static void validateTokenRequest(int tokens) {

/**
* Returns the amount of time to wait for the tokens to become available.
* This reserves tokens in advance leading to a reduction of accumulated tokens.
* @param tokens the number of items of work that should be throttled, typically you'd pass a value of 1 here. Must be greater than 0.
* @return the amount of time to wait
*/
Expand Down

0 comments on commit e5209f9

Please sign in to comment.