Skip to content

Commit

Permalink
Decouple service unavailability from 'queue full' 503 errors
Browse files Browse the repository at this point in the history
- Add Error::Unavailable and integrate with HTTP error handler
- Update test case for Error::Busy
- Add test case for Error::Unavailable
  • Loading branch information
Michael-JB committed May 27, 2024
1 parent f755feb commit 272c615
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 9 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Unreleased

* Add `Error::Unavailable` to decouple service unavailability from 'queue full' 503 responses.

## 0.7.1

* Add `Client::tokenize` and `Client::detokenize`. Thanks to @andreaskoepf
Expand Down
22 changes: 19 additions & 3 deletions src/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,19 @@ async fn translate_http_error(response: reqwest::Response) -> Result<reqwest::Re
// the API, but an intermediate Proxy like NGinx, so we can still forward the error
// message.
let body = response.text().await?;
// If the response is an error emitted by the API, this deserialization should succeed.
let api_error: Result<ApiError, _> = serde_json::from_str(&body);
let translated_error = match status {
StatusCode::TOO_MANY_REQUESTS => Error::TooManyRequests,
StatusCode::SERVICE_UNAVAILABLE => Error::Busy,
StatusCode::SERVICE_UNAVAILABLE => {
// Presence of `api_error` implies the error originated from the API itself (rather
// than the intermediate proxy) and so we can decode it as such.
if api_error.is_ok_and(|error| error.code == "QUEUE_FULL") {
Error::Busy
} else {
Error::Unavailable
}
}
_ => Error::Http {
status: status.as_u16(),
body,
Expand All @@ -175,14 +185,14 @@ async fn translate_http_error(response: reqwest::Response) -> Result<reqwest::Re
}

/// We are only interested in the status codes of the API.
#[derive(Deserialize)]
#[derive(Deserialize, Debug)]
struct ApiError<'a> {
/// Unique string in capital letters emitted by the API to signal different kinds of errors in a
/// finer granularity than the HTTP status codes alone would allow for.
///
/// E.g. Differentiating between request rate limiting and parallel tasks limiting which both
/// are 429 (the former is emitted by NGinx though).
_code: Cow<'a, str>,
code: Cow<'a, str>,
}

/// Errors returned by the Aleph Alpha Client
Expand All @@ -202,6 +212,12 @@ pub enum Error {
welcome to retry your request any time."
)]
Busy,
/// The API itself is unavailable, most likely due to a restart.
#[error(
"The service is currently unavailable. This is likely due to restart. Please try again \
later."
)]
Unavailable,
#[error("No response received within given timeout: {0:?}")]
ClientTimeout(Duration),
/// An error on the Http Protocol level.
Expand Down
50 changes: 44 additions & 6 deletions tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ async fn detect_queue_full() {
// Start a background HTTP server on a random local port
let mock_server = MockServer::start().await;

let answer = r#"{
"error":"Sorry we had to reject your request because we could not guarantee to finish it in
a reasonable timeframe. This specific model is very busy at this moment. Try again later
or use another model.",
"code":"QUEUE_FULL"
}"#;
let answer = "{
\"error\":\"Sorry we had to reject your request because we could not guarantee to finish \
it in a reasonable timeframe. This specific model is very busy at this moment. Try \
again later or use another model.\",
\"code\":\"QUEUE_FULL\"
}";
let body = r#"{
"model": "luminous-base",
"prompt": [{"type": "text", "data": "Hello,"}],
Expand All @@ -124,9 +124,47 @@ async fn detect_queue_full() {
.await
.unwrap_err();

// Then
assert!(matches!(error, Error::Busy));
}

/// A 503 whose body is not an API error payload must be reported as [`Error::Unavailable`]
/// rather than as a full queue.
#[tokio::test]
async fn detect_service_unavailable() {
    // Given: a local mock server (bound to a random port) answering the completion request with
    // a plain-text 503 body, i.e. a body that does not carry an API error code.
    let server = MockServer::start().await;

    let expected_request = r#"{
"model": "luminous-base",
"prompt": [{"type": "text", "data": "Hello,"}],
"maximum_tokens": 1
}"#;
    let unavailable = ResponseTemplate::new(503)
        .set_body_string("No server is available to handle this request.");

    Mock::given(method("POST"))
        .and(path("/complete"))
        .and(header("Authorization", "Bearer dummy-token"))
        .and(header("Content-Type", "application/json"))
        .and(body_json_string(expected_request))
        .respond_with(unavailable)
        .mount(&server)
        .await;

    // When
    let client = Client::with_base_url(server.uri(), "dummy-token").unwrap();
    let task = TaskCompletion::from_text("Hello,", 1);
    let outcome = client
        .output_of(&task.with_model("luminous-base"), &How::default())
        .await;

    // Then
    let error = outcome.unwrap_err();
    assert!(matches!(error, Error::Unavailable));
}

/// Should set `nice=true` in query URL in order to tell the server we do not need our result right
/// now in a high stress situation.
#[tokio::test]
Expand Down

0 comments on commit 272c615

Please sign in to comment.