fix: upsert new crawl options
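The update path previously fetched the existing crawl request with .first() and special-cased diesel's NotFound by creating a brand-new request and returning early. It now loads the matching requests into a Vec (which should only ever be empty or hold a single entry), merges the previously stored options with the incoming ones when a prior request exists, and kicks off a fresh crawl only when no prior request was found.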
drew-harris committed Oct 9, 2024
1 parent c9db0b5 commit 0fd2215
Showing 1 changed file with 25 additions and 34 deletions.
59 changes: 25 additions & 34 deletions server/src/operators/crawl_operator.rs
@@ -335,7 +335,8 @@ pub async fn update_crawl_settings_for_dataset(
         .await
         .map_err(|e| ServiceError::InternalServerError(e.to_string()))?;
 
-    let crawl_req: Result<CrawlRequestPG, diesel::result::Error> =
+    // Should only ever be empty or len = 1
+    let prev_crawl_reqs: Vec<CrawlRequestPG> = {
         crawl_requests_table::crawl_requests
             .select((
                 crawl_requests_table::id,
@@ -349,26 +350,9 @@ pub async fn update_crawl_settings_for_dataset(
                 crawl_requests_table::created_at,
             ))
             .filter(crawl_requests_table::dataset_id.eq(dataset_id))
-            .first::<CrawlRequestPG>(&mut conn)
-            .await;
-
-    let crawl_req = match crawl_req {
-        Err(e) => match e {
-            diesel::result::Error::NotFound => {
-                create_crawl_request(
-                    crawl_options.clone(),
-                    dataset_id,
-                    uuid::Uuid::new_v4(),
-                    pool.clone(),
-                    redis_pool.clone(),
-                )
-                .await
-                .map_err(|e| ServiceError::InternalServerError(e.to_string()))?;
-                return Ok(());
-            }
-            _ => return Err(ServiceError::InternalServerError(e.to_string())),
-        },
-        Ok(req) => req,
+            .load::<CrawlRequestPG>(&mut conn)
+            .await
+            .map_err(|e| ServiceError::InternalServerError(e.to_string()))?
     };
 
     if let Some(ref url) = crawl_options.site_url {
@@ -398,32 +382,39 @@ pub async fn update_crawl_settings_for_dataset(
             .map_err(|e| ServiceError::InternalServerError(e.to_string()))?;
     }
 
-    let previous_crawl_options: CrawlOptions = serde_json::from_value(crawl_req.crawl_options)
-        .map_err(|e| ServiceError::InternalServerError(e.to_string()))?;
-
-    let merged_options = crawl_options.merge(previous_crawl_options);
+    let new_options = match prev_crawl_reqs.get(0) {
+        Some(prev_crawl_req) => {
+            let previous_crawl_options: CrawlOptions =
+                serde_json::from_value(prev_crawl_req.clone().crawl_options)
+                    .map_err(|e| ServiceError::InternalServerError(e.to_string()))?;
+
+            let merged_options = previous_crawl_options.merge(crawl_options.clone());
+            merged_options
+        }
+        None => crawl_options.clone(),
+    };
     diesel::update(
         crawl_requests_table::crawl_requests
             .filter(crawl_requests_table::dataset_id.eq(dataset_id)),
     )
     .set(crawl_requests_table::crawl_options.eq(
-        serde_json::to_value(merged_options.clone()).map_err(|e| {
+        serde_json::to_value(new_options.clone()).map_err(|e| {
             ServiceError::BadRequest(format!("Failed to serialize crawl options: {}", e))
         })?,
     ))
     .execute(&mut conn)
     .await
     .map_err(|e| ServiceError::InternalServerError(e.to_string()))?;
 
-    crawl(
-        crawl_options.clone(),
-        pool.clone(),
-        redis_pool.clone(),
-        dataset_id,
-    )
-    .await?;
-
+    if prev_crawl_reqs.get(0).is_none() {
+        crawl(
+            crawl_options.clone(),
+            pool.clone(),
+            redis_pool.clone(),
+            dataset_id,
+        )
+        .await?;
+    }
     Ok(())
 }
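
A minimal, self-contained sketch of the upsert flow this commit introduces, for readers skimming the diff. The CrawlOptions fields and the merge semantics below are hypothetical stand-ins (the real CrawlOptions type, the diesel queries, and the crawl scheduling live in crawl_operator.rs); only the control flow mirrors the commit.

// Hypothetical sketch; not the actual Trieve types.
#[derive(Clone, Debug)]
struct CrawlOptions {
    site_url: Option<String>,
    interval_secs: Option<u64>,
}

impl CrawlOptions {
    // Assumed merge semantics: values set on `other` (the incoming update)
    // win, values on `self` (previously stored) fill the gaps.
    fn merge(self, other: CrawlOptions) -> CrawlOptions {
        CrawlOptions {
            site_url: other.site_url.or(self.site_url),
            interval_secs: other.interval_secs.or(self.interval_secs),
        }
    }
}

// Mirrors the commit's control flow: merge against the previous request when
// one exists, and report whether a fresh crawl should be kicked off.
fn upsert_options(
    prev_crawl_reqs: &[CrawlOptions], // should only ever be empty or len = 1
    crawl_options: CrawlOptions,
) -> (CrawlOptions, bool) {
    let new_options = match prev_crawl_reqs.first() {
        Some(previous) => previous.clone().merge(crawl_options),
        None => crawl_options,
    };
    // Only start a new crawl when there was no previous request to update.
    (new_options, prev_crawl_reqs.is_empty())
}

fn main() {
    let stored = CrawlOptions {
        site_url: Some("https://example.com".into()),
        interval_secs: None,
    };
    let incoming = CrawlOptions {
        site_url: None,
        interval_secs: Some(3600),
    };
    let (merged, start_crawl) = upsert_options(&[stored], incoming);
    // merged keeps the stored site_url and picks up the new interval;
    // start_crawl is false because a previous request existed.
    println!("merged = {:?}, start crawl = {}", merged, start_crawl);
}

In the real code the same prev_crawl_reqs.get(0) check gates both the merge and the trailing crawl(...) call, so a dataset that already has a crawl request gets its stored options updated rather than a duplicate crawl.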
