Skip to content

Commit

Permalink
[DISCO-2503] Suggest: Pocket suggestion ingestion
Browse files Browse the repository at this point in the history
  • Loading branch information
tiftran committed Sep 27, 2023
1 parent 65283ab commit 029f80a
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 3 deletions.
134 changes: 131 additions & 3 deletions components/suggest/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

use std::{path::Path, sync::Arc};
use std::{borrow::Cow, path::Path, sync::Arc};

use interrupt_support::{SqlInterruptHandle, SqlInterruptScope};
use parking_lot::Mutex;
Expand All @@ -14,7 +14,7 @@ use rusqlite::{
};
use sql_support::{open_database::open_database_with_flags, ConnExt};

use crate::rs::DownloadedAmoSuggestion;
use crate::rs::{DownloadedAmoSuggestion, DownloadedPocketSuggestion};
use crate::{
keyword::full_keyword,
provider::SuggestionProvider,
Expand Down Expand Up @@ -124,14 +124,29 @@ impl<'a> SuggestDao<'a> {

/// Fetches suggestions that match the given keyword from the database.
pub fn fetch_by_keyword(&self, keyword: &str) -> Result<Vec<Suggestion>> {
let mut pocket_keyword = Cow::from(keyword);
if !keyword.contains(' ') {
pocket_keyword.to_mut().push(' ');
}
self.conn.query_rows_and_then_cached(
"SELECT s.id, k.rank, s.title, s.url, s.provider
"SELECT s.id, k.rank, s.title, s.url, s.provider, false as is_top_pick
FROM suggestions s
JOIN keywords k ON k.suggestion_id = s.id
WHERE k.keyword = :keyword
UNION
SELECT s.id, k.rank, s.title, s.url, s.provider, true as is_top_pick
FROM suggestions s
JOIN pocket_high_confidence_keywords k ON k.suggestion_id = s.id
WHERE k.keyword = :keyword
UNION
SELECT s.id, k.rank, s.title, s.url, s.provider, false as is_top_pick
FROM suggestions s
JOIN pocket_low_confidence_keywords k ON k.suggestion_id = s.id
WHERE k.keyword BETWEEN :pocket_keyword AND :pocket_keyword || x'ffff'
LIMIT 1",
named_params! {
":keyword": keyword,
":pocket_keyword": pocket_keyword
},
|row| -> Result<Suggestion>{
let suggestion_id: i64 = row.get("id")?;
Expand Down Expand Up @@ -219,6 +234,25 @@ impl<'a> SuggestDao<'a> {
})
})
},
SuggestionProvider::Pocket => {
let is_top_pick = row.get("is_top_pick")?;
self.conn.query_row_and_then(
"SELECT p.score
FROM pocket_custom_details p
WHERE p.suggestion_id = :suggestion_id",
named_params! {
":suggestion_id": suggestion_id
},
|row| {
Ok(Suggestion::Pocket {
title,
url: raw_url,
score: row.get("score")?,
is_top_pick
})
}
)
}
}
},
)
Expand Down Expand Up @@ -417,6 +451,100 @@ impl<'a> SuggestDao<'a> {
Ok(())
}

/// Stores every suggestion from a downloaded Pocket attachment.
///
/// For each entry of `suggestions` this writes:
///
/// * one row to `suggestions`, tagged with `record_id` and
///   `SuggestionProvider::Pocket`;
/// * one row to `pocket_custom_details` holding the description and score;
/// * one row per keyword to `pocket_low_confidence_keywords` and then to
///   `pocket_high_confidence_keywords`, where `rank` is the keyword's
///   position in its downloaded list.
///
/// `self.scope.err_if_interrupted()` is consulted before each suggestion.
/// The first failing call returns its error immediately; later suggestions
/// are not attempted.
pub fn insert_pocket_suggestions(
    &mut self,
    record_id: &SuggestRecordId,
    suggestions: &[DownloadedPocketSuggestion],
) -> Result<()> {
    for pocket in suggestions {
        self.scope.err_if_interrupted()?;

        // `RETURNING id` hands back the id of the freshly inserted row so
        // the detail and keyword rows below can reference it.
        // NOTE(review): the trailing `true` looks like the "use the
        // statement cache" flag of `query_row_and_then_cachable` -- confirm.
        let new_id: i64 = self.conn.query_row_and_then_cachable(
            "INSERT INTO suggestions(
record_id,
provider,
title,
url
)
VALUES(
:record_id,
:provider,
:title,
:url
)
RETURNING id
",
            named_params! {
                ":record_id": record_id.as_str(),
                ":provider": SuggestionProvider::Pocket,
                ":title": pocket.title,
                ":url": pocket.url,
            },
            |row| row.get(0),
            true,
        )?;

        self.conn.execute(
            "INSERT INTO pocket_custom_details(
suggestion_id,
description,
score
)
VALUES(
:suggestion_id,
:description,
:score
)",
            named_params! {
                ":suggestion_id": new_id,
                ":description": pocket.description,
                ":score": pocket.score,
            },
        )?;

        // Both keyword tables take the same three columns, so pair each
        // INSERT statement with the list that feeds it. Low-confidence
        // keywords are written before high-confidence ones.
        let keyword_inserts = [
            (
                "INSERT INTO pocket_low_confidence_keywords(
keyword,
suggestion_id,
rank
)
VALUES(
:keyword,
:suggestion_id,
:rank
)",
                &pocket.low_confidence_keywords,
            ),
            (
                "INSERT INTO pocket_high_confidence_keywords(
keyword,
suggestion_id,
rank
)
VALUES(
:keyword,
:suggestion_id,
:rank
)",
                &pocket.high_confidence_keywords,
            ),
        ];
        for (insert_sql, keywords) in keyword_inserts {
            for (position, phrase) in keywords.iter().enumerate() {
                self.conn.execute(
                    insert_sql,
                    named_params! {
                        ":keyword": phrase,
                        ":rank": position,
                        ":suggestion_id": new_id,
                    },
                )?;
            }
        }
    }
    Ok(())
}

/// Inserts or replaces an icon for a suggestion into the database.
pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> {
self.conn.execute(
Expand Down
2 changes: 2 additions & 0 deletions components/suggest/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub enum SuggestionProvider {
Amp = 1,
Wikipedia = 2,
Amo = 3,
Pocket = 4,
}

impl FromSql for SuggestionProvider {
Expand All @@ -34,6 +35,7 @@ impl SuggestionProvider {
1 => Some(SuggestionProvider::Amp),
2 => Some(SuggestionProvider::Wikipedia),
3 => Some(SuggestionProvider::Amo),
4 => Some(SuggestionProvider::Pocket),
_ => None,
}
}
Expand Down
14 changes: 14 additions & 0 deletions components/suggest/src/rs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ pub(crate) enum SuggestRecord {
AmpWikipedia,
#[serde(rename = "amo-suggestions")]
Amo,
#[serde(rename = "pocket-suggestions")]
Pocket,
}

/// Represents either a single value, or a list of values. This is used to
Expand Down Expand Up @@ -253,3 +255,15 @@ pub(crate) struct DownloadedAmoSuggestion {
pub keywords: Vec<String>,
pub score: f64,
}
/// One entry of a downloaded Pocket suggestion attachment, in the shape it
/// is deserialized into before ingestion.
///
/// Multi-word fields are read from camelCase keys (`lowConfidenceKeywords`,
/// `highConfidenceKeywords`); single-word keys match the field names as-is.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct DownloadedPocketSuggestion {
    /// Text stored in `pocket_custom_details.description`.
    pub description: String,
    /// Stored as the suggestion's `url`.
    pub url: String,
    /// Stored as the suggestion's `title`.
    pub title: String,
    /// Keywords written to `pocket_low_confidence_keywords`, ranked by
    /// their position in this list.
    pub low_confidence_keywords: Vec<String>,
    /// Keywords written to `pocket_high_confidence_keywords`, ranked by
    /// their position in this list.
    pub high_confidence_keywords: Vec<String>,
    /// Value stored in `pocket_custom_details.score`.
    pub score: f64,
}
20 changes: 20 additions & 0 deletions components/suggest/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ pub const SQL: &str = "
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
CREATE TABLE pocket_low_confidence_keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
);
CREATE TABLE pocket_high_confidence_keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
);
CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
CREATE TABLE suggestions(
Expand All @@ -43,6 +57,12 @@ pub const SQL: &str = "
ON DELETE CASCADE
);
CREATE TABLE pocket_custom_details(
suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
description TEXT NOT NULL,
score REAL NOT NULL
);
CREATE TABLE wikipedia_custom_details(
suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
icon_id TEXT NOT NULL
Expand Down
45 changes: 45 additions & 0 deletions components/suggest/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,10 +365,33 @@ where
dao.drop_unparsable_record_id(&record_id)?;

dao.put_last_ingest_if_newer(record.last_modified)?;
Ok(())
})?;
}
SuggestRecord::Pocket => {
let Some(attachment) = record.attachment.as_ref() else {
writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?;
continue;
};

let attachment: SuggestAttachment<_> = serde_json::from_slice(
&self.settings_client.get_attachment(&attachment.location)?,
)?;

writer.write(|dao| {
dao.drop_suggestions(&record_id)?;

dao.insert_pocket_suggestions(&record_id, attachment.suggestions())?;


dao.drop_unparsable_record_id(&record_id)?;


dao.put_last_ingest_if_newer(record.last_modified)?;
Ok(())
})?;
}

}
}
Ok(())
Expand Down Expand Up @@ -1394,6 +1417,17 @@ mod tests {
"hash": "",
"size": 0,
},
}, {
"id": "data-3",
"type": "pocket-suggestions",
"last_modified": 15,
"attachment": {
"filename": "data-3.json",
"mimetype": "application/json",
"location": "data-3.json",
"hash": "",
"size": 0,
},
}, {
"id": "icon-2",
"type": "icon",
Expand Down Expand Up @@ -1452,6 +1486,17 @@ mod tests {
"number_of_ratings": 888,
"score": 0.25
}]),
)?
.with_data(
"data-3.json",
json!([{
"description": "pocket suggestion",
"url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
"lowConfidenceKeywords": ["soft life", "workaholism", "toxic work culture", "work-life balance"],
"highConfidenceKeywords": ["burnout women", "grind culture", "women burnout"],
"title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
"score": 0.25
}]),
)?
.with_icon("icon-2.png", "i-am-an-icon".as_bytes().into())
.with_icon("icon-3.png", "also-an-icon".as_bytes().into());
Expand Down
7 changes: 7 additions & 0 deletions components/suggest/src/suggest.udl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ interface SuggestApiError {

// Suggestion providers declared for the UDL interface.
// NOTE(review): the Rust `SuggestionProvider` enum numbers its variants
// Amp = 1, Wikipedia = 2, Amo = 3, Pocket = 4, while this list places
// "Pocket" second -- confirm nothing relies on the ordinal position here.
enum SuggestionProvider {
"Amp",
"Pocket",
"Wikipedia",
"Amo",
};
Expand All @@ -38,6 +39,12 @@ interface Suggestion {
string click_url,
string raw_click_url
);
Pocket(
string title,
string url,
f64 score,
boolean is_top_pick
);
Wikipedia(
string title,
string url,
Expand Down
6 changes: 6 additions & 0 deletions components/suggest/src/suggestion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ pub enum Suggestion {
click_url: String,
raw_click_url: String,
},
Pocket {
title: String,
url: String,
score: f64,
is_top_pick: bool,
},
Wikipedia {
title: String,
url: String,
Expand Down

0 comments on commit 029f80a

Please sign in to comment.