Skip to content

Commit

Permalink
[DISCO-2503] Suggest: Pocket suggestion ingestion
Browse files Browse the repository at this point in the history
  • Loading branch information
tiftran committed Oct 5, 2023
1 parent b7cb31d commit 0ec35cc
Show file tree
Hide file tree
Showing 9 changed files with 418 additions and 75 deletions.
152 changes: 139 additions & 13 deletions components/suggest/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ use rusqlite::{
};
use sql_support::{open_database::open_database_with_flags, ConnExt};

use crate::rs::DownloadedAmoSuggestion;
use crate::pocket::{split_keyword, KeywordConfidence};
use crate::rs::{DownloadedAmoSuggestion, DownloadedPocketSuggestion};
use crate::{
keyword::full_keyword,
provider::SuggestionProvider,
Expand Down Expand Up @@ -124,16 +125,22 @@ impl<'a> SuggestDao<'a> {

/// Fetches suggestions that match the given keyword from the database.
pub fn fetch_by_keyword(&self, keyword: &str) -> Result<Vec<Suggestion>> {
self.conn.query_rows_and_then_cached(
"SELECT s.id, k.rank, s.title, s.url, s.provider
let (keyword_prefix, keyword_suffix) = split_keyword(keyword);
Ok(self.conn.query_rows_and_then_cached(
"SELECT s.id, k.rank, s.title, s.url, s.provider, NULL as confidence, NULL as keyword_suffix
FROM suggestions s
JOIN keywords k ON k.suggestion_id = s.id
WHERE k.keyword = :keyword
LIMIT 1",
UNION ALL
SELECT s.id, k.rank, s.title, s.url, s.provider, k.confidence, k.keyword_suffix
FROM suggestions s
JOIN pocket_keywords k ON k.suggestion_id = s.id
WHERE k.keyword_prefix = :keyword_prefix",
named_params! {
":keyword": keyword,
":keyword_prefix": keyword_prefix,
},
|row| -> Result<Suggestion>{
|row| -> Result<Option<Suggestion>> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let raw_url = row.get::<_, String>("url")?;
Expand Down Expand Up @@ -164,7 +171,7 @@ impl<'a> SuggestDao<'a> {
let cooked_url = cook_raw_suggestion_url(&raw_url);
let raw_click_url = row.get::<_, String>("click_url")?;
let cooked_click_url = cook_raw_suggestion_url(&raw_click_url);
Ok(Suggestion::Amp {
Ok(Some(Suggestion::Amp {
block_id: row.get("block_id")?,
advertiser: row.get("advertiser")?,
iab_category: row.get("iab_category")?,
Expand All @@ -176,7 +183,7 @@ impl<'a> SuggestDao<'a> {
impression_url: row.get("impression_url")?,
click_url: cooked_click_url,
raw_click_url,
})
}))
}
)
},
Expand All @@ -191,12 +198,12 @@ impl<'a> SuggestDao<'a> {
},
true,
)?;
Ok(Suggestion::Wikipedia {
Ok(Some(Suggestion::Wikipedia {
title,
url: raw_url,
full_keyword: full_keyword(keyword, &keywords),
icon,
})
}))
}
SuggestionProvider::Amo => {
self.conn.query_row_and_then(
Expand All @@ -207,7 +214,7 @@ impl<'a> SuggestDao<'a> {
":suggestion_id": suggestion_id
},
|row| {
Ok(Suggestion::Amo{
Ok(Some(Suggestion::Amo{
title,
url: raw_url,
icon_url: row.get("icon_url")?,
Expand All @@ -216,12 +223,42 @@ impl<'a> SuggestDao<'a> {
number_of_ratings: row.get("number_of_ratings")?,
guid: row.get("guid")?,
score: row.get("score")?,
})
}))
})
},
SuggestionProvider::Pocket => {
let confidence = row.get("confidence")?;
let suffixes_match = match confidence {
KeywordConfidence::Low => row.get::<_, String>("keyword_suffix")?.starts_with(keyword_suffix),
KeywordConfidence::High => row.get::<_, String>("keyword_suffix")? == keyword_suffix,
};
if suffixes_match {
self.conn.query_row_and_then(
"SELECT p.score
FROM pocket_custom_details p
WHERE p.suggestion_id = :suggestion_id",
named_params! {
":suggestion_id": suggestion_id
},
|row| {
Ok(Some(Suggestion::Pocket {
title,
url: raw_url,
score: row.get("score")?,
is_top_pick: matches!(
confidence,
KeywordConfidence::High
)
}))
}
)
} else {
Ok(None)
}
}
}
},
)
}
)?.into_iter().flatten().collect())
}

/// Inserts all suggestions from a downloaded AMO attachment into
Expand Down Expand Up @@ -417,6 +454,95 @@ impl<'a> SuggestDao<'a> {
Ok(())
}

/// Inserts all suggestions from a downloaded Pocket attachment into
/// the database.
///
/// For each suggestion this writes, in order:
///
/// 1. one row in `suggestions` (record id, provider, title, URL),
/// 2. one row in `pocket_custom_details` holding the score,
/// 3. one row in `pocket_keywords` per high- and low-confidence keyword,
///    with the keyword stored as the prefix/suffix pair produced by
///    `split_keyword` alongside its confidence and rank.
///
/// Returns the first error raised by the interruption check or by any of
/// the SQL statements.
///
/// NOTE(review): no transaction is opened in this method — presumably the
/// caller wraps ingestion in one; confirm before relying on atomicity.
pub fn insert_pocket_suggestions(
&mut self,
record_id: &SuggestRecordId,
suggestions: &[DownloadedPocketSuggestion],
) -> Result<()> {
for suggestion in suggestions {
// Checked once per suggestion (not per keyword); `?` returns early with
// whatever error the check reports.
self.scope.err_if_interrupted()?;
// Insert the provider-independent row first. `RETURNING id` hands back
// the new row's id, which keys the detail and keyword rows below.
// NOTE(review): the trailing `true` presumably enables statement
// caching — confirm against `query_row_and_then_cachable`.
let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
"INSERT INTO suggestions(
record_id,
provider,
title,
url
)
VALUES(
:record_id,
:provider,
:title,
:url
)
RETURNING id
",
named_params! {
":record_id": record_id.as_str(),
":provider": SuggestionProvider::Pocket,
":title": suggestion.title,
":url": suggestion.url,
},
|row| row.get(0),
true,
)?;
// Pocket-specific details: currently only the score.
self.conn.execute(
"INSERT INTO pocket_custom_details(
suggestion_id,
score
)
VALUES(
:suggestion_id,
:score
)",
named_params! {
":suggestion_id": suggestion_id,
":score": suggestion.score,
},
)?;
// Walk the high-confidence keywords, then the low-confidence ones, each
// tagged with its confidence. `enumerate()` is applied to each list
// separately, so `rank` is the keyword's index within its own list and
// restarts at 0 for the low-confidence keywords.
// NOTE(review): the `pocket_keywords` primary key in schema.rs is
// (keyword_prefix, keyword_suffix, suggestion_id), so a keyword present
// in both lists of one suggestion would appear to violate it — confirm
// the attachment format rules this out.
for ((rank, keyword), confidence) in suggestion
.high_confidence_keywords
.iter()
.enumerate()
.zip(std::iter::repeat(KeywordConfidence::High))
.chain(
suggestion
.low_confidence_keywords
.iter()
.enumerate()
.zip(std::iter::repeat(KeywordConfidence::Low)),
)
{
// Store the keyword as (prefix, suffix) so lookups can match the prefix
// exactly and apply the confidence-specific rule to the suffix.
let (keyword_prefix, keyword_suffix) = split_keyword(keyword);
self.conn.execute(
"INSERT INTO pocket_keywords(
keyword_prefix,
keyword_suffix,
confidence,
rank,
suggestion_id
)
VALUES(
:keyword_prefix,
:keyword_suffix,
:confidence,
:rank,
:suggestion_id
)",
named_params! {
":keyword_prefix": keyword_prefix,
":keyword_suffix": keyword_suffix,
":confidence": confidence,
":rank": rank,
":suggestion_id": suggestion_id,
},
)?;
}
}
Ok(())
}

/// Inserts or replaces an icon for a suggestion into the database.
pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> {
self.conn.execute(
Expand Down
1 change: 1 addition & 0 deletions components/suggest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use remote_settings::RemoteSettingsConfig;
mod db;
mod error;
mod keyword;
pub mod pocket;
mod provider;
mod rs;
mod schema;
Expand Down
59 changes: 59 additions & 0 deletions components/suggest/src/pocket.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

use rusqlite::types::{FromSql, FromSqlError, FromSqlResult, ToSqlOutput, ValueRef};
use rusqlite::{Result as RusqliteResult, ToSql};

/// Confidence level of a Pocket keyword, which decides how strictly a query
/// must match the keyword for its suggestion to be returned.
///
/// Keywords are matched in two parts (see `split_keyword`). The prefix must
/// always match exactly; the rule applied to the suffix depends on the
/// confidence level, as described on each variant.
///
/// The `u8` discriminant is the value written to and read from the database.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
pub enum KeywordConfidence {
/// The query's suffix only has to be a leading part of the keyword's
/// suffix (the keyword's suffix starts with the query's suffix).
Low = 0,
/// The query's suffix must be exactly equal to the keyword's suffix.
High = 1,
}

impl FromSql for KeywordConfidence {
    /// Reads a confidence level from an integer SQL value.
    ///
    /// # Errors
    ///
    /// Propagates the error from `as_i64` when the value is not an integer,
    /// and returns `FromSqlError::OutOfRange` (carrying the raw integer)
    /// when it does not fit in a `u8` or is not a known discriminant.
    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
        let raw = value.as_i64()?;
        match u8::try_from(raw).ok().and_then(KeywordConfidence::from_u8) {
            Some(confidence) => Ok(confidence),
            None => Err(FromSqlError::OutOfRange(raw)),
        }
    }
}

impl KeywordConfidence {
#[inline]
pub(crate) fn from_u8(v: u8) -> Option<Self> {
match v {
0 => Some(KeywordConfidence::Low),
1 => Some(KeywordConfidence::High),
_ => None,
}
}
}

impl ToSql for KeywordConfidence {
    /// Serializes the confidence level as its `u8` discriminant.
    fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
        let discriminant = *self as u8;
        Ok(discriminant.into())
    }
}

/// Splits a keyword into a prefix and a suffix at its first space (`' '`).
///
/// The separating space belongs to neither part. Only the ASCII space
/// character is treated as a separator; other whitespace (such as a tab)
/// is left in place. When the keyword contains no space, the whole keyword
/// is the prefix and the suffix is the empty string.
///
/// # Examples
///
/// ```
/// # use suggest::pocket::split_keyword;
/// assert_eq!(split_keyword("foo"), ("foo", ""));
/// assert_eq!(split_keyword("foo bar baz"), ("foo", "bar baz"));
/// ```
pub fn split_keyword(keyword: &str) -> (&str, &str) {
    match keyword.find(' ') {
        // A space is one byte in UTF-8, so `space_at + 1` is a valid
        // character boundary just past the separator.
        Some(space_at) => (&keyword[..space_at], &keyword[space_at + 1..]),
        None => (keyword, ""),
    }
}
2 changes: 2 additions & 0 deletions components/suggest/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub enum SuggestionProvider {
Amp = 1,
Wikipedia = 2,
Amo = 3,
Pocket = 4,
}

impl FromSql for SuggestionProvider {
Expand All @@ -34,6 +35,7 @@ impl SuggestionProvider {
1 => Some(SuggestionProvider::Amp),
2 => Some(SuggestionProvider::Wikipedia),
3 => Some(SuggestionProvider::Amo),
4 => Some(SuggestionProvider::Pocket),
_ => None,
}
}
Expand Down
13 changes: 13 additions & 0 deletions components/suggest/src/rs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ pub(crate) enum SuggestRecord {
AmpWikipedia,
#[serde(rename = "amo-suggestions")]
Amo,
#[serde(rename = "pocket-suggestions")]
Pocket,
}

/// Represents either a single value, or a list of values. This is used to
Expand Down Expand Up @@ -253,3 +255,14 @@ pub(crate) struct DownloadedAmoSuggestion {
pub keywords: Vec<String>,
pub score: f64,
}
/// A single Pocket suggestion as it appears in a downloaded Pocket
/// suggestion attachment.
///
/// Field names are deserialized from their camelCase spelling, so the two
/// keyword lists are read from `lowConfidenceKeywords` and
/// `highConfidenceKeywords`; the single-word fields keep their names.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct DownloadedPocketSuggestion {
    pub url: String,
    pub title: String,
    pub low_confidence_keywords: Vec<String>,
    pub high_confidence_keywords: Vec<String>,
    pub score: f64,
}
18 changes: 16 additions & 2 deletions components/suggest/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use rusqlite::{Connection, Transaction};
use sql_support::open_database::{self, ConnectionInitializer};

pub const VERSION: u32 = 5;
pub const VERSION: u32 = 6;

pub const SQL: &str = "
CREATE TABLE meta(
Expand All @@ -21,6 +21,15 @@ pub const SQL: &str = "
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
CREATE TABLE pocket_keywords(
keyword_prefix TEXT NOT NULL,
keyword_suffix TEXT NOT NULL DEFAULT '',
confidence INTEGER NOT NULL,
rank INTEGER NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id),
PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
);
CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
CREATE TABLE suggestions(
Expand All @@ -43,6 +52,11 @@ pub const SQL: &str = "
ON DELETE CASCADE
);
CREATE TABLE pocket_custom_details(
suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
score REAL NOT NULL
);
CREATE TABLE wikipedia_custom_details(
suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
icon_id TEXT NOT NULL
Expand Down Expand Up @@ -96,7 +110,7 @@ impl ConnectionInitializer for SuggestConnectionInitializer {

fn upgrade_from(&self, _db: &Transaction<'_>, version: u32) -> open_database::Result<()> {
match version {
1..=4 => {
1..=5 => {
// These schema versions were used during development, and never
// shipped in any applications. Treat these databases as
// corrupt, so that they'll be replaced.
Expand Down
Loading

0 comments on commit 0ec35cc

Please sign in to comment.