summaryrefslogtreecommitdiffstats
path: root/third_party/rust/suggest/src/yelp.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/suggest/src/yelp.rs')
-rw-r--r--third_party/rust/suggest/src/yelp.rs497
1 files changed, 497 insertions, 0 deletions
diff --git a/third_party/rust/suggest/src/yelp.rs b/third_party/rust/suggest/src/yelp.rs
new file mode 100644
index 0000000000..2413709c67
--- /dev/null
+++ b/third_party/rust/suggest/src/yelp.rs
@@ -0,0 +1,497 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use rusqlite::types::ToSqlOutput;
+use rusqlite::{named_params, Result as RusqliteResult, ToSql};
+use sql_support::ConnExt;
+use url::form_urlencoded;
+
+use crate::{
+ db::SuggestDao,
+ provider::SuggestionProvider,
+ rs::{DownloadedYelpSuggestion, SuggestRecordId},
+ suggestion::Suggestion,
+ Result, SuggestionQuery,
+};
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+#[repr(u8)]
+enum Modifier {
+ Pre = 0,
+ Post = 1,
+ Yelp = 2,
+}
+
+impl ToSql for Modifier {
+ fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
+ Ok(ToSqlOutput::from(*self as u8))
+ }
+}
+
+/// This module assumes like following query.
+/// "Yelp-modifier? Pre-modifier? Subject Post-modifier? (Location-modifier | Location-sign Location?)? Yelp-modifier?"
+/// For example, the query below is valid.
+/// "Yelp (Yelp-modifier) Best(Pre-modifier) Ramen(Subject) Delivery(Post-modifier) In(Location-sign) Tokyo(Location)"
+/// Also, as everything except Subject is optional, "Ramen" will be also valid query.
+/// However, "Best Best Ramen" and "Ramen Best" is out of the above appearance order rule,
+/// parsing will be failed. Also, every words except Location needs to be registered in DB.
+/// Please refer to the query test in store.rs for all of combination.
+/// Currently, the maximum query length is determined while refering to having word lengths in DB
+/// and location names.
+/// max subject: 50 + pre-modifier: 10 + post-modifier: 10 + location-sign: 7 + location: 50 = 127 = 150.
+const MAX_QUERY_LENGTH: usize = 150;
+
+/// The max number of words consisting the modifier. To improve the SQL performance by matching with
+/// "keyword=:modifier" (please see is_modifier()), define this how many words we should check.
+const MAX_MODIFIER_WORDS_NUMBER: usize = 2;
+
+/// At least this many characters must be typed for a subject to be matched.
+const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
+
+impl<'a> SuggestDao<'a> {
+ /// Inserts the suggestions for Yelp attachment into the database.
+ pub fn insert_yelp_suggestions(
+ &mut self,
+ record_id: &SuggestRecordId,
+ suggestion: &DownloadedYelpSuggestion,
+ ) -> Result<()> {
+ for keyword in &suggestion.subjects {
+ self.scope.err_if_interrupted()?;
+ self.conn.execute_cached(
+ "INSERT INTO yelp_subjects(record_id, keyword) VALUES(:record_id, :keyword)",
+ named_params! {
+ ":record_id": record_id.as_str(),
+ ":keyword": keyword,
+ },
+ )?;
+ }
+
+ for keyword in &suggestion.pre_modifiers {
+ self.scope.err_if_interrupted()?;
+ self.conn.execute_cached(
+ "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
+ named_params! {
+ ":record_id": record_id.as_str(),
+ ":type": Modifier::Pre,
+ ":keyword": keyword,
+ },
+ )?;
+ }
+
+ for keyword in &suggestion.post_modifiers {
+ self.scope.err_if_interrupted()?;
+ self.conn.execute_cached(
+ "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
+ named_params! {
+ ":record_id": record_id.as_str(),
+ ":type": Modifier::Post,
+ ":keyword": keyword,
+ },
+ )?;
+ }
+
+ for keyword in &suggestion.yelp_modifiers {
+ self.scope.err_if_interrupted()?;
+ self.conn.execute_cached(
+ "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
+ named_params! {
+ ":record_id": record_id.as_str(),
+ ":type": Modifier::Yelp,
+ ":keyword": keyword,
+ },
+ )?;
+ }
+
+ for sign in &suggestion.location_signs {
+ self.scope.err_if_interrupted()?;
+ self.conn.execute_cached(
+ "INSERT INTO yelp_location_signs(record_id, keyword, need_location) VALUES(:record_id, :keyword, :need_location)",
+ named_params! {
+ ":record_id": record_id.as_str(),
+ ":keyword": sign.keyword,
+ ":need_location": sign.need_location,
+ },
+ )?;
+ }
+
+ self.scope.err_if_interrupted()?;
+ self.conn.execute_cached(
+ "INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)",
+ named_params! {
+ ":record_id": record_id.as_str(),
+ ":icon_id": suggestion.icon_id,
+ ":score": suggestion.score,
+ },
+ )?;
+
+ Ok(())
+ }
+
+ /// Fetch Yelp suggestion from given user's query.
+ pub fn fetch_yelp_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ if !query.providers.contains(&SuggestionProvider::Yelp) {
+ return Ok(vec![]);
+ }
+
+ if query.keyword.len() > MAX_QUERY_LENGTH {
+ return Ok(vec![]);
+ }
+
+ let query_string = &query.keyword.trim();
+ if !query_string.contains(' ') {
+ let Some((subject, subject_exact_match)) = self.find_subject(query_string)? else {
+ return Ok(vec![]);
+ };
+ let (icon, score) = self.fetch_custom_details()?;
+ let builder = SuggestionBuilder {
+ subject: &subject,
+ subject_exact_match,
+ pre_modifier: None,
+ post_modifier: None,
+ location_sign: None,
+ location: None,
+ need_location: false,
+ icon,
+ score,
+ };
+ return Ok(vec![builder.into()]);
+ }
+
+ // Find the yelp keyword modifier and remove them from the query.
+ let (query_without_yelp_modifiers, _, _) =
+ self.find_modifiers(query_string, Modifier::Yelp, Modifier::Yelp)?;
+
+ // Find the location sign and the location.
+ let (query_without_location, location_sign, location, need_location) =
+ self.find_location(&query_without_yelp_modifiers)?;
+
+ if let (Some(_), false) = (&location, need_location) {
+ // The location sign does not need the specific location, but user is setting something.
+ return Ok(vec![]);
+ }
+
+ if query_without_location.is_empty() {
+ // No remained query.
+ return Ok(vec![]);
+ }
+
+ // Find the modifiers.
+ let (subject_candidate, pre_modifier, post_modifier) =
+ self.find_modifiers(&query_without_location, Modifier::Pre, Modifier::Post)?;
+
+ let Some((subject, subject_exact_match)) = self.find_subject(&subject_candidate)? else {
+ return Ok(vec![]);
+ };
+
+ let (icon, score) = self.fetch_custom_details()?;
+ let builder = SuggestionBuilder {
+ subject: &subject,
+ subject_exact_match,
+ pre_modifier,
+ post_modifier,
+ location_sign,
+ location,
+ need_location,
+ icon,
+ score,
+ };
+ Ok(vec![builder.into()])
+ }
+
+ /// Fetch the custom details for Yelp suggestions.
+ /// It returns the location tuple as follows:
+ /// (
+ /// Option<Vec<u8>>: Icon data. If not found, returns None.
+ /// f64: Reflects score field in the yelp_custom_details table.
+ /// )
+ ///
+ /// Note that there should be only one record in `yelp_custom_details`
+ /// as all the Yelp assets are stored in the attachment of a single record
+ /// on Remote Settings. The following query will perform a table scan against
+ /// `yelp_custom_details` followed by an index search against `icons`,
+ /// which should be fine since there is only one record in the first table.
+ fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, f64)> {
+ let result = self.conn.query_row_and_then_cachable(
+ r#"
+ SELECT
+ i.data, y.score
+ FROM
+ yelp_custom_details y
+ LEFT JOIN
+ icons i
+ ON y.icon_id = i.id
+ LIMIT
+ 1
+ "#,
+ (),
+ |row| -> Result<_> { Ok((row.get::<_, Option<Vec<u8>>>(0)?, row.get::<_, f64>(1)?)) },
+ true,
+ )?;
+
+ Ok(result)
+ }
+
+ /// Find the location information from the given query string.
+ /// It returns the location tuple as follows:
+ /// (
+ /// String: Query string that is removed found location information.
+ /// Option<String>: Location sign found in yelp_location_signs table. If not found, returns None.
+ /// Option<String>: Specific location name after location sign. If not found, returns None.
+ /// bool: Reflects need_location field in the table.
+ /// )
+ fn find_location(&self, query: &str) -> Result<(String, Option<String>, Option<String>, bool)> {
+ let query_with_spaces = format!(" {} ", query);
+ let mut results: Vec<(usize, usize, i8)> = self.conn.query_rows_and_then_cached(
+ "
+ SELECT
+ INSTR(:query, ' ' || keyword || ' ') AS sign_index,
+ LENGTH(keyword) AS sign_length,
+ need_location
+ FROM yelp_location_signs
+ WHERE
+ sign_index > 0
+ ORDER BY
+ sign_length DESC
+ LIMIT 1
+ ",
+ named_params! {
+ ":query": &query_with_spaces.to_lowercase(),
+ },
+ |row| -> Result<_> {
+ Ok((
+ row.get::<_, usize>("sign_index")?,
+ row.get::<_, usize>("sign_length")?,
+ row.get::<_, i8>("need_location")?,
+ ))
+ },
+ )?;
+
+ let (sign_index, sign_length, need_location) = if let Some(res) = results.pop() {
+ res
+ } else {
+ return Ok((query.trim().to_string(), None, None, false));
+ };
+
+ let pre_location = query_with_spaces
+ .get(..sign_index)
+ .map(str::trim)
+ .map(str::to_string)
+ .unwrap_or_default();
+ let location_sign = query_with_spaces
+ .get(sign_index..sign_index + sign_length)
+ .map(str::trim)
+ .filter(|s| !s.is_empty())
+ .map(str::to_string);
+ let location = query_with_spaces
+ .get(sign_index + sign_length..)
+ .map(str::trim)
+ .filter(|s| !s.is_empty())
+ .map(str::to_string);
+
+ Ok((pre_location, location_sign, location, need_location == 1))
+ }
+
+ /// Find the pre/post modifier from the given query string.
+ /// It returns the modifiers tuple as follows:
+ /// (
+ /// String: Query string that is removed found the modifiers.
+ /// Option<String>: Pre-modifier found in the yelp_modifiers table. If not found, returns None.
+ /// Option<String>: Post-modifier found in the yelp_modifiers table. If not found, returns None.
+ /// )
+ fn find_modifiers(
+ &self,
+ query: &str,
+ pre_modifier_type: Modifier,
+ post_modifier_type: Modifier,
+ ) -> Result<(String, Option<String>, Option<String>)> {
+ if !query.contains(' ') {
+ return Ok((query.to_string(), None, None));
+ }
+
+ let words: Vec<_> = query.split_whitespace().collect();
+
+ let mut pre_modifier = None;
+ for n in (1..=MAX_MODIFIER_WORDS_NUMBER).rev() {
+ let mut candidate_chunks = words.chunks(n);
+ let candidate = candidate_chunks.next().unwrap_or(&[""]).join(" ");
+ if self.is_modifier(&candidate, pre_modifier_type)? {
+ pre_modifier = Some(candidate);
+ break;
+ }
+ }
+
+ let mut post_modifier = None;
+ for n in (1..=MAX_MODIFIER_WORDS_NUMBER).rev() {
+ let mut candidate_chunks = words.rchunks(n);
+ let candidate = candidate_chunks.next().unwrap_or(&[""]).join(" ");
+ if self.is_modifier(&candidate, post_modifier_type)? {
+ post_modifier = Some(candidate);
+ break;
+ }
+ }
+
+ let mut without_modifiers = query;
+ if let Some(ref modifier) = pre_modifier {
+ without_modifiers = &without_modifiers[modifier.len()..];
+ }
+ if let Some(ref modifier) = post_modifier {
+ without_modifiers = &without_modifiers[..without_modifiers.len() - modifier.len()];
+ }
+
+ Ok((
+ without_modifiers.trim().to_string(),
+ pre_modifier,
+ post_modifier,
+ ))
+ }
+
+ /// Find the subject from the given string.
+ /// It returns the Option. If it is not none, it contains the tuple as follows:
+ /// (
+ /// String: Subject.
+ /// bool: Whether the subject matched exactly with the paramter.
+ /// )
+ fn find_subject(&self, candidate: &str) -> Result<Option<(String, bool)>> {
+ if candidate.is_empty() {
+ return Ok(None);
+ }
+
+ // If the length of subject candidate is less than
+ // SUBJECT_PREFIX_MATCH_THRESHOLD, should exact match.
+ if candidate.len() < SUBJECT_PREFIX_MATCH_THRESHOLD {
+ return Ok(if self.is_subject(candidate)? {
+ Some((candidate.to_string(), true))
+ } else {
+ None
+ });
+ }
+
+ // Otherwise, apply prefix-match.
+ Ok(
+ match self.conn.query_row_and_then_cachable(
+ "SELECT keyword
+ FROM yelp_subjects
+ WHERE keyword BETWEEN :candidate AND :candidate || x'FFFF'
+ ORDER BY LENGTH(keyword) ASC, keyword ASC
+ LIMIT 1",
+ named_params! {
+ ":candidate": candidate.to_lowercase(),
+ },
+ |row| row.get::<_, String>(0),
+ true,
+ ) {
+ Ok(keyword) => {
+ debug_assert!(candidate.len() <= keyword.len());
+ Some((
+ format!("{}{}", candidate, &keyword[candidate.len()..]),
+ candidate.len() == keyword.len(),
+ ))
+ }
+ Err(_) => None,
+ },
+ )
+ }
+
+ fn is_modifier(&self, word: &str, modifier_type: Modifier) -> Result<bool> {
+ let result = self.conn.query_row_and_then_cachable(
+ "
+ SELECT EXISTS (
+ SELECT 1 FROM yelp_modifiers WHERE type = :type AND keyword = :word LIMIT 1
+ )
+ ",
+ named_params! {
+ ":type": modifier_type,
+ ":word": word.to_lowercase(),
+ },
+ |row| row.get::<_, bool>(0),
+ true,
+ )?;
+
+ Ok(result)
+ }
+
+ fn is_subject(&self, word: &str) -> Result<bool> {
+ let result = self.conn.query_row_and_then_cachable(
+ "
+ SELECT EXISTS (
+ SELECT 1 FROM yelp_subjects WHERE keyword = :word LIMIT 1
+ )
+ ",
+ named_params! {
+ ":word": word.to_lowercase(),
+ },
+ |row| row.get::<_, bool>(0),
+ true,
+ )?;
+
+ Ok(result)
+ }
+}
+
+struct SuggestionBuilder<'a> {
+ subject: &'a str,
+ subject_exact_match: bool,
+ pre_modifier: Option<String>,
+ post_modifier: Option<String>,
+ location_sign: Option<String>,
+ location: Option<String>,
+ need_location: bool,
+ icon: Option<Vec<u8>>,
+ score: f64,
+}
+
+impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
+ fn from(builder: SuggestionBuilder<'a>) -> Suggestion {
+ // This location sign such the 'near by' needs to add as a description parameter.
+ let location_modifier = if !builder.need_location {
+ builder.location_sign.as_deref()
+ } else {
+ None
+ };
+ let description = [
+ builder.pre_modifier.as_deref(),
+ Some(builder.subject),
+ builder.post_modifier.as_deref(),
+ location_modifier,
+ ]
+ .iter()
+ .flatten()
+ .copied()
+ .collect::<Vec<_>>()
+ .join(" ");
+
+ // https://www.yelp.com/search?find_desc={description}&find_loc={location}
+ let mut url = String::from("https://www.yelp.com/search?");
+ let mut parameters = form_urlencoded::Serializer::new(String::new());
+ parameters.append_pair("find_desc", &description);
+ if let (Some(location), true) = (&builder.location, builder.need_location) {
+ parameters.append_pair("find_loc", location);
+ }
+ url.push_str(&parameters.finish());
+
+ let title = [
+ builder.pre_modifier.as_deref(),
+ Some(builder.subject),
+ builder.post_modifier.as_deref(),
+ builder.location_sign.as_deref(),
+ builder.location.as_deref(),
+ ]
+ .iter()
+ .flatten()
+ .copied()
+ .collect::<Vec<_>>()
+ .join(" ");
+
+ Suggestion::Yelp {
+ url,
+ title,
+ icon: builder.icon,
+ score: builder.score,
+ has_location_sign: location_modifier.is_none() && builder.location_sign.is_some(),
+ subject_exact_match: builder.subject_exact_match,
+ location_param: "find_loc".to_string(),
+ }
+ }
+}