From 554a156865a2f9fa273174f31cbae1b85c061123 Mon Sep 17 00:00:00 2001
From: Bart Akeley
Date: Sun, 7 Jun 2020 16:11:30 -0500
Subject: [PATCH] async building of products from html scraping

---
 .vscode/settings.json                 | 16 ++++++++++++++++
 project.clj                           |  5 +++--
 scripts/ddl.sql                       |  1 +
 src/aretherecookies/app.clj           |  2 +-
 src/aretherecookies/db.clj            |  6 ++----
 src/aretherecookies/handler.clj       |  6 +++---
 src/aretherecookies/queries.sql       | 11 ++++++++---
 src/aretherecookies/search/heb.clj    |  1 +
 src/aretherecookies/search/search.clj | 13 ++++++++-----
 9 files changed, 43 insertions(+), 18 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..e9fec1b
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,16 @@
+{
+    "sqltools.connections": [
+        {
+            "askForPassword": false,
+            "database": "wttp",
+            "driver": "PostgreSQL",
+            "name": "wttp",
+            "password": "abc123",
+            "port": 5432,
+            "previewLimit": 50,
+            "server": "localhost",
+            "username": "bartronx7"
+        }
+    ],
+    "sqltools.useNodeRuntime": true
+}
\ No newline at end of file
diff --git a/project.clj b/project.clj
index 9541550..29eb7ab 100644
--- a/project.clj
+++ b/project.clj
@@ -8,8 +8,9 @@
                  [environ "1.1.0"]
                  [compojure "1.5.1"]
                  [ring/ring-defaults "0.2.1"]
-                 [org.clojure/java.jdbc "0.7.3"]
-                 [org.postgresql/postgresql "42.2.2"]
+                 [org.clojure/java.jdbc "0.7.11"]
+                 [org.postgresql/postgresql "42.2.13"]
+                 [org.slf4j/slf4j-simple "1.7.30"]
                  [com.mchange/c3p0 "0.9.5.2"]
                  [ring-middleware-format "0.7.2"]
                  [org.clojure/data.json "0.2.6"]
diff --git a/scripts/ddl.sql b/scripts/ddl.sql
index d34777c..c96c445 100644
--- a/scripts/ddl.sql
+++ b/scripts/ddl.sql
@@ -41,6 +41,7 @@ CREATE TABLE search_term (
 DROP TABLE IF EXISTS search_term_product CASCADE;
 CREATE TABLE search_term_product (
   term VARCHAR(200) REFERENCES search_term(term) ON DELETE CASCADE,
+  rank integer NOT NULL,
   product_id VARCHAR(100) REFERENCES product(id) ON DELETE CASCADE
 );
 
diff --git a/src/aretherecookies/app.clj b/src/aretherecookies/app.clj
index 863f9b1..a20bbb1 100644
--- a/src/aretherecookies/app.clj
+++ b/src/aretherecookies/app.clj
@@ -7,7 +7,7 @@
                                              faves-delete-handler
                                              product-search-handler]]
             [aretherecookies.auth :refer [auth0-auth-backend]]
-            [aretherecookies.search.search :refer init-search]
+            [aretherecookies.search.search :refer [init-search]]
             [environ.core :refer [env]]
             [compojure.handler :refer [api]]
             [compojure.core :refer [defroutes GET POST PUT DELETE]]
diff --git a/src/aretherecookies/db.clj b/src/aretherecookies/db.clj
index f5e849a..efbab7d 100644
--- a/src/aretherecookies/db.clj
+++ b/src/aretherecookies/db.clj
@@ -88,10 +88,8 @@
 (defn add-products
   ""
   [term products]
-  (println (str "add-products " term))
+  (println "Adding" (count products) "products for" term)
   (let [product-vecs (map #(vals (select-keys % [:id :placeType :name :photo])) products)
-        term-pairs (map #(vec [term (:id %)]) products)]
-    (println "product-vecs" product-vecs)
-    (println "term-pairs" term-pairs)
+        term-pairs (map-indexed (fn [idx item] (vec [term idx (:id item)])) products)]
     (insert-into-product @pooled-db {:products product-vecs})
     (insert-into-search-term-product @pooled-db {:pairs term-pairs})))
diff --git a/src/aretherecookies/handler.clj b/src/aretherecookies/handler.clj
index ab4717d..37bb7b3 100644
--- a/src/aretherecookies/handler.clj
+++ b/src/aretherecookies/handler.clj
@@ -6,7 +6,7 @@
                                         delete-faves
                                         search-products]]
             [aretherecookies.helpers :refer [safe-json]]
-            [aretherecookies.search.heb :refer [memoized-search-heb]]
+            [aretherecookies.search.search :refer [queue-search]]
             [buddy.auth :refer [authenticated? throw-unauthorized]]))
 
 
@@ -78,5 +78,5 @@
     (if-not (empty? search)
       (let [products (search-products search)]
         (queue-search search)
-        (safe-json {:products products})
-        (safe-json {:products []}))))
+        (safe-json {:products products}))
+      (safe-json {:products []}))))
diff --git a/src/aretherecookies/queries.sql b/src/aretherecookies/queries.sql
index 6e6cb84..88acedb 100644
--- a/src/aretherecookies/queries.sql
+++ b/src/aretherecookies/queries.sql
@@ -28,14 +28,19 @@ RETURNING product_id, date
 -- :name search-products-by-term
 SELECT product.*
 FROM product
-INNER JOIN search_term_product on search_term_product=product.id
+INNER JOIN search_term_product on search_term_product.product_id=product.id
 INNER JOIN search_term ON search_term_product.term=search_term.term
 WHERE search_term.term=:v:search
 OR dmetaphone(search_term.term)=dmetaphone(:v:search)
 OR dmetaphone_alt(search_term.term)=dmetaphone(:v:search)
+ORDER BY search_term_product.rank ASC;
 
 -- :name select-search-term
-SELECT * FROM search_term WHERE term=:v:term
+SELECT * FROM search_term
+WHERE search_term.term=:v:term
+OR dmetaphone(search_term.term)=dmetaphone(:v:term)
+OR dmetaphone_alt(search_term.term)=dmetaphone(:v:term)
+ORDER BY last_queried DESC;
 
 -- :name insert-into-product
 INSERT INTO product (id, provider_type, name, photo) VALUES :t*:products
@@ -48,6 +53,6 @@ ON CONFLICT (term) DO UPDATE SET last_queried=now()
 RETURNING last_queried;
 
 -- :name insert-into-search-term-product
-INSERT INTO search_term_product (term, product_id) VALUES :t*:pairs
+INSERT INTO search_term_product (term, rank, product_id) VALUES :t*:pairs
 ON CONFLICT DO NOTHING
 RETURNING term, product_id;
diff --git a/src/aretherecookies/search/heb.clj b/src/aretherecookies/search/heb.clj
index 395ade0..a38c43d 100644
--- a/src/aretherecookies/search/heb.clj
+++ b/src/aretherecookies/search/heb.clj
@@ -18,6 +18,7 @@
 (defn search-heb
   "passes a search text to heb and parses html response"
   [search]
+  (println "Searching HEB for " search)
   (if-not (empty?
search) (as-> (java.net.URLEncoder/encode search) $ (str "https://www.heb.com/search/?q=" $) diff --git a/src/aretherecookies/search/search.clj b/src/aretherecookies/search/search.clj index bb23034..f9f6c1f 100644 --- a/src/aretherecookies/search/search.clj +++ b/src/aretherecookies/search/search.clj @@ -1,7 +1,8 @@ (ns aretherecookies.search.search - (:require [clojure.core.async :as async :refer [chan, sliding-buffer, go, >!, !!, ! searches-chan term) nil))) - + (let [cutoff (doto (Calendar/getInstance) (.add Calendar/DATE -1)) + last-queried (:last_queried (first (get-search-term term))) + is-outdated (or (nil? last-queried) (>= (.getTimeInMillis cutoff) (.getTime last-queried)))] + (if is-outdated (>!! searches-chan term) nil))) + (defn search-all-sources ""