async building of products from html scraping

This commit is contained in:
Bart Akeley 2020-06-07 16:11:30 -05:00
parent dbf403360e
commit 554a156865
9 changed files with 43 additions and 18 deletions

16
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
"sqltools.connections": [
{
"askForPassword": false,
"database": "wttp",
"driver": "PostgreSQL",
"name": "wttp",
"password": "abc123",
"port": 5432,
"previewLimit": 50,
"server": "localhost",
"username": "bartronx7"
}
],
"sqltools.useNodeRuntime": true
}

View file

@ -8,8 +8,9 @@
[environ "1.1.0"]
[compojure "1.5.1"]
[ring/ring-defaults "0.2.1"]
[org.clojure/java.jdbc "0.7.3"]
[org.postgresql/postgresql "42.2.2"]
[org.clojure/java.jdbc "0.7.11"]
[org.postgresql/postgresql "42.2.13"]
[org.slf4j/slf4j-simple "1.7.30"]
[com.mchange/c3p0 "0.9.5.2"]
[ring-middleware-format "0.7.2"]
[org.clojure/data.json "0.2.6"]

View file

@ -41,6 +41,7 @@ CREATE TABLE search_term (
-- Join table linking a search term to the products stored for it.
-- rank holds the product's position within that term's result list (add-products
-- writes the zero-based index) and search-products-by-term orders by it.
-- NOTE(review): no PRIMARY KEY or UNIQUE constraint is declared here, so an
-- INSERT ... ON CONFLICT DO NOTHING against this table has no constraint to
-- conflict on and repeated inserts would add duplicate rows; confirm whether
-- (term, product_id) should be unique.
DROP TABLE IF EXISTS search_term_product CASCADE;
CREATE TABLE search_term_product (
term VARCHAR(200) REFERENCES search_term(term) ON DELETE CASCADE,
rank integer NOT NULL,
product_id VARCHAR(100) REFERENCES product(id) ON DELETE CASCADE
);

View file

@ -7,7 +7,7 @@
faves-delete-handler
product-search-handler]]
[aretherecookies.auth :refer [auth0-auth-backend]]
[aretherecookies.search.search :refer init-search]
[aretherecookies.search.search :refer [init-search]]
[environ.core :refer [env]]
[compojure.handler :refer [api]]
[compojure.core :refer [defroutes GET POST PUT DELETE]]

View file

@ -88,10 +88,8 @@
(defn add-products
"Persists `products` found for the search `term`.
Passes one tuple of the :id, :placeType, :name and :photo values per product
to insert-into-product, then one [term rank product-id] tuple per product to
insert-into-search-term-product, where rank is the product's zero-based
index in `products`."
[term products]
(println (str "add-products " term))
(println "Adding" (count products) "products for" term)
;; NOTE(review): `vals` over `select-keys` omits any key a product lacks, so a
;; product missing e.g. :photo yields a shorter tuple; confirm every product
;; carries all four keys.
(let [product-vecs (map #(vals (select-keys % [:id :placeType :name :photo])) products)
term-pairs (map #(vec [term (:id %)]) products)]
(println "product-vecs" product-vecs)
(println "term-pairs" term-pairs)
;; map-indexed supplies the zero-based position that is stored as the rank column.
term-pairs (map-indexed (fn [idx item] (vec [term idx (:id item)])) products)]
(insert-into-product @pooled-db {:products product-vecs})
(insert-into-search-term-product @pooled-db {:pairs term-pairs})))

View file

@ -6,7 +6,7 @@
delete-faves
search-products]]
[aretherecookies.helpers :refer [safe-json]]
[aretherecookies.search.heb :refer [memoized-search-heb]]
[aretherecookies.search.search :refer [queue-search]]
[buddy.auth :refer [authenticated?
throw-unauthorized]]))
@ -78,5 +78,5 @@
(if-not (empty? search)
(let [products (search-products search)]
(queue-search search)
(safe-json {:products products})
(safe-json {:products []}))))
(safe-json {:products products}))
(safe-json {:products []}))))

View file

@ -28,14 +28,19 @@ RETURNING product_id, date
-- :name search-products-by-term
-- Selects products joined (through search_term_product) to every search_term
-- whose term equals :search, or whose dmetaphone / dmetaphone_alt code equals
-- dmetaphone(:search); rows are ordered by search_term_product.rank ascending.
-- NOTE(review): rows from several matching terms are merged into one result, so
-- a product may repeat and ranks from different terms interleave; confirm
-- this is acceptable.
SELECT product.* FROM product
INNER JOIN search_term_product on search_term_product=product.id
INNER JOIN search_term_product on search_term_product.product_id=product.id
INNER JOIN search_term ON search_term_product.term=search_term.term
WHERE search_term.term=:v:search
OR dmetaphone(search_term.term)=dmetaphone(:v:search)
OR dmetaphone_alt(search_term.term)=dmetaphone(:v:search)
ORDER BY search_term_product.rank ASC;
-- :name select-search-term
-- Selects search_term rows whose term equals :term, or whose dmetaphone /
-- dmetaphone_alt code equals dmetaphone(:term), most recently queried first.
SELECT * FROM search_term WHERE term=:v:term
SELECT * FROM search_term
WHERE search_term.term=:v:term
OR dmetaphone(search_term.term)=dmetaphone(:v:term)
OR dmetaphone_alt(search_term.term)=dmetaphone(:v:term)
ORDER BY last_queried DESC;
-- :name insert-into-product
INSERT INTO product (id, provider_type, name, photo) VALUES :t*:products
@ -48,6 +53,6 @@ ON CONFLICT (term) DO UPDATE SET last_queried=now()
RETURNING last_queried;
-- :name insert-into-search-term-product
INSERT INTO search_term_product (term, product_id) VALUES :t*:pairs
INSERT INTO search_term_product (term, rank, product_id) VALUES :t*:pairs
ON CONFLICT DO NOTHING
RETURNING term, product_id;

View file

@ -18,6 +18,7 @@
(defn search-heb
"passes a search text to heb and parses html response"
[search]
(println "Searching HEB for " search)
(if-not (empty? search)
(as-> (java.net.URLEncoder/encode search) $
(str "https://www.heb.com/search/?q=" $)

View file

@ -1,7 +1,8 @@
(ns aretherecookies.search.search
(:require [clojure.core.async :as async :refer [chan, sliding-buffer, go, >!, <!]]
(:require [clojure.core.async :as async :refer [chan, sliding-buffer, go, >!!, <!]]
[aretherecookies.db :refer [get-search-term, add-products, update-search-term]]
[aretherecookies.search.heb :refer [search-heb]]))
[aretherecookies.search.heb :refer [search-heb]])
(:import [java.util Calendar]))
(def searches-chan (chan (sliding-buffer 500)))
@ -10,9 +11,11 @@
(defn queue-search
"Puts `term` on searches-chan when the first row returned by get-search-term
has no last_queried value or that value is at least one day old; otherwise
returns nil."
[term]
;; TODO change empty check to a datetime check against some staleness threshold
(go (if (empty? (:last_queried (first (get-search-term term)))) (>! searches-chan term) nil)))
;; cutoff is the current time moved back by one day.
(let [cutoff (doto (Calendar/getInstance) (.add Calendar/DATE -1))
last-queried (:last_queried (first (get-search-term term)))
;; Outdated means never queried, or last queried at or before the cutoff.
is-outdated (or (nil? last-queried) (>= (.getTimeInMillis cutoff) (.getTime last-queried)))]
;; searches-chan is created with a sliding buffer, so this blocking put does not
;; wait for a consumer; when the buffer is full the oldest queued term is dropped.
(if is-outdated (>!! searches-chan term) nil)))
(defn search-all-sources
""