mirror of
https://gitlab.com/wheres-the-tp/server.git
synced 2026-01-25 04:34:55 -06:00
async building of products from html scraping
This commit is contained in:
parent
dbf403360e
commit
554a156865
9 changed files with 43 additions and 18 deletions
16
.vscode/settings.json
vendored
Normal file
16
.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
"sqltools.connections": [
|
||||||
|
{
|
||||||
|
"askForPassword": false,
|
||||||
|
"database": "wttp",
|
||||||
|
"driver": "PostgreSQL",
|
||||||
|
"name": "wttp",
|
||||||
|
"password": "abc123",
|
||||||
|
"port": 5432,
|
||||||
|
"previewLimit": 50,
|
||||||
|
"server": "localhost",
|
||||||
|
"username": "bartronx7"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"sqltools.useNodeRuntime": true
|
||||||
|
}
|
||||||
|
|
@ -8,8 +8,9 @@
|
||||||
[environ "1.1.0"]
|
[environ "1.1.0"]
|
||||||
[compojure "1.5.1"]
|
[compojure "1.5.1"]
|
||||||
[ring/ring-defaults "0.2.1"]
|
[ring/ring-defaults "0.2.1"]
|
||||||
[org.clojure/java.jdbc "0.7.3"]
|
[org.clojure/java.jdbc "0.7.11"]
|
||||||
[org.postgresql/postgresql "42.2.2"]
|
[org.postgresql/postgresql "42.2.13"]
|
||||||
|
[org.slf4j/slf4j-simple "1.7.30"]
|
||||||
[com.mchange/c3p0 "0.9.5.2"]
|
[com.mchange/c3p0 "0.9.5.2"]
|
||||||
[ring-middleware-format "0.7.2"]
|
[ring-middleware-format "0.7.2"]
|
||||||
[org.clojure/data.json "0.2.6"]
|
[org.clojure/data.json "0.2.6"]
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ CREATE TABLE search_term (
|
||||||
DROP TABLE IF EXISTS search_term_product CASCADE;
|
DROP TABLE IF EXISTS search_term_product CASCADE;
|
||||||
CREATE TABLE search_term_product (
|
CREATE TABLE search_term_product (
|
||||||
term VARCHAR(200) REFERENCES search_term(term) ON DELETE CASCADE,
|
term VARCHAR(200) REFERENCES search_term(term) ON DELETE CASCADE,
|
||||||
|
rank integer NOT NULL,
|
||||||
product_id VARCHAR(100) REFERENCES product(id) ON DELETE CASCADE
|
product_id VARCHAR(100) REFERENCES product(id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
faves-delete-handler
|
faves-delete-handler
|
||||||
product-search-handler]]
|
product-search-handler]]
|
||||||
[aretherecookies.auth :refer [auth0-auth-backend]]
|
[aretherecookies.auth :refer [auth0-auth-backend]]
|
||||||
[aretherecookies.search.search :refer init-search]
|
[aretherecookies.search.search :refer [init-search]]
|
||||||
[environ.core :refer [env]]
|
[environ.core :refer [env]]
|
||||||
[compojure.handler :refer [api]]
|
[compojure.handler :refer [api]]
|
||||||
[compojure.core :refer [defroutes GET POST PUT DELETE]]
|
[compojure.core :refer [defroutes GET POST PUT DELETE]]
|
||||||
|
|
|
||||||
|
|
@ -88,10 +88,8 @@
|
||||||
(defn add-products
|
(defn add-products
|
||||||
""
|
""
|
||||||
[term products]
|
[term products]
|
||||||
(println (str "add-products " term))
|
(println "Adding" (count products) "products for" term)
|
||||||
(let [product-vecs (map #(vals (select-keys % [:id :placeType :name :photo])) products)
|
(let [product-vecs (map #(vals (select-keys % [:id :placeType :name :photo])) products)
|
||||||
term-pairs (map #(vec [term (:id %)]) products)]
|
term-pairs (map-indexed (fn [idx item] (vec [term idx (:id item)])) products)]
|
||||||
(println "product-vecs" product-vecs)
|
|
||||||
(println "term-pairs" term-pairs)
|
|
||||||
(insert-into-product @pooled-db {:products product-vecs})
|
(insert-into-product @pooled-db {:products product-vecs})
|
||||||
(insert-into-search-term-product @pooled-db {:pairs term-pairs})))
|
(insert-into-search-term-product @pooled-db {:pairs term-pairs})))
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
delete-faves
|
delete-faves
|
||||||
search-products]]
|
search-products]]
|
||||||
[aretherecookies.helpers :refer [safe-json]]
|
[aretherecookies.helpers :refer [safe-json]]
|
||||||
[aretherecookies.search.heb :refer [memoized-search-heb]]
|
[aretherecookies.search.search :refer [queue-search]]
|
||||||
[buddy.auth :refer [authenticated?
|
[buddy.auth :refer [authenticated?
|
||||||
throw-unauthorized]]))
|
throw-unauthorized]]))
|
||||||
|
|
||||||
|
|
@ -78,5 +78,5 @@
|
||||||
(if-not (empty? search)
|
(if-not (empty? search)
|
||||||
(let [products (search-products search)]
|
(let [products (search-products search)]
|
||||||
(queue-search search)
|
(queue-search search)
|
||||||
(safe-json {:products products})
|
(safe-json {:products products}))
|
||||||
(safe-json {:products []}))))
|
(safe-json {:products []}))))
|
||||||
|
|
|
||||||
|
|
@ -28,14 +28,19 @@ RETURNING product_id, date
|
||||||
|
|
||||||
-- :name search-products-by-term
|
-- :name search-products-by-term
|
||||||
SELECT product.* FROM product
|
SELECT product.* FROM product
|
||||||
INNER JOIN search_term_product on search_term_product=product.id
|
INNER JOIN search_term_product on search_term_product.product_id=product.id
|
||||||
INNER JOIN search_term ON search_term_product.term=search_term.term
|
INNER JOIN search_term ON search_term_product.term=search_term.term
|
||||||
WHERE search_term.term=:v:search
|
WHERE search_term.term=:v:search
|
||||||
OR dmetaphone(search_term.term)=dmetaphone(:v:search)
|
OR dmetaphone(search_term.term)=dmetaphone(:v:search)
|
||||||
OR dmetaphone_alt(search_term.term)=dmetaphone(:v:search)
|
OR dmetaphone_alt(search_term.term)=dmetaphone(:v:search)
|
||||||
|
ORDER BY search_term_product.rank ASC;
|
||||||
|
|
||||||
-- :name select-search-term
|
-- :name select-search-term
|
||||||
SELECT * FROM search_term WHERE term=:v:term
|
SELECT * FROM search_term
|
||||||
|
WHERE search_term.term=:v:term
|
||||||
|
OR dmetaphone(search_term.term)=dmetaphone(:v:term)
|
||||||
|
OR dmetaphone_alt(search_term.term)=dmetaphone(:v:term)
|
||||||
|
ORDER BY last_queried DESC;
|
||||||
|
|
||||||
-- :name insert-into-product
|
-- :name insert-into-product
|
||||||
INSERT INTO product (id, provider_type, name, photo) VALUES :t*:products
|
INSERT INTO product (id, provider_type, name, photo) VALUES :t*:products
|
||||||
|
|
@ -48,6 +53,6 @@ ON CONFLICT (term) DO UPDATE SET last_queried=now()
|
||||||
RETURNING last_queried;
|
RETURNING last_queried;
|
||||||
|
|
||||||
-- :name insert-into-search-term-product
|
-- :name insert-into-search-term-product
|
||||||
INSERT INTO search_term_product (term, product_id) VALUES :t*:pairs
|
INSERT INTO search_term_product (term, rank, product_id) VALUES :t*:pairs
|
||||||
ON CONFLICT DO NOTHING
|
ON CONFLICT DO NOTHING
|
||||||
RETURNING term, product_id;
|
RETURNING term, product_id;
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@
|
||||||
(defn search-heb
|
(defn search-heb
|
||||||
"passes a search text to heb and parses html response"
|
"passes a search text to heb and parses html response"
|
||||||
[search]
|
[search]
|
||||||
|
(println "Searching HEB for " search)
|
||||||
(if-not (empty? search)
|
(if-not (empty? search)
|
||||||
(as-> (java.net.URLEncoder/encode search) $
|
(as-> (java.net.URLEncoder/encode search) $
|
||||||
(str "https://www.heb.com/search/?q=" $)
|
(str "https://www.heb.com/search/?q=" $)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
(ns aretherecookies.search.search
|
(ns aretherecookies.search.search
|
||||||
(:require [clojure.core.async :as async :refer [chan, sliding-buffer, go, >!, <!]]
|
(:require [clojure.core.async :as async :refer [chan, sliding-buffer, go, >!!, <!]]
|
||||||
[aretherecookies.db :refer [get-search-term, add-products, update-search-term]]
|
[aretherecookies.db :refer [get-search-term, add-products, update-search-term]]
|
||||||
[aretherecookies.search.heb :refer [search-heb]]))
|
[aretherecookies.search.heb :refer [search-heb]])
|
||||||
|
(:import [java.util Calendar]))
|
||||||
|
|
||||||
|
|
||||||
(def searches-chan (chan (sliding-buffer 500)))
|
(def searches-chan (chan (sliding-buffer 500)))
|
||||||
|
|
@ -10,9 +11,11 @@
|
||||||
(defn queue-search
|
(defn queue-search
|
||||||
""
|
""
|
||||||
[term]
|
[term]
|
||||||
;; TODO change empty check to a datetime check against some staleness threshold
|
(let [cutoff (doto (Calendar/getInstance) (.add Calendar/DATE -1))
|
||||||
(go (if (empty? (:last_queried (first (get-search-term term)))) (>! searches-chan term) nil)))
|
last-queried (:last_queried (first (get-search-term term)))
|
||||||
|
is-outdated (or (nil? last-queried) (>= (.getTimeInMillis cutoff) (.getTime last-queried)))]
|
||||||
|
(if is-outdated (>!! searches-chan term) nil)))
|
||||||
|
|
||||||
|
|
||||||
(defn search-all-sources
|
(defn search-all-sources
|
||||||
""
|
""
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue