From 7d46e1836312d9b743e063f94d1ae6665f9585dc Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Sun, 2 Apr 2017 15:15:21 +0200 Subject: [PATCH] Explicitly specify the tsearch configuration to use The main reason this didn't work before was the server dictionary configuration, not the code. But make it explicit so we're not relying on session-level settings. --- tools/search/crawler/lib/basecrawler.py | 4 ++-- tools/search/sql/functions.sql | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/search/crawler/lib/basecrawler.py b/tools/search/crawler/lib/basecrawler.py index 62934c6d..6254f45c 100644 --- a/tools/search/crawler/lib/basecrawler.py +++ b/tools/search/crawler/lib/basecrawler.py @@ -147,9 +147,9 @@ class BaseSiteCrawler(object): 'internal': internal, } curs = self.dbconn.cursor() - curs.execute("UPDATE webpages SET title=%(title)s, txt=%(txt)s, fti=to_tsvector(%(txt)s), lastscanned=%(lastmod)s, relprio=%(relprio)s, isinternal=%(internal)s WHERE site=%(site)s AND suburl=%(url)s", params) + curs.execute("UPDATE webpages SET title=%(title)s, txt=%(txt)s, fti=to_tsvector('public.pg', %(txt)s), lastscanned=%(lastmod)s, relprio=%(relprio)s, isinternal=%(internal)s WHERE site=%(site)s AND suburl=%(url)s", params) if curs.rowcount != 1: - curs.execute("INSERT INTO webpages (site, suburl, title, txt, fti, lastscanned, relprio, isinternal) VALUES (%(site)s, %(url)s, %(title)s, %(txt)s, to_tsvector(%(txt)s), %(lastmod)s, %(relprio)s, %(internal)s)", params) + curs.execute("INSERT INTO webpages (site, suburl, title, txt, fti, lastscanned, relprio, isinternal) VALUES (%(site)s, %(url)s, %(title)s, %(txt)s, to_tsvector('public.pg', %(txt)s), %(lastmod)s, %(relprio)s, %(internal)s)", params) with self.counterlock: self.pages_new += 1 else: diff --git a/tools/search/sql/functions.sql b/tools/search/sql/functions.sql index 6572b960..4a0ba1c3 100644 --- a/tools/search/sql/functions.sql +++ b/tools/search/sql/functions.sql @@ 
-71,7 +71,7 @@ DECLARE curs refcursor; pagecount int; BEGIN - tsq := plainto_tsquery(query); + tsq := plainto_tsquery('public.pg', query); IF numnode(tsq) = 0 THEN siteid = 0;baseurl=NULL;suburl=NULL;title=NULL;headline=NULL;rank=0; RETURN NEXT; @@ -106,7 +106,3 @@ BEGIN END; $$ LANGUAGE 'plpgsql'; -/* Seems broken, so stop doing this for now - * ALTER FUNCTION site_search(text, int, int, bool, text, boolean) SET default_text_search_config = 'public.pg'; - */ - -- 2.39.5