fts.sql 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. -- 创建表结构
  2. CREATE TABLE fts_texts (
  3. paragraph integer,
  4. book integer,
  5. wid varchar(50),
  6. -- 单个出现的黑体字,权重较大
  7. bold_single text,
  8. -- 成对出现的黑体字,权重一般
  9. bold_double text,
  10. -- 连续三个或三个以上的黑体字,权重较低
  11. bold_multiple text,
  12. content text
  13. );
  14. -- 创建全文检索配置 pali
  15. CREATE TEXT SEARCH CONFIGURATION pali ( parser = pg_catalog.default );
  16. -- 创建全文检索配置 pali_unaccent 无标音符号版
  17. CREATE TEXT SEARCH CONFIGURATION pali_unaccent ( parser = pg_catalog.default );
  18. -- 创建巴利语词形转换字典
  19. CREATE TEXT SEARCH DICTIONARY pali_stem (
  20. TEMPLATE = synonym,
  21. SYNONYMS = pali
  22. );
  23. -- 创建巴利语停用词字典
  24. CREATE TEXT SEARCH DICTIONARY pali_stopwords (
  25. TEMPLATE = pg_catalog.simple, STOPWORDS = pali,
  26. ACCEPT = true
  27. );
  28. -- 修改全文检索配置 pali 使用我们创建的字典
  29. ALTER TEXT SEARCH CONFIGURATION pali
  30. ADD MAPPING FOR asciiword, word, hword_part, hword_asciipart
  31. WITH pali_stem, pali_stopwords;
  32. -- 修改全文检索配置 pali_unaccent 使用我们创建的字典
  33. CREATE EXTENSION IF NOT EXISTS "unaccent";
  34. ALTER TEXT SEARCH CONFIGURATION pali_unaccent
  35. ADD MAPPING FOR asciiword, word, hword_part, hword_asciipart
  36. WITH unaccent, pali_stem, pali_stopwords;
  37. -- 添加自动更新的 TSVECTOR 字段
  38. ALTER TABLE fts_texts
  39. ADD COLUMN full_text_search_weighted TSVECTOR
  40. GENERATED ALWAYS AS (
  41. setweight(to_tsvector('pali', coalesce(content,'')), 'A') || ' ' ||
  42. setweight(to_tsvector('pali', coalesce(bold_single,'')), 'B') || ' ' ||
  43. setweight(to_tsvector('pali', coalesce(bold_double,'')), 'C') || ' ' ||
  44. setweight(to_tsvector('pali', coalesce(bold_multiple,'')), 'D')
  45. ) STORED;
  46. ALTER TABLE fts_texts
  47. ADD COLUMN full_text_search_weighted_unaccent TSVECTOR
  48. GENERATED ALWAYS AS (
  49. setweight(to_tsvector('pali_unaccent', coalesce(content,'')), 'A') || ' ' ||
  50. setweight(to_tsvector('pali_unaccent', coalesce(bold_single,'')), 'B') || ' ' ||
  51. setweight(to_tsvector('pali_unaccent', coalesce(bold_double,'')), 'C') || ' ' ||
  52. setweight(to_tsvector('pali_unaccent', coalesce(bold_multiple,'')), 'D')
  53. ) STORED;
  54. -- 为该字段创建索引
  55. CREATE INDEX full_text_search_weighted_idx
  56. ON fts_texts USING GIN (full_text_search_weighted);
  57. CREATE INDEX full_text_search_weighted__unaccent_idx
  58. ON fts_texts USING GIN (full_text_search_weighted_unaccent);
  59. -- 创建查询函数
  60. CREATE OR REPLACE FUNCTION query_pali(query_str TEXT)
  61. RETURNS TABLE(
  62. rank NUMERIC,
  63. paragraph INTEGER,
  64. wid VARCHAR,
  65. bold_single TEXT,
  66. bold_double TEXT,
  67. bold_multiple TEXT,
  68. content TEXT,
  69. full_text_search_weighted TSVECTOR,
  70. full_text_search_weighted_unaccent TSVECTOR)
  71. AS $$
  72. SELECT
  73. ts_rank('{0.1, 0.2, 0.4, 1}',
  74. full_text_search_weighted,
  75. websearch_to_tsquery('pali', query_str)) +
  76. ts_rank('{0.1, 0.2, 0.4, 1}',
  77. full_text_search_weighted_unaccent,
  78. websearch_to_tsquery('pali_unaccent', query_str)), -- AS rank
  79. paragraph, wid, bold_single, bold_double, bold_multiple, content,
  80. full_text_search_weighted, full_text_search_weighted_unaccent
  81. FROM fts_texts
  82. WHERE
  83. full_text_search_weighted @@ websearch_to_tsquery('pali', query_str) OR
  84. full_text_search_weighted_unaccent @@ websearch_to_tsquery('pali_unaccent', query_str);
  85. $$ LANGUAGE SQL;