2
0

paliword_sc.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. <?php
  2. //全文搜索
  3. require_once '../config.php';
  4. require_once '../public/casesuf.inc';
  5. require_once '../public/union.inc';
  6. require_once "../public/_pdo.php";
  7. require_once "../public/load_lang.php"; //语言文件
  8. require_once "../public/function.php";
  9. require_once "../search/word_function.php";
  10. require_once "../db/pali_text.php";
  11. $_redis = redis_connect();
  12. $_dbPaliText = new PaliText($_redis);
  13. _load_book_index();
  14. $op = $_GET["op"];
  15. $word = mb_strtolower($_GET["key"], 'UTF-8');
  16. $org_word = $word;
  17. $arrWordList = str_getcsv($word, " ");
  18. $count_return = 0;
  19. $dict_list = array();
  20. global $PDO;
  21. function microtime_float()
  22. {
  23. list($usec, $sec) = explode(" ", microtime());
  24. return ((float) $usec + (float) $sec);
  25. }
  26. $result = array();
  27. $result["error"] = "";
  28. $_start = microtime(true);
  29. $result["time"][] = array("event" => "begin", "time" => $_start);
  30. $_pagesize = 20;
  31. if (isset($_GET["page"])) {
  32. $_page = (int) $_GET["page"];
  33. } else {
  34. $_page = 0;
  35. }
  36. if (count($arrWordList) > 1) {
  37. # 查询多个词
  38. $out_data = array();
  39. /*
  40. PDO_Connect(_FILE_DB_PALITEXT_);
  41. # 首先精确匹配
  42. $words = implode(" ", $arrWordList);
  43. $query = "SELECT book,paragraph, text as content FROM "._TABLE_PALI_TEXT_." WHERE text like ? LIMIT ? OFFSET ?";
  44. $Fetch1 = PDO_FetchAll($query, array("%{$words}%", $_pagesize, $_page * $_pagesize));
  45. */
  46. #postgresql full text search
  47. $dns = _DB_ENGIN_.":host="._DB_HOST_.";port="._DB_PORT_.";dbname="._DB_NAME_.";user="._DB_USERNAME_.";password="._DB_PASSWORD_.";";
  48. PDO_Connect($dns,_DB_USERNAME_,_DB_PASSWORD_);
  49. $query = "SELECT
  50. ts_rank('{0.1, 0.2, 0.4, 1}',
  51. full_text_search_weighted,
  52. websearch_to_tsquery('pali', ?)) +
  53. ts_rank('{0.1, 0.2, 0.4, 1}',
  54. full_text_search_weighted_unaccent,
  55. websearch_to_tsquery('pali_unaccent', ?))
  56. AS rank,
  57. ts_headline('pali', content,
  58. websearch_to_tsquery('pali', ?),
  59. 'StartSel = <highlight>, StopSel = </highlight>,MaxWords=3500, MinWords=3500,HighlightAll=TRUE')
  60. AS highlight,
  61. book,paragraph,content
  62. FROM fts_texts
  63. WHERE
  64. full_text_search_weighted
  65. @@ websearch_to_tsquery('pali', ?) OR
  66. full_text_search_weighted_unaccent
  67. @@ websearch_to_tsquery('pali_unaccent', ?)
  68. ORDER BY rank DESC
  69. LIMIT 40;";
  70. $Fetch1 = PDO_FetchAll($query, array($word, $word, $word, $word, $word));
  71. foreach ($Fetch1 as $key => $value) {
  72. # code...
  73. $newRecode["title"] = $_dbPaliText->getTitle($value["book"], $value["paragraph"]);
  74. $newRecode["path"] = _get_para_path($value["book"], $value["paragraph"]);
  75. $newRecode["book"] = $value["book"];
  76. $newRecode["para"] = $value["paragraph"];
  77. $newRecode["palitext"] = $value["content"];
  78. if(isset($value["highlight"])){
  79. $newRecode["highlight"] = $value["highlight"];
  80. }
  81. $newRecode["keyword"] = $arrWordList;
  82. if(isset($value["rank"])){
  83. $newRecode["wt"] = $value["rank"];
  84. }else{
  85. $newRecode["wt"] = 1;
  86. }
  87. $out_data[] = $newRecode;
  88. }
  89. $result["time"][] = array("event" => "fts精确匹配结束", "time" => microtime(true)-$_start);
  90. $result["data"] = $out_data;
  91. echo json_encode($result, JSON_UNESCAPED_UNICODE);
  92. # 然后查特别不精确的
  93. exit;
  94. }
  95. //计算某词在三藏中出现的次数
  96. $time_start = microtime_float();
  97. $arrRealWordList = countWordInPali($word);
  98. $countWord = count($arrRealWordList);
  99. $result["time"][] = array("event" => "计算某词在三藏中出现的次数", "time" => microtime(true) - $_start);
  100. if ($countWord == 0) {
  101. #没查到 模糊查询
  102. PDO_Connect(_FILE_DB_PALITEXT_);
  103. $query = "SELECT book,paragraph, text FROM "._TABLE_PALI_TEXT_." WHERE text like ? LIMIT ? OFFSET ?";
  104. $Fetch = PDO_FetchAll($query, array("%{$word}%", $_pagesize, $_page * $_pagesize));
  105. $result["data"] = $Fetch;
  106. exit;
  107. }
  108. $strQueryWordId = "("; //实际出现的单词id查询字串
  109. $aQueryWordList = array(); //id 为键 拼写为值的数组
  110. $aInputWordList = array(); //id 为键 拼写为值的数组 该词是否被选择
  111. $aShowWordList = array(); //拼写为键 个数为值的数组
  112. $aShowWordIdList = array(); //拼写为键 值Id的数组
  113. $arrQueryId=array();
  114. for ($i = 0; $i < $countWord; $i++) {
  115. $value = $arrRealWordList[$i];
  116. $strQueryWordId .= "'{$value["id"]}',";
  117. $arrQueryId[] = $value["id"];
  118. $aQueryWordList["{$value["id"]}"] = $value["word"];
  119. $aInputWordList["{$value["id"]}"] = false;
  120. $aShowWordList[$value["word"]] = $value["count"];
  121. $aShowWordIdList[$value["word"]] = $value["id"];
  122. }
  123. if (isset($_GET["words"])) {
  124. $word_selected = json_decode($_GET["words"]);
  125. if (count($word_selected) > 0) {
  126. $strQueryWordId = "(";
  127. foreach ($word_selected as $key => $value) {
  128. $strQueryWordId .= "'{$value}',";
  129. $aInputWordList["{$value}"] = true;
  130. $arrQueryId[] = $value;
  131. }
  132. }
  133. }
  134. $strQueryWordId = mb_substr($strQueryWordId, 0, mb_strlen($strQueryWordId, "UTF-8") - 1, "UTF-8");
  135. $strQueryWordId .= ")";
  136. $queryTime = (microtime_float() - $time_start) * 1000;
  137. //显示单词列表
  138. arsort($aShowWordList);
  139. $result["time"][] = array("event" => "单词列表排序结束", "time" => microtime(true) - $_start);
  140. $out_case = array();
  141. $word_count = 0;
  142. foreach ($aShowWordList as $x => $x_value) {
  143. $caseword = array();
  144. $caseword["id"] = $aShowWordIdList[$x];
  145. $caseword["spell"] = $x;
  146. $caseword["count"] = $x_value;
  147. $caseword["selected"] = $aInputWordList["{$aShowWordIdList[$x]}"];
  148. $word_count += $x_value;
  149. $out_case[] = $caseword;
  150. }
  151. $result["case"] = $out_case;
  152. $result["case_num"] = $countWord;
  153. $result["case_count"] = $word_count;
  154. //查找这些词出现在哪些书中
  155. $booklist = get_new_book_list($strQueryWordId);
  156. $result["book_list"] = $booklist;
  157. $result["book_tag"] = get_book_tag($strQueryWordId);
  158. $result["time"][] = array("event" => "查找书结束", "time" => microtime(true) - $_start);
  159. $wordInBookCounter = 0;
  160. $strFirstBookList = "(";
  161. foreach ($booklist as $onebook) {
  162. $wordInBookCounter += $onebook["count"];
  163. $strFirstBookList .= "'" . $onebook["book"] . "',";
  164. if ($wordInBookCounter >= 20) {
  165. break;
  166. }
  167. }
  168. $strFirstBookList = mb_substr($strFirstBookList, 0, mb_strlen($strFirstBookList, "UTF-8") - 1, "UTF-8");
  169. $strFirstBookList .= ")";
  170. $strQueryBookId = " ";
  171. if (isset($_GET["book"])) {
  172. $book_selected = json_decode($_GET["book"]);
  173. $bookSelected = array();
  174. if (count($book_selected) > 0) {
  175. $strQueryBookId = " AND book IN (";
  176. foreach ($book_selected as $key => $value) {
  177. $strQueryBookId .= "'{$value}',";
  178. $bookSelected[$value] = 1;
  179. }
  180. $strQueryBookId = mb_substr($strQueryBookId, 0, mb_strlen($strQueryBookId, "UTF-8") - 1, "UTF-8");
  181. $strQueryBookId .= ")";
  182. foreach ($result["book_list"] as $bookindex => $bookvalue) {
  183. # code...
  184. $bookid = $bookvalue["book"];
  185. if (isset($bookSelected["{$bookid}"])) {
  186. $result["book_list"][$bookindex]["selected"] = true;
  187. } else {
  188. $result["book_list"][$bookindex]["selected"] = false;
  189. }
  190. }
  191. }
  192. }
  193. $result["time"][] = array("event" => "准备查询", "time" => microtime(true) - $_start);
  194. //前20条记录
  195. $time_start = microtime_float();
  196. PDO_Connect(_FILE_DB_PALI_INDEX_);
  197. $query = "SELECT count(*) from (SELECT book FROM "._TABLE_WORD_." WHERE \"wordindex\" in $strQueryWordId $strQueryBookId group by book,paragraph) as qr ";
  198. $result["record_count"] = PDO_FetchOne($query);
  199. $result["time"][] = array("event" => "查询记录数", "time" => microtime(true) - $_start);
  200. $query = "SELECT book,paragraph, sum(weight) as wt FROM "._TABLE_WORD_." WHERE \"wordindex\" in $strQueryWordId $strQueryBookId GROUP BY book,paragraph ORDER BY wt DESC LIMIT ? OFFSET ?";
  201. $Fetch = PDO_FetchAll($query,array($_pagesize , $_page * $_pagesize));
  202. $result["time"][] = array("event" => "查询结束", "time" => microtime(true) - $_start);
  203. $out_data = array();
  204. $queryTime = (microtime_float() - $time_start) * 1000;
  205. $iFetch = count($Fetch);
  206. if ($iFetch > 0) {
  207. PDO_Connect(_FILE_DB_PALITEXT_);
  208. for ($i = 0; $i < $iFetch; $i++) {
  209. $newRecode = array();
  210. $paliword = array();
  211. foreach ($arrQueryId as $value) {
  212. # code...
  213. $paliword[] = $aQueryWordList["{$value}"];
  214. }
  215. $book = $Fetch[$i]["book"];
  216. $paragraph = $Fetch[$i]["paragraph"];
  217. $bookInfo = _get_book_info($book);
  218. $bookname = $bookInfo->title;
  219. $c1 = $bookInfo->c1;
  220. $c2 = $bookInfo->c2;
  221. $c3 = $bookInfo->c3;
  222. $path_1 = $c1 . ">";
  223. if ($c2 !== "") {
  224. $path_1 = $path_1 . $c2 . ">";
  225. }
  226. if ($c3 !== "") {
  227. $path_1 = $path_1 . $c3 . ">";
  228. }
  229. $path_1 = $path_1 . "《{$bookname}》>";
  230. $query = "SELECT * from "._TABLE_PALI_TEXT_." where book = ? and paragraph = ? limit 1";
  231. $FetchPaliText = PDO_FetchAll($query,array($book,$paragraph));
  232. $countPaliText = count($FetchPaliText);
  233. if ($countPaliText > 0) {
  234. $path = "";
  235. $parent = $FetchPaliText[0]["parent"];
  236. $deep = 0;
  237. $sFirstParentTitle = "";
  238. //循环查找父标题 得到整条路径
  239. while ($parent > -1) {
  240. $query = "SELECT * from "._TABLE_PALI_TEXT_." where book = ? and paragraph = ? limit 1";
  241. $FetParent = PDO_FetchAll($query,array($book,$parent));
  242. $path = "{$FetParent[0]["toc"]}>{$path}";
  243. if ($sFirstParentTitle == "") {
  244. $sFirstParentTitle = $FetParent[0]["toc"];
  245. }
  246. $parent = $FetParent[0]["parent"];
  247. $deep++;
  248. if ($deep > 5) {
  249. break;
  250. }
  251. }
  252. $path = $path_1 . $path . "para. " . $paragraph;
  253. $newRecode["title"] = $sFirstParentTitle;
  254. $newRecode["path"] = $path;
  255. $newRecode["book"] = $book;
  256. $newRecode["para"] = $paragraph;
  257. $newRecode["palitext"] = $FetchPaliText[0]["html"];
  258. $newRecode["keyword"] = $paliword;
  259. $newRecode["wt"] = $Fetch[$i]["wt"];
  260. $out_data[] = $newRecode;
  261. }
  262. }
  263. }
  264. $result["time"][] = array("event" => "查询路径结束", "time" => microtime(true) - $_start);
  265. $result["data"] = $out_data;
  266. echo json_encode($result, JSON_UNESCAPED_UNICODE);