paliword_sc.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. <?php
  2. //全文搜索
  3. require_once '../config.php';
  4. require_once '../public/casesuf.inc';
  5. require_once '../public/union.inc';
  6. require_once "../public/_pdo.php";
  7. require_once "../public/load_lang.php"; //语言文件
  8. require_once "../public/function.php";
  9. require_once "../search/word_function.php";
  10. require_once "../db/pali_text.php";
  // Shared services used by the rest of the script.
  $_redis = redis_connect();
  $_dbPaliText = new PaliText($_redis);
  _load_book_index();

  // Request parameters. Neither is guaranteed to be present, and "key" may
  // arrive as an array (?key[]=...), so check before reading.
  $op = isset($_GET["op"]) ? $_GET["op"] : null;
  $rawKey = (isset($_GET["key"]) && is_string($_GET["key"])) ? $_GET["key"] : "";
  $word = mb_strtolower($rawKey, 'UTF-8');
  $org_word = $word;
  // Space-separated search terms (quoted phrases stay together).
  $arrWordList = str_getcsv($word, " ");
  $count_return = 0;
  $dict_list = array();
  // Has no effect at file scope; it only matters if this script is ever
  // included from inside a function.
  global $PDO;
  /**
   * Current Unix timestamp in seconds, including the microsecond fraction.
   *
   * @return float Seconds since the Unix epoch.
   */
  function microtime_float()
  {
      // microtime(true) returns the float directly; no need to split and
      // re-add the "usec sec" string form.
      return microtime(true);
  }
  // Response envelope: an error message, a list of timing marks, and
  // (added later) the payload.
  $result = array();
  $result["error"] = "";
  $_start = microtime(true);
  $result["time"][] = array("event" => "begin", "time" => $_start);

  // Paging: fixed page size, zero-based page number taken from ?page=.
  $_pagesize = 20;
  $_page = 0;
  if (isset($_GET["page"])) {
      // Clamp at zero: a negative page would turn into a negative OFFSET
      // in the queries below.
      $_page = max(0, (int) $_GET["page"]);
  }
  if (count($arrWordList) > 1) {
      // Several search terms: run a ranked full-text query against fts_texts
      // using both the 'pali' and the 'pali_unaccent' text-search
      // configurations, emit the result and stop.
      // NOTE(review): this branch always returns the top 40 rows and ignores
      // $_page / $_pagesize.
      $out_data = array();
      PDO_Connect(_FILE_DB_PALITEXT_, _DB_USERNAME_, _DB_PASSWORD_);
      $query = "SELECT
          ts_rank('{0.1, 0.2, 0.4, 1}',
              full_text_search_weighted,
              websearch_to_tsquery('pali', ?)) +
          ts_rank('{0.1, 0.2, 0.4, 1}',
              full_text_search_weighted_unaccent,
              websearch_to_tsquery('pali_unaccent', ?))
          AS rank,
          ts_headline('pali', content,
              websearch_to_tsquery('pali', ?),
              'StartSel = <highlight>, StopSel = </highlight>,MaxWords=3500, MinWords=3500,HighlightAll=TRUE')
          AS highlight,
          book,paragraph,content
          FROM fts_texts
          WHERE
              full_text_search_weighted
              @@ websearch_to_tsquery('pali', ?) OR
              full_text_search_weighted_unaccent
              @@ websearch_to_tsquery('pali_unaccent', ?)
          ORDER BY rank DESC
          LIMIT 40;";
      // The same search text is bound to all five placeholders.
      $Fetch1 = PDO_FetchAll($query, array($word, $word, $word, $word, $word));
      foreach ($Fetch1 as $key => $value) {
          // Start from an empty record so nothing leaks between rows.
          $newRecode = array();
          $newRecode["title"] = $_dbPaliText->getTitle($value["book"], $value["paragraph"]);
          $newRecode["path"] = _get_para_path($value["book"], $value["paragraph"]);
          $newRecode["book"] = $value["book"];
          $newRecode["para"] = $value["paragraph"];
          $newRecode["palitext"] = $value["content"];
          $newRecode["highlight"] = $value["highlight"];
          $newRecode["keyword"] = $arrWordList;
          $newRecode["wt"] = $value["rank"];
          $out_data[] = $newRecode;
      }
      $result["time"][] = array("event" => "fts精确匹配结束", "time" => microtime(true) - $_start);
      $result["data"] = $out_data;
      echo json_encode($result, JSON_UNESCAPED_UNICODE);
      exit;
  }
  // Look up the word forms for the search term and how often each occurs.
  $time_start = microtime_float();
  $arrRealWordList = countWordInPali($word);
  $countWord = count($arrRealWordList);
  $result["time"][] = array("event" => "计算某词在三藏中出现的次数", "time" => microtime(true) - $_start);
  if ($countWord == 0) {
      // Nothing found by the word lookup: fall back to a substring match on
      // the paragraph text, paged like the main query.
      PDO_Connect(_FILE_DB_PALITEXT_);
      $query = "SELECT book,paragraph, text FROM "._TABLE_PALI_TEXT_." WHERE text like ? LIMIT ? OFFSET ?";
      $Fetch = PDO_FetchAll($query, array("%{$word}%", $_pagesize, $_page * $_pagesize));
      $result["data"] = $Fetch;
      // Emit the response before stopping; exiting without output would
      // leave the client with an empty body.
      echo json_encode($result, JSON_UNESCAPED_UNICODE);
      exit;
  }
  // Lookup tables built from the word forms returned by countWordInPali().
  $aQueryWordList = array();  // id => spelling
  $aInputWordList = array();  // id => true when the client selected this form
  $aShowWordList = array();   // spelling => occurrence count
  $aShowWordIdList = array(); // spelling => id
  $arrQueryId = array();      // ids later used to build the "keyword" list
  $quotedIds = array();       // ids as quoted SQL literals
  foreach ($arrRealWordList as $value) {
      $quotedIds[] = "'{$value["id"]}'";
      $arrQueryId[] = $value["id"];
      $aQueryWordList["{$value["id"]}"] = $value["word"];
      $aInputWordList["{$value["id"]}"] = false;
      $aShowWordList[$value["word"]] = $value["count"];
      $aShowWordIdList[$value["word"]] = $value["id"];
  }

  // Optional narrowing to specific forms via ?words= (JSON array of ids).
  if (isset($_GET["words"]) && is_string($_GET["words"])) {
      $word_selected = json_decode($_GET["words"], true);
      $selectedIds = array();
      if (is_array($word_selected)) {
          foreach ($word_selected as $key => $value) {
              // The ids end up inside SQL text, so accept only ids that the
              // lookup above actually returned; anything else is dropped.
              if ((is_int($value) || is_string($value)) && isset($aInputWordList[$value])) {
                  $selectedIds[] = "'{$value}'";
                  $aInputWordList["{$value}"] = true;
                  // NOTE(review): selected ids are appended to the full id
                  // list rather than replacing it, as before — confirm that
                  // the keyword list is meant to cover all forms.
                  $arrQueryId[] = $value;
              }
          }
      }
      // An empty or entirely invalid selection means "all forms".
      if (count($selectedIds) > 0) {
          $quotedIds = $selectedIds;
      }
  }

  // SQL list of word ids, e.g. ('12','57'), used by the queries below.
  $strQueryWordId = "(" . implode(",", $quotedIds) . ")";
  // Time spent since $time_start was taken, in milliseconds.
  $queryTime = 1000 * (microtime_float() - $time_start);

  // Word-form list for the client, highest occurrence count first.
  // arsort() keeps the spelling keys attached to their counts.
  arsort($aShowWordList);
  $result["time"][] = array("event" => "单词列表排序结束", "time" => microtime(true) - $_start);

  $out_case = array();
  $word_count = 0;
  foreach ($aShowWordList as $spelling => $occurrences) {
      $formId = $aShowWordIdList[$spelling];
      $out_case[] = array(
          "id" => $formId,
          "spell" => $spelling,
          "count" => $occurrences,
          "selected" => $aInputWordList["{$formId}"],
      );
      // Running total of occurrences over all forms.
      $word_count += $occurrences;
  }

  $result["case"] = $out_case;
  $result["case_num"] = $countWord;
  $result["case_count"] = $word_count;
  // Books and book tags for the selected word ids.
  $booklist = get_new_book_list($strQueryWordId);
  $result["book_list"] = $booklist;
  $result["book_tag"] = get_book_tag($strQueryWordId);
  $result["time"][] = array("event" => "查找书结束", "time" => microtime(true) - $_start);

  // Optional restriction to specific books via ?book= (JSON array of ids).
  // Without a usable selection the extra SQL fragment stays a single space.
  $strQueryBookId = " ";
  if (isset($_GET["book"]) && is_string($_GET["book"])) {
      $book_selected = json_decode($_GET["book"], true);
      $bookSelected = array();
      if (is_array($book_selected)) {
          foreach ($book_selected as $key => $value) {
              // The ids are placed inside SQL text below, so accept only
              // plain tokens (letters, digits, "_", ".", "-"); anything that
              // could close the quoted literal is dropped.
              // NOTE(review): assumes real book ids fit this pattern — confirm.
              if ((is_int($value) || is_string($value))
                  && preg_match('/\A[A-Za-z0-9_.-]+\z/', (string) $value) === 1) {
                  $bookSelected[$value] = 1;
              }
          }
      }
      if (count($bookSelected) > 0) {
          $quotedBooks = array();
          foreach ($bookSelected as $bookid => $flag) {
              $quotedBooks[] = "'{$bookid}'";
          }
          $strQueryBookId = " AND book IN (" . implode(",", $quotedBooks) . ")";
          // Tell the client which entries of the book list are selected.
          foreach ($result["book_list"] as $bookindex => $bookvalue) {
              $bookid = $bookvalue["book"];
              $result["book_list"][$bookindex]["selected"] = isset($bookSelected["{$bookid}"]);
          }
      }
  }
  $result["time"][] = array("event" => "准备查询", "time" => microtime(true) - $_start);

  // Query the word index: first the total number of matching
  // (book, paragraph) pairs, then the requested page ordered by weight.
  $time_start = microtime_float();
  PDO_Connect(_FILE_DB_PALI_INDEX_);

  // Filter shared by both statements: word ids plus the optional book clause.
  $matchFilter = "\"wordindex\" in $strQueryWordId $strQueryBookId";

  $query = "SELECT count(*) from (SELECT book FROM " . _TABLE_WORD_ . " WHERE " . $matchFilter . " group by book,paragraph) as qr where true ";
  $result["record_count"] = PDO_FetchOne($query);
  $result["time"][] = array("event" => "查询记录数", "time" => microtime(true) - $_start);

  $query = "SELECT book,paragraph, sum(weight) as wt FROM " . _TABLE_WORD_ . " WHERE " . $matchFilter . " GROUP BY book,paragraph ORDER BY wt DESC LIMIT ? OFFSET ?";
  $pageArgs = array($_pagesize, $_page * $_pagesize);
  $Fetch = PDO_FetchAll($query, $pageArgs);
  $result["time"][] = array("event" => "查询结束", "time" => microtime(true) - $_start);

  $out_data = array();
  // Duration of the two index queries, in milliseconds.
  $queryTime = 1000 * (microtime_float() - $time_start);
  // Spellings for the queried ids. The list is the same for every hit, so
  // build it once; ids without a known spelling are skipped.
  $paliword = array();
  foreach ($arrQueryId as $value) {
      if ((is_int($value) || is_string($value)) && isset($aQueryWordList["{$value}"])) {
          $paliword[] = $aQueryWordList["{$value}"];
      }
  }

  $iFetch = count($Fetch);
  if ($iFetch > 0) {
      PDO_Connect(_FILE_DB_PALITEXT_);
      // One statement serves both the hit paragraph and its ancestors.
      $query = "SELECT * from "._TABLE_PALI_TEXT_." where book = ? and paragraph = ? limit 1";
      for ($i = 0; $i < $iFetch; $i++) {
          $newRecode = array();
          $book = $Fetch[$i]["book"];
          $paragraph = $Fetch[$i]["paragraph"];

          // Leading part of the path: category levels, then the book title.
          $bookInfo = _get_book_info($book);
          $bookname = $bookInfo->title;
          $c1 = $bookInfo->c1;
          $c2 = $bookInfo->c2;
          $c3 = $bookInfo->c3;
          $path_1 = $c1 . ">";
          if ($c2 !== "") {
              $path_1 = $path_1 . $c2 . ">";
          }
          if ($c3 !== "") {
              $path_1 = $path_1 . $c3 . ">";
          }
          $path_1 = $path_1 . "《{$bookname}》>";

          $FetchPaliText = PDO_FetchAll($query, array($book, $paragraph));
          $countPaliText = count($FetchPaliText);
          if ($countPaliText > 0) {
              $path = "";
              $parent = $FetchPaliText[0]["parent"];
              $deep = 0;
              $sFirstParentTitle = "";
              // Follow the parent chain upwards, prepending each heading, to
              // get the full path. At most six levels are followed.
              while ($parent > -1) {
                  $FetParent = PDO_FetchAll($query, array($book, $parent));
                  if (count($FetParent) == 0) {
                      // The parent row does not exist: stop climbing instead
                      // of reading a missing row.
                      break;
                  }
                  $path = "{$FetParent[0]["toc"]}>{$path}";
                  if ($sFirstParentTitle == "") {
                      // The nearest heading becomes the record title.
                      $sFirstParentTitle = $FetParent[0]["toc"];
                  }
                  $parent = $FetParent[0]["parent"];
                  $deep++;
                  if ($deep > 5) {
                      break;
                  }
              }
              $path = $path_1 . $path . "para. " . $paragraph;
              $newRecode["title"] = $sFirstParentTitle;
              $newRecode["path"] = $path;
              $newRecode["book"] = $book;
              $newRecode["para"] = $paragraph;
              $newRecode["palitext"] = $FetchPaliText[0]["html"];
              $newRecode["keyword"] = $paliword;
              $newRecode["wt"] = $Fetch[$i]["wt"];
              $out_data[] = $newRecode;
          }
      }
  }
  $result["time"][] = array("event" => "查询路径结束", "time" => microtime(true) - $_start);
  $result["data"] = $out_data;
  echo json_encode($result, JSON_UNESCAPED_UNICODE);