paliword_sc.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. <?php
  2. //全文搜索
  3. require_once '../config.php';
  4. require_once '../public/casesuf.inc';
  5. require_once '../public/union.inc';
  6. require_once "../public/_pdo.php";
  7. require_once "../public/load_lang.php"; //语言文件
  8. require_once "../public/function.php";
  9. require_once "../search/word_function.php";
  10. require_once "../db/pali_text.php";
  11. $_redis = redis_connect();
  12. $_dbPaliText = new PaliText($_redis);
  13. _load_book_index();
  14. $op = $_GET["op"];
  15. $word = mb_strtolower($_GET["key"], 'UTF-8');
  16. $org_word = $word;
  17. $arrWordList = str_getcsv($word, " ");
  18. $count_return = 0;
  19. $dict_list = array();
  20. global $PDO;
  21. function microtime_float()
  22. {
  23. list($usec, $sec) = explode(" ", microtime());
  24. return ((float) $usec + (float) $sec);
  25. }
  26. $result = array();
  27. $result["error"] = "";
  28. $_start = microtime(true);
  29. $result["time"][] = array("event" => "begin", "time" => $_start);
  30. $_pagesize = 20;
  31. if (isset($_GET["page"])) {
  32. $_page = (int) $_GET["page"];
  33. } else {
  34. $_page = 0;
  35. }
  36. if (count($arrWordList) > 1) {
  37. # 查询多个词
  38. $out_data = array();
  39. /*
  40. PDO_Connect(_FILE_DB_PALITEXT_);
  41. # 首先精确匹配
  42. $words = implode(" ", $arrWordList);
  43. $query = "SELECT book,paragraph, text as content FROM "._TABLE_PALI_TEXT_." WHERE text like ? LIMIT ? OFFSET ?";
  44. $Fetch1 = PDO_FetchAll($query, array("%{$words}%", $_pagesize, $_page * $_pagesize));
  45. */
  46. #postgresql full text search
  47. $dns = _DB_ENGIN_.":host="._DB_HOST_.";port="._DB_PORT_.";dbname="._DB_NAME_.";user="._DB_USERNAME_.";password="._DB_PASSWORD_.";";
  48. PDO_Connect($dns,_DB_USERNAME_,_DB_PASSWORD_);
  49. $query = "SELECT
  50. ts_rank('{0.1, 0.2, 0.4, 1}',
  51. full_text_search_weighted,
  52. websearch_to_tsquery('pali', ?)) +
  53. ts_rank('{0.1, 0.2, 0.4, 1}',
  54. full_text_search_weighted_unaccent,
  55. websearch_to_tsquery('pali_unaccent', ?))
  56. AS rank,
  57. ts_headline('pali', content,
  58. websearch_to_tsquery('pali', ?),
  59. 'StartSel = <highlight>, StopSel = </highlight>,MaxWords=3500, MinWords=3500,HighlightAll=TRUE')
  60. AS highlight,
  61. book,paragraph,content
  62. FROM fts_texts
  63. WHERE
  64. full_text_search_weighted
  65. @@ websearch_to_tsquery('pali', ?) OR
  66. full_text_search_weighted_unaccent
  67. @@ websearch_to_tsquery('pali_unaccent', ?)
  68. ORDER BY rank DESC
  69. LIMIT 40;";
  70. $Fetch1 = PDO_FetchAll($query, array($word, $word, $word, $word, $word));
  71. foreach ($Fetch1 as $key => $value) {
  72. # code...
  73. $newRecode["title"] = $_dbPaliText->getTitle($value["book"], $value["paragraph"]);
  74. $newRecode["path"] = _get_para_path($value["book"], $value["paragraph"]);
  75. $newRecode["book"] = $value["book"];
  76. $newRecode["para"] = $value["paragraph"];
  77. $newRecode["palitext"] = $value["content"];
  78. if(isset($value["highlight"])){
  79. $newRecode["highlight"] = $value["highlight"];
  80. }
  81. $newRecode["keyword"] = $arrWordList;
  82. if(isset($value["rank"])){
  83. $newRecode["wt"] = $value["rank"];
  84. }else{
  85. $newRecode["wt"] = 1;
  86. }
  87. $out_data[] = $newRecode;
  88. }
  89. $result["time"][] = array("event" => "fts精确匹配结束", "time" => microtime(true)-$_start);
  90. /*
  91. #然后查分散的
  92. $strQuery = "";
  93. foreach ($arrWordList as $oneword) {
  94. $strQuery .= "\"text\" like \"% {$oneword} %\" AND";
  95. }
  96. $strQuery = substr($strQuery, 0, -3);
  97. $query = "SELECT book,paragraph, html FROM pali_text WHERE {$strQuery} LIMIT 0,20";
  98. $Fetch2 = PDO_FetchAll($query);
  99. foreach ($Fetch2 as $key => $value) {
  100. # code...
  101. $newRecode["title"] = $_dbPaliText->getTitle($value["book"], $value["paragraph"]);
  102. $newRecode["path"] = _get_para_path($value["book"], $value["paragraph"]);
  103. $newRecode["book"] = $value["book"];
  104. $newRecode["para"] = $value["paragraph"];
  105. $newRecode["palitext"] = $value["text"];
  106. $newRecode["keyword"] = $arrWordList;
  107. $newRecode["wt"] = 0;
  108. $out_data[] = $newRecode;
  109. }
  110. $result["time"][] = array("event" => "查分散的结束", "time" => microtime(true)-$_start);
  111. */
  112. $result["data"] = $out_data;
  113. echo json_encode($result, JSON_UNESCAPED_UNICODE);
  114. # 然后查特别不精确的
  115. exit;
  116. }
  117. //计算某词在三藏中出现的次数
  118. $time_start = microtime_float();
  119. $arrRealWordList = countWordInPali($word);
  120. $countWord = count($arrRealWordList);
  121. $result["time"][] = array("event" => "计算某词在三藏中出现的次数", "time" => microtime(true) - $_start);
  122. if ($countWord == 0) {
  123. #没查到 模糊查询
  124. PDO_Connect(_FILE_DB_PALITEXT_);
  125. $query = "SELECT book,paragraph, text FROM "._TABLE_PALI_TEXT_." WHERE text like ? LIMIT ? OFFSET ?";
  126. $Fetch = PDO_FetchAll($query, array("%{$word}%", $_pagesize, $_page * $_pagesize));
  127. $result["data"] = $Fetch;
  128. exit;
  129. }
  130. $strQueryWordId = "("; //实际出现的单词id查询字串
  131. $aQueryWordList = array(); //id 为键 拼写为值的数组
  132. $aInputWordList = array(); //id 为键 拼写为值的数组 该词是否被选择
  133. $aShowWordList = array(); //拼写为键 个数为值的数组
  134. $aShowWordIdList = array(); //拼写为键 值Id的数组
  135. $arrQueryId=array();
  136. for ($i = 0; $i < $countWord; $i++) {
  137. $value = $arrRealWordList[$i];
  138. $strQueryWordId .= "'{$value["id"]}',";
  139. $arrQueryId[] = $value["id"];
  140. $aQueryWordList["{$value["id"]}"] = $value["word"];
  141. $aInputWordList["{$value["id"]}"] = false;
  142. $aShowWordList[$value["word"]] = $value["count"];
  143. $aShowWordIdList[$value["word"]] = $value["id"];
  144. }
  145. if (isset($_GET["words"])) {
  146. $word_selected = json_decode($_GET["words"]);
  147. if (count($word_selected) > 0) {
  148. $strQueryWordId = "(";
  149. foreach ($word_selected as $key => $value) {
  150. $strQueryWordId .= "'{$value}',";
  151. $aInputWordList["{$value}"] = true;
  152. $arrQueryId[] = $value;
  153. }
  154. }
  155. }
  156. $strQueryWordId = mb_substr($strQueryWordId, 0, mb_strlen($strQueryWordId, "UTF-8") - 1, "UTF-8");
  157. $strQueryWordId .= ")";
  158. $queryTime = (microtime_float() - $time_start) * 1000;
  159. //显示单词列表
  160. arsort($aShowWordList);
  161. $result["time"][] = array("event" => "单词列表排序结束", "time" => microtime(true) - $_start);
  162. $out_case = array();
  163. $word_count = 0;
  164. foreach ($aShowWordList as $x => $x_value) {
  165. $caseword = array();
  166. $caseword["id"] = $aShowWordIdList[$x];
  167. $caseword["spell"] = $x;
  168. $caseword["count"] = $x_value;
  169. $caseword["selected"] = $aInputWordList["{$aShowWordIdList[$x]}"];
  170. $word_count += $x_value;
  171. $out_case[] = $caseword;
  172. }
  173. $result["case"] = $out_case;
  174. $result["case_num"] = $countWord;
  175. $result["case_count"] = $word_count;
  176. //查找这些词出现在哪些书中
  177. $booklist = get_new_book_list($strQueryWordId);
  178. $result["book_list"] = $booklist;
  179. $result["book_tag"] = get_book_tag($strQueryWordId);
  180. $result["time"][] = array("event" => "查找书结束", "time" => microtime(true) - $_start);
  181. $wordInBookCounter = 0;
  182. $strFirstBookList = "(";
  183. foreach ($booklist as $onebook) {
  184. $wordInBookCounter += $onebook["count"];
  185. $strFirstBookList .= "'" . $onebook["book"] . "',";
  186. if ($wordInBookCounter >= 20) {
  187. break;
  188. }
  189. }
  190. $strFirstBookList = mb_substr($strFirstBookList, 0, mb_strlen($strFirstBookList, "UTF-8") - 1, "UTF-8");
  191. $strFirstBookList .= ")";
  192. $strQueryBookId = " ";
  193. if (isset($_GET["book"])) {
  194. $book_selected = json_decode($_GET["book"]);
  195. $bookSelected = array();
  196. if (count($book_selected) > 0) {
  197. $strQueryBookId = " AND book IN (";
  198. foreach ($book_selected as $key => $value) {
  199. $strQueryBookId .= "'{$value}',";
  200. $bookSelected[$value] = 1;
  201. }
  202. $strQueryBookId = mb_substr($strQueryBookId, 0, mb_strlen($strQueryBookId, "UTF-8") - 1, "UTF-8");
  203. $strQueryBookId .= ")";
  204. foreach ($result["book_list"] as $bookindex => $bookvalue) {
  205. # code...
  206. $bookid = $bookvalue["book"];
  207. if (isset($bookSelected["{$bookid}"])) {
  208. $result["book_list"][$bookindex]["selected"] = true;
  209. } else {
  210. $result["book_list"][$bookindex]["selected"] = false;
  211. }
  212. }
  213. }
  214. }
  215. $result["time"][] = array("event" => "准备查询", "time" => microtime(true) - $_start);
  216. //前20条记录
  217. $time_start = microtime_float();
  218. PDO_Connect(_FILE_DB_PALI_INDEX_);
  219. $query = "SELECT count(*) from (SELECT book FROM "._TABLE_WORD_." WHERE \"wordindex\" in $strQueryWordId $strQueryBookId group by book,paragraph) as qr ";
  220. $result["record_count"] = PDO_FetchOne($query);
  221. $result["time"][] = array("event" => "查询记录数", "time" => microtime(true) - $_start);
  222. $query = "SELECT book,paragraph, sum(weight) as wt FROM "._TABLE_WORD_." WHERE \"wordindex\" in $strQueryWordId $strQueryBookId GROUP BY book,paragraph ORDER BY wt DESC LIMIT ? OFFSET ?";
  223. $Fetch = PDO_FetchAll($query,array($_pagesize , $_page * $_pagesize));
  224. $result["time"][] = array("event" => "查询结束", "time" => microtime(true) - $_start);
  225. $out_data = array();
  226. $queryTime = (microtime_float() - $time_start) * 1000;
  227. $iFetch = count($Fetch);
  228. if ($iFetch > 0) {
  229. PDO_Connect(_FILE_DB_PALITEXT_);
  230. for ($i = 0; $i < $iFetch; $i++) {
  231. $newRecode = array();
  232. $paliword = array();
  233. foreach ($arrQueryId as $value) {
  234. # code...
  235. $paliword[] = $aQueryWordList["{$value}"];
  236. }
  237. $book = $Fetch[$i]["book"];
  238. $paragraph = $Fetch[$i]["paragraph"];
  239. $bookInfo = _get_book_info($book);
  240. $bookname = $bookInfo->title;
  241. $c1 = $bookInfo->c1;
  242. $c2 = $bookInfo->c2;
  243. $c3 = $bookInfo->c3;
  244. $path_1 = $c1 . ">";
  245. if ($c2 !== "") {
  246. $path_1 = $path_1 . $c2 . ">";
  247. }
  248. if ($c3 !== "") {
  249. $path_1 = $path_1 . $c3 . ">";
  250. }
  251. $path_1 = $path_1 . "《{$bookname}》>";
  252. $query = "SELECT * from "._TABLE_PALI_TEXT_." where book = ? and paragraph = ? limit 1";
  253. $FetchPaliText = PDO_FetchAll($query,array($book,$paragraph));
  254. $countPaliText = count($FetchPaliText);
  255. if ($countPaliText > 0) {
  256. $path = "";
  257. $parent = $FetchPaliText[0]["parent"];
  258. $deep = 0;
  259. $sFirstParentTitle = "";
  260. //循环查找父标题 得到整条路径
  261. while ($parent > -1) {
  262. $query = "SELECT * from "._TABLE_PALI_TEXT_." where book = ? and paragraph = ? limit 1";
  263. $FetParent = PDO_FetchAll($query,array($book,$parent));
  264. $path = "{$FetParent[0]["toc"]}>{$path}";
  265. if ($sFirstParentTitle == "") {
  266. $sFirstParentTitle = $FetParent[0]["toc"];
  267. }
  268. $parent = $FetParent[0]["parent"];
  269. $deep++;
  270. if ($deep > 5) {
  271. break;
  272. }
  273. }
  274. $path = $path_1 . $path . "para. " . $paragraph;
  275. $newRecode["title"] = $sFirstParentTitle;
  276. $newRecode["path"] = $path;
  277. $newRecode["book"] = $book;
  278. $newRecode["para"] = $paragraph;
  279. $newRecode["palitext"] = $FetchPaliText[0]["html"];
  280. $newRecode["keyword"] = $paliword;
  281. $newRecode["wt"] = $Fetch[$i]["wt"];
  282. $out_data[] = $newRecode;
  283. }
  284. }
  285. }
  286. $result["time"][] = array("event" => "查询路径结束", "time" => microtime(true) - $_start);
  287. $result["data"] = $out_data;
  288. echo json_encode($result, JSON_UNESCAPED_UNICODE);