dict_find_auto.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. <?php
  2. require_once "../path.php";
  3. require_once "../public/_pdo.php";
  4. require_once "../public/function.php";
  5. require_once '../ucenter/setting_function.php';
  // Per-user settings; the "dict.lang" entry is consulted further down when
  // deciding whether a dictionary entry's meaning is returned.
  $user_setting = get_setting();

  // Request parameters (all read from the query string):
  //   book  - book identifier, used to build the template database file name
  //   para  - comma-separated list of paragraph identifiers within that book
  //   debug - when present, diagnostic HTML is echoed around the JSON payload
  $in_book = (isset($_GET["book"]) && is_string($_GET["book"])) ? $_GET["book"] : "";
  $in_para = (isset($_GET["para"]) && is_string($_GET["para"])) ? $_GET["para"] : "";
  $debug = isset($_GET["debug"]);

  // "book" is spliced into a file path, so only accept characters that cannot
  // leave the template directory. A missing "book" or "para" used to trigger
  // undefined-variable warnings followed by a meaningless query; answer with
  // the same empty JSON array that the script returns when no word is found.
  // NOTE(review): assumes book identifiers only contain letters, digits, "_" and "-" - confirm.
  if (preg_match('/^[A-Za-z0-9_-]+$/D', $in_book) !== 1 || trim($in_para) === "") {
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Build the SQL "IN (...)" list of paragraph literals. Every value is
  // emitted as a single-quoted SQL string literal with embedded single quotes
  // doubled, so request data cannot terminate the literal and inject SQL.
  $para_list = str_getcsv($in_para);
  $quoted_paras = array();
  foreach ($para_list as $para) {
      $quoted_paras[] = "'" . str_replace("'", "''", (string) $para) . "'";
  }
  $strQueryPara = "(" . implode(",", $quoted_paras) . ")";
  /**
   * Returns the current Unix timestamp in seconds, including the sub-second
   * fraction, as a float.
   *
   * @return float
   */
  function microtime_float()
  {
      // microtime(true) yields the float directly; there is no need to split
      // the "usec sec" string form and add the two halves by hand.
      return microtime(true);
  }
  // Start of the timed section; the elapsed time is reported in debug mode.
  $time_start = microtime_float();

  // Open the template database of the requested book.
  // NOTE(review): $PDO is presumably the handle shared with the PDO_* helpers - confirm in _pdo.php.
  global $PDO;
  $db_file = _DIR_PALICANON_TEMPLET_ . "/p{$in_book}_tpl.db3";
  PDO_Connect("sqlite:" . $db_file);

  // Fetch every word of the requested paragraphs that has a non-empty "real"
  // spelling and is not of type '.ctl.'.
  $query = "SELECT paragraph,wid,real FROM \"main\" WHERE (\"paragraph\" in {$strQueryPara} ) and \"real\"<>\"\" and \"type\"<>'.ctl.' ";
  if ($debug) {
      echo "filename:{$db_file}<br>";
      echo "{$query}<br>";
  }
  $FetchAllWord = PDO_FetchAll($query);
  $iFetch = count($FetchAllWord);
  if ($iFetch < 1) {
      // Nothing to look up: answer with an empty JSON array.
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Collapse the rows into a set of distinct spellings (spelling => 1).
  $voc_list = array();
  foreach ($FetchAllWord as $row) {
      $voc_list[$row["real"]] = 1;
  }
  if ($debug) {
      $voc_count = count($voc_list);
      echo "单词表共计:{$voc_count}词<br>";
  }

  // The distinct spellings are the words looked up in the first round.
  $word_list = array_keys($voc_list);
  // Upper bound on the number of lookup rounds performed below.
  $lookup_loop = 2;
  // Set of spellings for which at least one dictionary row was found (spelling => 1).
  $dict_word_spell = array();
  // Dictionary entries collected across all rounds and dictionaries.
  $output = array();

  // Dictionaries to consult, in this order. Each item is
  // array(database file, SQL suffix appended to the lookup query).
  $db_file_list = array(
      // user dictionary
      array(_FILE_DB_WBW1_, " ORDER BY rowid DESC"),
      array(_DIR_DICT_SYSTEM_ . "/sys_regular.db", " ORDER BY confidence DESC"),
      array(_DIR_DICT_SYSTEM_ . "/sys_irregular.db", ""),
      array(_DIR_DICT_SYSTEM_ . "/union.db", ""),
      array(_DIR_DICT_SYSTEM_ . "/comp.db", ""),
      array(_DIR_DICT_3RD_ . "/pm.db", ""),
      array(_DIR_DICT_3RD_ . "/bhmf.db", ""),
      array(_DIR_DICT_3RD_ . "/shuihan.db", ""),
      array(_DIR_DICT_3RD_ . "/concise.db", ""),
      array(_DIR_DICT_3RD_ . "/uhan_en.db", ""),
  );
  // Look the current word list up in every dictionary. Parents and factors of
  // the rows found become the word list of the next round; the loop stops
  // early once a round yields no such words.
  for ($i = 0; $i < $lookup_loop; $i++) {
      // Words to look up in the next round (word => 1).
      $parent_list = array();

      // SQL "IN (...)" list for this round. Straight apostrophes are replaced
      // by the typographic one so that a word cannot close its SQL literal.
      $quoted_words = array();
      foreach ($word_list as $word) {
          $quoted_words[] = "'" . str_replace("'", "’", $word) . "'";
      }
      $strQueryWord = "(" . implode(",", $quoted_words) . ")";
      if ($debug) {
          echo "<h2>第{$i}轮查询:{$strQueryWord}</h2>";
      }

      // Only the first round excludes the ":base." types and '.part.' rows.
      $type_filter = ($i == 0)
          ? "AND ( type <> '.n:base.' AND type <> '.ti:base.' AND type <> '.adj:base.' AND type <> '.pron:base.' AND type <> '.v:base.' AND type <> '.part.' ) "
          : "";

      foreach ($db_file_list as $db) {
          list($db_file, $strOrderby) = $db;
          if ($debug) {
              echo "dict:{$db_file}<br>";
          }

          PDO_Connect("sqlite:" . $db_file);
          PDO_Execute("PRAGMA synchronous = OFF");
          PDO_Execute("PRAGMA journal_mode = WAL");
          PDO_Execute("PRAGMA foreign_keys = ON");
          PDO_Execute("PRAGMA busy_timeout = 5000");

          $query = "select * from dict where \"pali\" in {$strQueryWord} " . $type_filter . $strOrderby;
          if ($debug) {
              echo "{$query}<br>";
          }

          try {
              $rows = PDO_FetchAll($query);
          } catch (Exception $e) {
              // A failing dictionary is skipped; the remaining ones are still queried.
              if ($debug) {
                  echo 'Caught exception: ', $e->getMessage(), "\n";
              }
              continue;
          }

          $iFetch = count($rows);
          if ($debug) {
              echo "count:{$iFetch}<br>";
          }

          if ($iFetch > 0) {
              foreach ($rows as $row) {
                  $id = $row["id"];
                  $guid = isset($row["guid"]) ? $row["guid"] : "";

                  // The language column is named "lang" or "language"; "en" when neither is set.
                  $language = isset($row["lang"])
                      ? $row["lang"]
                      : (isset($row["language"]) ? $row["language"] : "en");

                  $pali = $row["pali"];
                  // Remember that this spelling exists in some dictionary.
                  $dict_word_spell[(string) $pali] = 1;
                  $type = $row["type"];
                  $gramma = $row["gramma"];
                  $parent = $row["parent"];

                  // The meaning is blanked when the entry's language is rejected by inLangSetting().
                  $mean = inLangSetting($language, $user_setting["dict.lang"]) ? $row["mean"] : "";
                  $note = isset($row["note"]) ? $row["note"] : "";

                  // The components are stored as "parts" or, failing that, as "factors".
                  $parts = isset($row["parts"])
                      ? $row["parts"]
                      : (isset($row["factors"]) ? $row["factors"] : "");

                  // Likewise "partmean" or "factormean"; blanked under the same language rule.
                  $partmean = isset($row["partmean"])
                      ? $row["partmean"]
                      : (isset($row["factormean"]) ? $row["factormean"] : "");
                  if (inLangSetting($language, $user_setting["dict.lang"]) == false) {
                      $partmean = "";
                  }

                  $part_id = isset($row["part_id"]) ? $row["part_id"] : "";
                  $status = isset($row["status"]) ? $row["status"] : "";
                  $dict_name = isset($row["dict_name"]) ? $row["dict_name"] : "";

                  $output[] = array(
                      "id" => $id,
                      "guid" => $guid,
                      "pali" => $pali,
                      "type" => $type,
                      "gramma" => $gramma,
                      "parent" => $parent,
                      "mean" => $mean,
                      "note" => $note,
                      "parts" => $parts,
                      "part_id" => $part_id,
                      "partmean" => $partmean,
                      "status" => $status,
                      "dict_name" => $dict_name,
                      "language" => $language,
                  );

                  // Queue the parent for the next round unless it is the word itself.
                  if (!empty($parent) && $pali != $parent) {
                      $parent_list[$row["parent"]] = 1;
                  }

                  // Queue every "+"-separated factor that differs from the word itself.
                  // NOTE(review): the SQL filter above uses the dotted form '.part.', while
                  // this test compares against "part" - confirm which spelling is intended.
                  if ($type != "part" && isset($row["factors"])) {
                      foreach (str_getcsv($row["factors"], '+') as $factor) {
                          if (!empty($factor) && $factor != $pali) {
                              $parent_list[$factor] = 1;
                          }
                      }
                  }
              }
          }

          // Drop the connection before moving on to the next dictionary.
          $PDO = null;
      }

      /*
      if($i==0){
      // automatically look up the stem of the word
      $word_base=getPaliWordBase($in_word);
      foreach($word_base as $x=>$infolist){
      foreach($infolist as $gramma){
      array_push($output,
      array("pali"=>$in_word,
      "type"=>$gramma["type"],
      "gramma"=>$gramma["gramma"],
      "mean"=>"",
      "parent"=>$x,
      "parts"=>$gramma["parts"],
      "partmean"=>"",
      "language"=>"en",
      "dict_name"=>"auto",
      "status"=>128
      ));
      $part_list=str_getcsv($gramma["parts"],"+");
      foreach($part_list as $part){
      $parent_list[$part]=1;
      }
      }
      }
      }
      */

      if ($debug) {
          echo "parent:" . count($parent_list) . "<br>";
          //print_r($parent_list)."<br>";
      }

      // No new words were discovered: further rounds would find nothing.
      if (count($parent_list) == 0) {
          break;
      }
      $word_list = array_keys($parent_list);
  }
  // Lookup finished.
  // Discard invalid data: an entry whose dict_name is "auto" is kept only when
  // its parent was found as a spelling in some dictionary; every other entry
  // is kept unconditionally.
  $newOutput = array();
  foreach ($output as $entry) {
      $is_auto = ($entry["dict_name"] == "auto");
      if (!$is_auto || isset($dict_word_spell[(string) $entry["parent"]])) {
          $newOutput[] = $entry;
      }
  }

  if ($debug) {
      echo "<textarea width=\"100%\" >";
      echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
      echo "</textarea>";
      // Number of entries generated vs. number that survived the filter.
      echo "生成:" . count($output) . "<br>";
      echo "有效:" . count($newOutput) . "<br>";
  }
  // Start matching: produce one result row per paragraph word, filled from
  // the dictionary entries that share its spelling. For each field, the first
  // entry that provides a non-empty value wins.
  $counter = 0; // number of words for which at least one field was filled
  $output = array();
  foreach ($FetchAllWord as $word) {
      $pali = $word["real"];
      $type = "";
      $gramma = "";
      $mean = "";
      $parent = "";
      $parts = "";
      $partmean = "";

      foreach ($newOutput as $dictword) {
          if ($dictword["pali"] != $pali) {
              continue;
          }
          // type and gramma are taken together, from the same entry.
          if ($type == "" && $gramma == "") {
              $type = $dictword["type"];
              $gramma = $dictword["gramma"];
          }
          if (trim($mean) == "") {
              // Meanings are "$"-separated; use the first one. str_getcsv()
              // returns array(null) for an empty string, so cast to keep
              // $mean a string: a null here used to reach trim() on the next
              // entry (deprecated since PHP 8.1) and the JSON output.
              $meanings = str_getcsv((string) $dictword["mean"], '$');
              $mean = (string) $meanings[0];
          }
          if ($parent == "") {
              $parent = $dictword["parent"];
          }
          if ($parts == "") {
              $parts = $dictword["parts"];
          }
          if ($partmean == "") {
              $partmean = $dictword["partmean"];
          }
      }

      // No meaning of its own: fall back to the first non-blank meaning of its parent.
      if ($mean == "" && $parent != "") {
          foreach ($newOutput as $parentword) {
              if ($parentword["pali"] == $parent && $parentword["mean"] != "") {
                  $meanings = str_getcsv((string) $parentword["mean"], '$');
                  $mean = trim((string) $meanings[0]);
                  if ($mean != "") {
                      break;
                  }
              }
          }
      }

      // Count the word as matched when any field was filled.
      if ($type != "" ||
          $gramma != "" ||
          $mean != "" ||
          $parent != "" ||
          $parts != "" ||
          $partmean != "") {
          $counter++;
      }

      $output[] = array(
          "book" => $in_book,
          "paragraph" => $word["paragraph"],
          "num" => $word["wid"],
          "pali" => $word["real"],
          "type" => $type,
          "gramma" => $gramma,
          "mean" => $mean,
          "parent" => $parent,
          "parts" => $parts,
          "partmean" => $partmean,
          "status" => 3,
      );
  }
  // Emit the result as JSON. In debug mode the payload is wrapped in a
  // <textarea> and followed by the match percentage and the elapsed time.
  $wrap_open = $debug ? "<textarea width=\"100%\" >" : "";
  $wrap_close = $debug ? "</textarea>" : "";
  echo $wrap_open;
  echo json_encode($output, JSON_UNESCAPED_UNICODE);
  echo $wrap_close;

  if ($debug) {
      // Percentage of paragraph words for which at least one field was filled.
      $match_percent = ($counter / count($FetchAllWord)) * 100;
      echo "匹配" . $match_percent . "<br>";
      //echo "{$result["pali"]}-{$result["mean"]}-{$result["parent"]}<br>";

      // Elapsed time since $time_start, in milliseconds.
      $queryTime = (microtime_float() - $time_start) * 1000;
      echo "<div >搜索时间:{$queryTime} 毫秒</div>";
  }