dict_find_auto.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. <?php
  2. include("../log/pref_log.php");
  3. require_once "../config.php";
  4. require_once "../public/_pdo.php";
  5. require_once "../public/function.php";
  6. require_once '../ucenter/setting_function.php';
  7. require_once "../redis/function.php";
  8. $redis = redis_connect();
  9. global $error;
  10. $error = array();
  11. set_error_handler(function(int $number, string $message) {
  12. global $error;
  13. $error[] = "Handler captured error $number: '$message'" . PHP_EOL ;
  14. });
  15. $user_setting = get_setting();
  16. if (isset($_GET["book"])) {
  17. $in_book = (int)$_GET["book"];
  18. }
  19. if (isset($_GET["para"])) {
  20. $in_para = (int)$_GET["para"];
  21. }
  22. $para_list = str_getcsv($in_para);
  23. $strQueryPara = "("; //单词查询字串
  24. foreach ($para_list as $para) {
  25. $strQueryPara .= "'{$para}',";
  26. }
  27. $strQueryPara = mb_substr($strQueryPara, 0, mb_strlen($strQueryPara, "UTF-8") - 1, "UTF-8");
  28. $strQueryPara .= ")";
  29. if (isset($_GET["debug"])) {
  30. $debug = true;
  31. } else {
  32. $debug = false;
  33. }
  34. function microtime_float()
  35. {
  36. list($usec, $sec) = explode(" ", microtime());
  37. return ((float) $usec + (float) $sec);
  38. }
  39. $time_start = microtime_float();
  40. //open database
  41. global $PDO;
  42. //查询单词表
  43. $db_file = _DIR_PALICANON_TEMPLET_ . "/p" . $in_book . "_tpl.db3";
  44. PDO_Connect(_FILE_DB_PALICANON_TEMPLET_);
  45. $query = "SELECT paragraph,wid,real FROM "._TABLE_PALICANON_TEMPLET_." WHERE ( book = ".$PDO->quote($in_book)." AND paragraph in " . $strQueryPara . " ) and real <> '' and type <> '.ctl.' ";
  46. if ($debug) {
  47. echo $query . "<br>";
  48. }
  49. $FetchAllWord = PDO_FetchAll($query);
  50. $iFetch = count($FetchAllWord);
  51. if ($iFetch == 0) {
  52. echo json_encode(array(), JSON_UNESCAPED_UNICODE);
  53. exit;
  54. }
  55. $voc_list = array();
  56. foreach ($FetchAllWord as $word) {
  57. $voc_list[$word["real"]] = 1;
  58. }
  59. if ($debug) {
  60. echo "单词表共计:" . count($voc_list) . "词<br>";
  61. }
  62. //查询单词表结束
  63. $word_list = array();
  64. foreach ($voc_list as $word => $value) {
  65. array_push($word_list, $word);
  66. }
  67. $lookup_loop = 2;
  68. $dict_word_spell = array();
  69. $output = array();
  70. $db_file_list = array();
  71. //字典列表
  72. /*
  73. array_push($db_file_list, array(_FILE_DB_WBW1_, " ORDER BY rowid DESC"));
  74. array_push($db_file_list, array(_DIR_DICT_SYSTEM_ . "/sys_regular.db", " ORDER BY confidence DESC"));
  75. array_push($db_file_list, array(_DIR_DICT_SYSTEM_ . "/sys_irregular.db", ""));
  76. array_push($db_file_list, array(_DIR_DICT_SYSTEM_ . "/union.db", ""));
  77. array_push($db_file_list, array(_DIR_DICT_SYSTEM_ . "/comp.db", ""));
  78. array_push($db_file_list, array(_DIR_DICT_3RD_ . "/pm.db", ""));
  79. array_push($db_file_list, array(_DIR_DICT_3RD_ . "/bhmf.db", ""));
  80. array_push($db_file_list, array(_DIR_DICT_3RD_ . "/shuihan.db", ""));
  81. array_push($db_file_list, array(_DIR_DICT_3RD_ . "/concise.db", ""));
  82. array_push($db_file_list, array(_DIR_DICT_3RD_ . "/uhan_en.db", ""));
  83. */
  84. $db_file_list[] = array("","wbwdict://new/".$_COOKIE["userid"],true);
  85. $db_file_list[] = array(_FILE_DB_TERM_,"dict://term",true);
  86. $db_file_list[] = array(_FILE_DB_WBW1_,Redis["prefix"]."dict/user",true);
  87. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/sys_regular.db","dict://regular",true);
  88. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/sys_irregular.db","dict://irregular",true);
  89. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/union.db","dict://union",true);
  90. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/comp.db","dict://comp",true);
  91. $db_file_list[] = array( _DIR_DICT_3RD_ . "/pm.db","dict://pm",true);
  92. $db_file_list[] = array( _DIR_DICT_3RD_ . "/bhmf.db","dict://bhmf",true);
  93. $db_file_list[] = array( _DIR_DICT_3RD_ . "/shuihan.db","dict://shuihan",true);
  94. $db_file_list[] = array( _DIR_DICT_3RD_ . "/concise.db","dict://concise",true);
  95. $db_file_list[] = array( _DIR_DICT_3RD_ . "/uhan_en.db","dict://uhan_en",true);
  96. $_dict_db = array();
  97. foreach ($db_file_list as $db_file) {
  98. try {
  99. if ($redis && !empty($db_file[1])) {
  100. $dbh=null;
  101. }
  102. else{
  103. $dbh = new PDO("sqlite:" . $db_file[0], "", "");
  104. $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  105. $dbh->query("PRAGMA synchronous = OFF");
  106. $dbh->query("PRAGMA journal_mode = WAL");
  107. $dbh->query("PRAGMA foreign_keys = ON");
  108. $dbh->query("PRAGMA busy_timeout = 5000");
  109. }
  110. $_dict_db[] = array("file" => $db_file[0], "dbh" => $dbh,"redis"=>$db_file[1],"static"=>$db_file[2]);
  111. } catch (PDOException $e) {
  112. if ($debug) {
  113. print "Error!: " . $e->getMessage() . "<br/>";
  114. }
  115. }
  116. }
  117. $lookuped=array();
  118. for ($i = 0; $i < $lookup_loop; $i++) {
  119. $parent_list = array();
  120. # 记录已经查过的词,下次就不查了
  121. $newWordList = array();
  122. foreach ($word_list as $lsWord) {
  123. if(!isset($lookuped[$lsWord]) && !empty($lsWord)){
  124. $newWordList[]=$lsWord;
  125. $lookuped[$lsWord]=1;
  126. }
  127. }
  128. if(count($newWordList)==0){
  129. break;
  130. }
  131. $word_list = $newWordList;
  132. # 记录已经查过的词结束
  133. $strQueryWord = "("; //单词查询字串
  134. foreach ($word_list as $word) {
  135. $word = str_replace("'", "’", $word);
  136. $strQueryWord .= "'{$word}',";
  137. }
  138. $strQueryWord = mb_substr($strQueryWord, 0, mb_strlen($strQueryWord, "UTF-8") - 1, "UTF-8");
  139. $strQueryWord .= ")";
  140. if ($debug) {
  141. echo "<h2>第{$i}轮查询:$strQueryWord</h2>";
  142. }
  143. foreach ($_dict_db as $db) {
  144. $db_file = $db["file"];
  145. if ($debug) {
  146. echo "dict:$db_file<br>";
  147. }
  148. $strOrderby = $db["file"][1];
  149. if ($i == 0) {
  150. $query = "select * from dict where pali in {$strQueryWord} AND ( type <> '.n:base.' AND type <> '.ti:base.' AND type <> '.adj:base.' AND type <> '.pron:base.' AND type <> '.v:base.' AND type <> '.part.' ) " . $strOrderby;
  151. } else {
  152. $query = "select * from dict where pali in {$strQueryWord} " . $strOrderby;
  153. }
  154. if ($debug) {
  155. echo $query . "<br>";
  156. }
  157. $Fetch = array();
  158. if ($redis && !empty($db["redis"])) {
  159. if ($debug) {
  160. echo "<spen style='color:green;'>redis</spen>:{$db["redis"]}<br>";
  161. }
  162. foreach ($word_list as $word) {
  163. $wordData = $redis->hGet($db["redis"],$word);
  164. if($wordData){
  165. if(!empty($wordData)){
  166. $arrWord = json_decode($wordData,true);
  167. foreach ($arrWord as $one) {
  168. # code...
  169. if(count($one)==14){
  170. $Fetch[] = array("id"=>$one[0],
  171. "pali"=>$one[1],
  172. "type"=>$one[2],
  173. "gramma"=>$one[3],
  174. "parent"=>$one[4],
  175. "mean"=>$one[5],
  176. "note"=>$one[6],
  177. "parts"=>$one[7],
  178. "partmean"=>$one[8],
  179. "status"=>$one[9],
  180. "confidence"=>$one[10],
  181. "dict_name"=>$one[12],
  182. "lang"=>$one[13]
  183. );
  184. }
  185. else{
  186. $Fetch[] = array("id"=>$one[0],
  187. "pali"=>$one[1],
  188. "type"=>$one[2],
  189. "gramma"=>$one[3],
  190. "parent"=>$one[4],
  191. "mean"=>$one[5],
  192. "note"=>$one[6],
  193. "parts"=>$one[7],
  194. "partmean"=>"",
  195. "status"=>$one[8],
  196. "confidence"=>$one[9],
  197. "dict_name"=>$one[10],
  198. "lang"=>$one[12]
  199. );
  200. }
  201. }
  202. }
  203. }
  204. else{
  205. # 没找到就不找了
  206. }
  207. }
  208. }
  209. else{
  210. try {
  211. //$Fetch = PDO_FetchAll($query);
  212. $stmt = $db["dbh"]->query($query);
  213. if ($stmt) {
  214. $Fetch = $stmt->fetchAll(PDO::FETCH_ASSOC);
  215. } else {
  216. $Fetch = array();
  217. if ($debug) {
  218. echo "无效的Statement句柄";
  219. }
  220. }
  221. } catch (Exception $e) {
  222. if ($debug) {
  223. echo 'Caught exception: ', $e->getMessage(), "\n";
  224. }
  225. continue;
  226. }
  227. }
  228. $iFetch = count($Fetch);
  229. if ($debug) {
  230. echo "count:{$iFetch}<br>";
  231. }
  232. if ($iFetch > 0) {
  233. foreach ($Fetch as $one) {
  234. $id = $one["id"];
  235. if (isset($one["guid"])) {
  236. $guid = $one["guid"];
  237. } else {
  238. $guid = "";
  239. }
  240. if (isset($one["lang"])) {
  241. $language = $one["lang"];
  242. } else if (isset($one["language"])) {
  243. $language = $one["language"];
  244. } else {
  245. $language = "en";
  246. }
  247. $pali = $one["pali"];
  248. $dict_word_spell["{$pali}"] = 1;
  249. $type = $one["type"];
  250. $gramma = $one["gramma"];
  251. $parent = $one["parent"];
  252. if (inLangSetting($language, $user_setting["dict.lang"])) {
  253. $mean = $one["mean"];
  254. } else {
  255. $mean = "";
  256. }
  257. if (isset($one["note"])) {
  258. $note = $one["note"];
  259. } else {
  260. $note = "";
  261. }
  262. if (isset($one["parts"])) {
  263. $parts = $one["parts"];
  264. } else if (isset($one["factors"])) {
  265. $parts = $one["factors"];
  266. } else {
  267. $parts = "";
  268. }
  269. if (isset($one["partmean"])) {
  270. $partmean = $one["partmean"];
  271. } else if (isset($one["factormean"])) {
  272. $partmean = $one["factormean"];
  273. } else {
  274. $partmean = "";
  275. }
  276. if (inLangSetting($language, $user_setting["dict.lang"]) == false) {
  277. $partmean = "";
  278. }
  279. if (isset($one["part_id"])) {
  280. $part_id = $one["part_id"];
  281. } else {
  282. $part_id = "";
  283. }
  284. if (isset($one["status"])) {
  285. $status = $one["status"];
  286. } else {
  287. $status = "";
  288. }
  289. if (isset($one["dict_name"])) {
  290. $dict_name = $one["dict_name"];
  291. } else {
  292. $dict_name = "";
  293. }
  294. array_push($output, array(
  295. "id" => $id,
  296. "guid" => $guid,
  297. "pali" => $pali,
  298. "type" => $type,
  299. "gramma" => $gramma,
  300. "parent" => $parent,
  301. "mean" => $mean,
  302. "note" => $note,
  303. "parts" => $parts,
  304. "part_id" => $part_id,
  305. "partmean" => $partmean,
  306. "status" => $status,
  307. "dict_name" => $dict_name,
  308. "language" => $language,
  309. ));
  310. if (!empty($parent)) {
  311. if ($pali != $parent) {
  312. $parent_list[$one["parent"]] = 1;
  313. }
  314. }
  315. if ($type != "part") {
  316. if (isset($one["factors"])) {
  317. $parts = str_getcsv($one["factors"], '+');
  318. foreach ($parts as $x) {
  319. if (!empty($x)) {
  320. if ($x != $pali) {
  321. $parent_list[$x] = 1;
  322. }
  323. }
  324. }
  325. }
  326. }
  327. }
  328. }
  329. $PDO = null;
  330. }
  331. /*
  332. if($i==0){
  333. //自动查找单词词干
  334. $word_base=getPaliWordBase($in_word);
  335. foreach($word_base as $x=>$infolist){
  336. foreach($infolist as $gramma){
  337. array_push($output,
  338. array("pali"=>$in_word,
  339. "type"=>$gramma["type"],
  340. "gramma"=>$gramma["gramma"],
  341. "mean"=>"",
  342. "parent"=>$x,
  343. "parts"=>$gramma["parts"],
  344. "partmean"=>"",
  345. "language"=>"en",
  346. "dict_name"=>"auto",
  347. "status"=>128
  348. ));
  349. $part_list=str_getcsv($gramma["parts"],"+");
  350. foreach($part_list as $part){
  351. $parent_list[$part]=1;
  352. }
  353. }
  354. }
  355. }
  356. */
  357. if ($debug) {
  358. echo "parent:" . count($parent_list) . "<br>";
  359. //print_r($parent_list)."<br>";
  360. }
  361. if (count($parent_list) == 0) {
  362. break;
  363. } else {
  364. $word_list = array();
  365. foreach ($parent_list as $x => $value) {
  366. array_push($word_list, $x);
  367. }
  368. }
  369. }
  370. //查询结束
  371. //删除无效数据
  372. $newOutput = array();
  373. foreach ($output as $value) {
  374. if ($value["dict_name"] == "auto") {
  375. if (isset($dict_word_spell["{$value["parent"]}"])) {
  376. array_push($newOutput, $value);
  377. }
  378. } else {
  379. array_push($newOutput, $value);
  380. }
  381. }
  382. if ($debug) {
  383. echo "<textarea width=\"100%\" >";
  384. echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
  385. echo "</textarea>";
  386. }
  387. if ($debug) {
  388. echo "生成:" . count($output) . "<br>";
  389. echo "有效:" . count($newOutput) . "<br>";
  390. }
  391. //开始匹配
  392. $counter = 0;
  393. $output = array();
  394. foreach ($FetchAllWord as $word) {
  395. $pali = $word["real"];
  396. $type = "";
  397. $gramma = "";
  398. $mean = "";
  399. $parent = "";
  400. $parts = "";
  401. $partmean = "";
  402. foreach ($newOutput as $dictword) {
  403. if ($dictword["pali"] == $pali) {
  404. if ($type == "" && $gramma == "") {
  405. $type = $dictword["type"];
  406. $gramma = $dictword["gramma"];
  407. }
  408. if (trim($mean) == "") {
  409. $mean = str_getcsv($dictword["mean"], "$")[0];
  410. }
  411. if ($parent == "") {
  412. $parent = $dictword["parent"];
  413. }
  414. if ($parts == "") {
  415. $parts = $dictword["parts"];
  416. }
  417. if ($partmean == "") {
  418. $partmean = $dictword["partmean"];
  419. }
  420. }
  421. }
  422. if ($mean == "" && $parent != "") {
  423. foreach ($newOutput as $parentword) {
  424. if ($parentword["pali"] == $parent) {
  425. if ($parentword["mean"] != "") {
  426. $mean = trim(str_getcsv($parentword["mean"], "$")[0]);
  427. if ($mean != "") {
  428. break;
  429. }
  430. }
  431. }
  432. }
  433. }
  434. if ($type != "" ||
  435. $gramma != "" ||
  436. $mean != "" ||
  437. $parent != "" ||
  438. $parts != "" ||
  439. $partmean != "") {
  440. $counter++;
  441. }
  442. array_push($output,
  443. array("book" => $in_book,
  444. "paragraph" => $word["paragraph"],
  445. "num" => $word["wid"],
  446. "pali" => $word["real"],
  447. "type" => $type,
  448. "gramma" => $gramma,
  449. "mean" => $mean,
  450. "parent" => $parent,
  451. "parts" => $parts,
  452. "partmean" => $partmean,
  453. "status" => 3,
  454. ));
  455. }
  456. if ($debug) {
  457. echo "<textarea width=\"100%\" >";
  458. }
  459. echo json_encode($output, JSON_UNESCAPED_UNICODE);
  460. if ($debug) {
  461. echo "</textarea>";
  462. }
  463. if ($debug) {
  464. echo "匹配" . (($counter / count($FetchAllWord)) * 100) . "<br>";
  465. foreach ($output as $result) {
  466. //echo "{$result["pali"]}-{$result["mean"]}-{$result["parent"]}<br>";
  467. }
  468. $queryTime = (microtime_float() - $time_start) * 1000;
  469. echo "<div >搜索时间:$queryTime 毫秒</div>";
  470. }
  471. PrefLog();