dict_find_auto.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. <?php
  2. require_once "../path.php";
  3. require_once "../public/_pdo.php";
  4. require_once "../public/function.php";
  5. require_once '../ucenter/setting_function.php';
  // Collect PHP warnings/notices in $error instead of letting them be printed
  // into the response body.
  global $error;
  $error = array();
  set_error_handler(function (int $number, string $message) {
      global $error;
      $error[] = "Handler captured error $number: '$message'" . PHP_EOL;
  });

  $user_setting = get_setting();

  // Any value of ?debug switches on the HTML diagnostics printed further down.
  $debug = isset($_GET["debug"]);

  // Request parameters:
  //   book - identifier spliced into the template database file name
  //   para - comma-separated list of paragraph identifiers
  // Both default to "" so a missing (or array-valued) parameter can no longer
  // leave $in_book / $in_para undefined.
  $in_book = (isset($_GET["book"]) && is_string($_GET["book"])) ? $_GET["book"] : "";
  $in_para = (isset($_GET["para"]) && is_string($_GET["para"])) ? $_GET["para"] : "";

  $para_list = ($in_para === "") ? array() : str_getcsv($in_para);

  // $in_book becomes part of a file path, so only a plain token is accepted
  // (no slashes or dots). NOTE(review): assumes book ids are alphanumeric,
  // e.g. "p12_tpl.db3" - confirm against the template directory.
  // Invalid or missing input gets the same empty-list response the script
  // already uses when no words are found.
  if (!preg_match('/\A[A-Za-z0-9_-]+\z/', $in_book) || count($para_list) === 0) {
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Build the quoted SQL list "('1','2',...)" for the paragraph IN clause.
  // Single quotes are doubled so a crafted value cannot terminate the literal.
  $quoted_paras = array();
  foreach ($para_list as $para) {
      $quoted_paras[] = "'" . str_replace("'", "''", (string) $para) . "'";
  }
  $strQueryPara = "(" . implode(",", $quoted_paras) . ")";
  /**
   * Current Unix timestamp in seconds, including the microsecond fraction.
   *
   * Uses microtime(true), which returns the float directly, instead of
   * splitting and re-adding the "usec sec" string form.
   *
   * @return float seconds since the Unix epoch
   */
  function microtime_float(): float
  {
      return microtime(true);
  }
  // Start of the timing window reported in debug mode at the end of the script.
  $time_start = microtime_float();

  // Open the template database of the requested book.
  global $PDO;
  $db_file = _DIR_PALICANON_TEMPLET_ . "/p" . $in_book . "_tpl.db3";
  PDO_Connect("sqlite:{$db_file}");

  // Fetch the words of the requested paragraphs, skipping rows whose "real"
  // spelling is empty and rows of type '.ctl.'.
  // The empty string is written as '' (a real SQL string literal); the previous
  // "" form is an identifier that SQLite only tolerates as a legacy fallback.
  $query = "SELECT paragraph,wid,real FROM \"main\" WHERE (\"paragraph\" in " . $strQueryPara . " ) and \"real\"<>'' and \"type\"<>'.ctl.' ";
  if ($debug) {
      // The path and query contain request-derived text, so escape them for HTML.
      echo "filename:" . htmlspecialchars($db_file, ENT_QUOTES, "UTF-8") . "<br>";
      echo htmlspecialchars($query, ENT_QUOTES, "UTF-8") . "<br>";
  }

  $FetchAllWord = PDO_FetchAll($query);
  // NOTE(review): PDO_FetchAll is defined elsewhere; a non-array result (for
  // example on a failed query) is treated as "no rows" here so count() is
  // never called on it - confirm against the helper.
  if (!is_array($FetchAllWord)) {
      $FetchAllWord = array();
  }
  $iFetch = count($FetchAllWord);
  if ($iFetch == 0) {
      // Nothing to look up: answer with an empty JSON list.
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }
  // Reduce the fetched rows to the set of distinct spellings (used as array keys).
  $voc_list = array();
  foreach ($FetchAllWord as $row) {
      $voc_list[$row["real"]] = 1;
  }
  if ($debug) {
      echo "单词表共计:" . count($voc_list) . "词<br>";
  }
  // End of the word-list query: the distinct spellings seed the first lookup round.
  $word_list = array_keys($voc_list);

  $lookup_loop = 2;           // maximum number of lookup rounds
  $dict_word_spell = array(); // spellings found in any dictionary (as keys)
  $output = array();          // every dictionary row collected by the lookup

  // Dictionaries in lookup order; each entry is [database file, ORDER BY suffix].
  // The user dictionary comes first.
  $db_file_list = array(
      array(_FILE_DB_WBW1_, " ORDER BY rowid DESC"),
      array(_DIR_DICT_SYSTEM_ . "/sys_regular.db", " ORDER BY confidence DESC"),
      array(_DIR_DICT_SYSTEM_ . "/sys_irregular.db", ""),
      array(_DIR_DICT_SYSTEM_ . "/union.db", ""),
      array(_DIR_DICT_SYSTEM_ . "/comp.db", ""),
      array(_DIR_DICT_3RD_ . "/pm.db", ""),
      array(_DIR_DICT_3RD_ . "/bhmf.db", ""),
      array(_DIR_DICT_3RD_ . "/shuihan.db", ""),
      array(_DIR_DICT_3RD_ . "/concise.db", ""),
      array(_DIR_DICT_3RD_ . "/uhan_en.db", ""),
  );

  // Open one connection per dictionary; a dictionary that cannot be opened is
  // skipped (and reported only in debug mode).
  $_dict_db = array();
  foreach ($db_file_list as $dict_entry) {
      try {
          $connection = new PDO("sqlite:" . $dict_entry[0], "", "");
          $connection->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
          $pragmas = array(
              "PRAGMA synchronous = OFF",
              "PRAGMA journal_mode = WAL",
              "PRAGMA foreign_keys = ON",
              "PRAGMA busy_timeout = 5000",
          );
          foreach ($pragmas as $pragma) {
              $connection->query($pragma);
          }
          $_dict_db[] = array("file" => $dict_entry, "dbh" => $connection);
      } catch (PDOException $e) {
          if ($debug) {
              print "Error!: " . $e->getMessage() . "<br/>";
          }
      }
  }
  // Look the current word list up in every open dictionary. Parents and
  // factors discovered in one round become the word list of the next round,
  // for at most $lookup_loop rounds.
  for ($i = 0; $i < $lookup_loop; $i++) {
      $parent_list = array();

      // Quoted word list for the IN clause. A straight apostrophe inside a word
      // is replaced by a typographic one, so it cannot close the SQL literal.
      $strQueryWord = "(";
      foreach ($word_list as $word) {
          $strQueryWord .= "'" . str_replace("'", "’", $word) . "',";
      }
      // Drop the trailing comma and close the list.
      $strQueryWord = mb_substr($strQueryWord, 0, -1, "UTF-8") . ")";
      if ($debug) {
          echo "<h2>第{$i}轮查询:$strQueryWord</h2>";
      }

      foreach ($_dict_db as $db) {
          $db_file = $db["file"][0];
          $strOrderby = $db["file"][1];
          if ($debug) {
              echo "dict:$db_file<br>";
          }

          // The first round excludes the listed base-form types and '.part.';
          // later rounds accept every type.
          $query = "select * from dict where \"pali\" in {$strQueryWord} ";
          if ($i == 0) {
              $query .= "AND ( type <> '.n:base.' AND type <> '.ti:base.' AND type <> '.adj:base.' AND type <> '.pron:base.' AND type <> '.v:base.' AND type <> '.part.' ) ";
          }
          $query .= $strOrderby;
          if ($debug) {
              echo $query . "<br>";
          }

          try {
              $stmt = $db["dbh"]->query($query);
              if ($stmt) {
                  $Fetch = $stmt->fetchAll(PDO::FETCH_ASSOC);
              } else {
                  $Fetch = array();
                  if ($debug) {
                      echo "无效的Statement句柄";
                  }
              }
          } catch (Exception $e) {
              if ($debug) {
                  echo 'Caught exception: ', $e->getMessage(), "\n";
              }
              // Skip this dictionary for the current round.
              continue;
          }

          $iFetch = count($Fetch);
          if ($debug) {
              echo "count:{$iFetch}<br>";
          }

          foreach ($Fetch as $one) {
              // Dictionaries differ in schema: optional columns fall back to
              // "" and alternative column names are tried in turn.
              $id = $one["id"];
              $guid = $one["guid"] ?? "";
              $language = $one["lang"] ?? $one["language"] ?? "en";
              $pali = $one["pali"];
              $dict_word_spell["{$pali}"] = 1;
              $type = $one["type"];
              $gramma = $one["gramma"];
              $parent = $one["parent"];
              // Meanings are blanked when inLangSetting() rejects the row's
              // language for the user's "dict.lang" setting.
              $mean = inLangSetting($language, $user_setting["dict.lang"]) ? $one["mean"] : "";
              $note = $one["note"] ?? "";
              $parts = $one["parts"] ?? $one["factors"] ?? "";
              $partmean = $one["partmean"] ?? $one["factormean"] ?? "";
              if (!inLangSetting($language, $user_setting["dict.lang"])) {
                  $partmean = "";
              }
              $part_id = $one["part_id"] ?? "";
              $status = $one["status"] ?? "";
              $dict_name = $one["dict_name"] ?? "";

              $output[] = array(
                  "id" => $id,
                  "guid" => $guid,
                  "pali" => $pali,
                  "type" => $type,
                  "gramma" => $gramma,
                  "parent" => $parent,
                  "mean" => $mean,
                  "note" => $note,
                  "parts" => $parts,
                  "part_id" => $part_id,
                  "partmean" => $partmean,
                  "status" => $status,
                  "dict_name" => $dict_name,
                  "language" => $language,
              );

              // Queue the parent for the next round, unless it is the word itself.
              if (!empty($parent) && $pali != $parent) {
                  $parent_list[$parent] = 1;
              }

              // Queue every '+'-separated factor as well, except for rows whose
              // type is exactly "part".
              if ($type != "part" && isset($one["factors"])) {
                  foreach (str_getcsv($one["factors"], '+') as $factor) {
                      if (!empty($factor) && $factor != $pali) {
                          $parent_list[$factor] = 1;
                      }
                  }
              }
          }
          $PDO = null;
      }

      /*
      if($i==0){
      // automatically look up the word stem
      $word_base=getPaliWordBase($in_word);
      foreach($word_base as $x=>$infolist){
      foreach($infolist as $gramma){
      array_push($output,
      array("pali"=>$in_word,
      "type"=>$gramma["type"],
      "gramma"=>$gramma["gramma"],
      "mean"=>"",
      "parent"=>$x,
      "parts"=>$gramma["parts"],
      "partmean"=>"",
      "language"=>"en",
      "dict_name"=>"auto",
      "status"=>128
      ));
      $part_list=str_getcsv($gramma["parts"],"+");
      foreach($part_list as $part){
      $parent_list[$part]=1;
      }
      }
      }
      }
      */

      if ($debug) {
          echo "parent:" . count($parent_list) . "<br>";
          //print_r($parent_list)."<br>";
      }

      // Stop when nothing new was discovered; otherwise the discovered
      // parents/factors are looked up in the next round.
      if (count($parent_list) == 0) {
          break;
      }
      $word_list = array_keys($parent_list);
  }
  // Lookup finished. Drop invalid rows: a row from the "auto" dictionary is
  // kept only when its parent spelling was found in a dictionary; every other
  // row is kept unconditionally.
  $newOutput = array();
  foreach ($output as $row) {
      $keep = $row["dict_name"] != "auto" || isset($dict_word_spell["{$row["parent"]}"]);
      if ($keep) {
          $newOutput[] = $row;
      }
  }

  if ($debug) {
      echo "<textarea width=\"100%\" >";
      echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
      echo "</textarea>";
      // Rows produced by the lookup versus rows kept by the filter.
      echo "生成:" . count($output) . "<br>";
      echo "有效:" . count($newOutput) . "<br>";
  }
  // Matching: give every word of the requested paragraphs one merged dictionary
  // answer. For each field the first non-empty value among the word's
  // dictionary rows (in $newOutput order) wins.

  // Index the dictionary rows by headword once, keeping their original order,
  // so each word no longer scans the whole result set.
  $rows_by_pali = array();
  foreach ($newOutput as $dictword) {
      $rows_by_pali[(string) $dictword["pali"]][] = $dictword;
  }

  $counter = 0; // number of words that received at least one non-empty field
  $output = array();
  foreach ($FetchAllWord as $word) {
      $pali = $word["real"];
      $type = "";
      $gramma = "";
      $mean = "";
      $parent = "";
      $parts = "";
      $partmean = "";

      foreach ($rows_by_pali[(string) $pali] ?? array() as $dictword) {
          if ($type == "" && $gramma == "") {
              $type = $dictword["type"];
              $gramma = $dictword["gramma"];
          }
          if (trim($mean) == "") {
              // Only the first '$'-separated meaning is used. str_getcsv()
              // yields null for an empty input, so the value is cast back to a
              // string: "mean" is then always emitted as a JSON string and
              // null is never handed to trim().
              $mean = (string) str_getcsv((string) $dictword["mean"], "$")[0];
          }
          if ($parent == "") {
              $parent = $dictword["parent"];
          }
          if ($parts == "") {
              $parts = $dictword["parts"];
          }
          if ($partmean == "") {
              $partmean = $dictword["partmean"];
          }
      }

      // No meaning of its own: borrow the first non-empty meaning of the parent.
      if ($mean == "" && $parent != "") {
          foreach ($rows_by_pali[(string) $parent] ?? array() as $parentword) {
              if ($parentword["mean"] != "") {
                  $mean = trim((string) str_getcsv((string) $parentword["mean"], "$")[0]);
                  if ($mean != "") {
                      break;
                  }
              }
          }
      }

      if ($type != "" ||
          $gramma != "" ||
          $mean != "" ||
          $parent != "" ||
          $parts != "" ||
          $partmean != "") {
          $counter++;
      }

      $output[] = array(
          "book" => $in_book,
          "paragraph" => $word["paragraph"],
          "num" => $word["wid"],
          "pali" => $word["real"],
          "type" => $type,
          "gramma" => $gramma,
          "mean" => $mean,
          "parent" => $parent,
          "parts" => $parts,
          "partmean" => $partmean,
          "status" => 3,
      );
  }
  // Emit the result as JSON. In debug mode the JSON is wrapped in a <textarea>
  // and followed by the match percentage and the elapsed time.
  if ($debug) {
      echo "<textarea width=\"100%\" >";
  }
  echo json_encode($output, JSON_UNESCAPED_UNICODE);
  if ($debug) {
      echo "</textarea>";
      // Percentage of words that received at least one non-empty field.
      echo "匹配" . (($counter / count($FetchAllWord)) * 100) . "<br>";
      // Per-word dump, kept disabled:
      //echo "{$result["pali"]}-{$result["mean"]}-{$result["parent"]}<br>";
      $queryTime = (microtime_float() - $time_start) * 1000;
      echo "<div >搜索时间:$queryTime 毫秒</div>";
  }