dict_find_auto.php 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. <?php
  2. include "../path.php";
  3. include "../public/_pdo.php";
  4. include "../public/function.php";
  5. if(isset($_GET["book"])){
  6. $in_book=$_GET["book"];
  7. }
  8. if(isset($_GET["para"])){
  9. $in_para=$_GET["para"];
  10. }
  11. $para_list=str_getcsv($in_para);
  12. $strQueryPara="(";//单词查询字串
  13. foreach($para_list as $para){
  14. $strQueryPara.="'{$para}',";
  15. }
  16. $strQueryPara=mb_substr($strQueryPara, 0,mb_strlen($strQueryPara,"UTF-8")-1,"UTF-8");
  17. $strQueryPara.=")";
  18. if(isset($_GET["debug"])){
  19. $debug=true;;
  20. }
  21. else{
  22. $debug=false;
  23. }
  24. function microtime_float()
  25. {
  26. list($usec, $sec) = explode(" ", microtime());
  27. return ((float)$usec + (float)$sec);
  28. }
  29. $time_start = microtime_float();
  30. //open database
  31. global $PDO;
  32. //查询单词表
  33. $db_file = _DIR_PALICANON_TEMPLET_."/p".$in_book."_tpl.db3";
  34. PDO_Connect("sqlite:$db_file");
  35. $query="SELECT paragraph,wid,real FROM \"main\" WHERE (\"paragraph\" in ".$strQueryPara." ) and \"real\"<>\"\" and \"type\"<>'.ctl.' ";
  36. if($debug){
  37. echo "filename:".$db_file."<br>";
  38. echo $query."<br>";
  39. }
  40. $FetchAllWord = PDO_FetchAll($query);
  41. $iFetch=count($FetchAllWord);
  42. if($iFetch==0){
  43. echo json_encode(array(), JSON_UNESCAPED_UNICODE);
  44. exit;
  45. }
  46. $voc_list=array();
  47. foreach($FetchAllWord as $word){
  48. $voc_list[$word["real"]]=1;
  49. }
  50. if($debug){
  51. echo "单词表共计:".count($voc_list)."词<br>";
  52. }
  53. //查询单词表结束
  54. $word_list=array();
  55. foreach($voc_list as $word=>$value){
  56. array_push($word_list,$word);
  57. }
  58. $lookup_loop=2;
  59. $dict_word_spell=array();
  60. $output=array();
  61. $db_file_list=array();
  62. //用户词典
  63. array_push($db_file_list , array(_FILE_DB_WBW_," ORDER BY rowid DESC"));
  64. array_push($db_file_list , array(_DIR_DICT_SYSTEM_."/sys_regular.db"," ORDER BY confidence DESC"));
  65. array_push($db_file_list , array(_DIR_DICT_SYSTEM_."/sys_irregular.db",""));
  66. array_push($db_file_list , array(_DIR_DICT_SYSTEM_."/union.db",""));
  67. array_push($db_file_list , array(_DIR_DICT_SYSTEM_."/comp.db",""));
  68. array_push($db_file_list , array(_DIR_DICT_3RD_."/pm.db",""));
  69. array_push($db_file_list , array(_DIR_DICT_3RD_."/bhmf.db",""));
  70. array_push($db_file_list , array(_DIR_DICT_3RD_."/shuihan.db",""));
  71. array_push($db_file_list , array(_DIR_DICT_3RD_."/concise.db",""));
  72. array_push($db_file_list , array(_DIR_DICT_3RD_."/uhan_en.db",""));
  73. for($i=0;$i<$lookup_loop;$i++)
  74. {
  75. $parent_list=array();
  76. $strQueryWord="(";//单词查询字串
  77. foreach($word_list as $word){
  78. $word=str_replace("'","’",$word);
  79. $strQueryWord.="'{$word}',";
  80. }
  81. $strQueryWord=mb_substr($strQueryWord, 0,mb_strlen($strQueryWord,"UTF-8")-1,"UTF-8");
  82. $strQueryWord.=")";
  83. if($debug){
  84. echo "<h2>第{$i}轮查询:$strQueryWord</h2>";
  85. }
  86. foreach($db_file_list as $db){
  87. $db_file=$db[0];
  88. $db_sort=$db[1];
  89. if($debug){
  90. echo "dict:$db_file<br>";
  91. }
  92. PDO_Connect("sqlite:{$db_file}");
  93. PDO_Execute("PRAGMA synchronous = OFF");
  94. PDO_Execute("PRAGMA journal_mode = WAL");
  95. PDO_Execute("PRAGMA foreign_keys = ON");
  96. PDO_Execute("PRAGMA busy_timeout = 5000");
  97. $strOrderby=$db[1];
  98. if($i==0){
  99. $query = "select * from dict where \"pali\" in {$strQueryWord} AND ( type <> '.n:base.' AND type <> '.ti:base.' AND type <> '.adj:base.' AND type <> '.pron:base.' AND type <> '.v:base.' AND type <> '.part.' ) ".$strOrderby;
  100. }
  101. else{
  102. $query = "select * from dict where \"pali\" in {$strQueryWord} ".$strOrderby;
  103. }
  104. if($debug){
  105. echo $query."<br>";
  106. }
  107. try {
  108. $Fetch = PDO_FetchAll($query);
  109. } catch (Exception $e) {
  110. if($debug){
  111. echo 'Caught exception: ', $e->getMessage(), "\n";
  112. }
  113. continue;
  114. }
  115. $iFetch=count($Fetch);
  116. if($debug){
  117. echo "count:{$iFetch}<br>";
  118. }
  119. if($iFetch>0){
  120. foreach($Fetch as $one){
  121. $id = $one["id"];
  122. if(isset($one["guid"])){
  123. $guid = $one["guid"];
  124. }
  125. else{
  126. $guid = "";
  127. }
  128. $pali = $one["pali"];
  129. $dict_word_spell["{$pali}"]=1;
  130. $type = $one["type"];
  131. $gramma = $one["gramma"];
  132. $parent = $one["parent"];
  133. $mean = $one["mean"];
  134. if(isset($one["note"])){
  135. $note = $one["note"];
  136. }
  137. else{
  138. $note = "";
  139. }
  140. if(isset($one["parts"])){
  141. $parts = $one["parts"];
  142. }
  143. else if(isset($one["factors"])){
  144. $parts = $one["factors"];
  145. }
  146. else{
  147. $parts = "";
  148. }
  149. if(isset($one["partmean"])){
  150. $partmean = $one["partmean"];
  151. }
  152. else if(isset($one["factormean"])){
  153. $partmean = $one["factormean"];
  154. }
  155. else{
  156. $partmean = "";
  157. }
  158. if(isset($one["part_id"])){
  159. $part_id = $one["part_id"];
  160. }
  161. else{
  162. $part_id = "";
  163. }
  164. if(isset($one["status"])){
  165. $status = $one["status"];
  166. }
  167. else{
  168. $status = "";
  169. }
  170. if(isset($one["dict_name"])){
  171. $dict_name = $one["dict_name"];
  172. }
  173. else{
  174. $dict_name = "";
  175. }
  176. if(isset($one["language"])){
  177. $language = $one["language"];
  178. }
  179. else{
  180. $language = "en";
  181. }
  182. array_push($output,array(
  183. "id"=>$id,
  184. "guid"=>$guid,
  185. "pali"=>$pali,
  186. "type"=>$type,
  187. "gramma"=>$gramma,
  188. "parent"=>$parent,
  189. "mean"=>$mean,
  190. "note"=>$note,
  191. "parts"=>$parts,
  192. "part_id"=>$part_id,
  193. "partmean"=>$partmean,
  194. "status"=>$status,
  195. "dict_name"=>$dict_name,
  196. "language"=>$language
  197. ));
  198. if(!empty($parent)){
  199. if($pali != $parent){
  200. $parent_list[$one["parent"]]=1;
  201. }
  202. }
  203. if($type!="part"){
  204. if(isset($one["factors"])){
  205. $parts=str_getcsv($one["factors"],'+');
  206. foreach($parts as $x){
  207. if(!empty($x)){
  208. if($x != $pali){
  209. $parent_list[$x]=1;
  210. }
  211. }
  212. }
  213. }
  214. }
  215. }
  216. }
  217. $PDO = null;
  218. }
  219. /*
  220. if($i==0){
  221. //自动查找单词词干
  222. $word_base=getPaliWordBase($in_word);
  223. foreach($word_base as $x=>$infolist){
  224. foreach($infolist as $gramma){
  225. array_push($output,
  226. array("pali"=>$in_word,
  227. "type"=>$gramma["type"],
  228. "gramma"=>$gramma["gramma"],
  229. "mean"=>"",
  230. "parent"=>$x,
  231. "parts"=>$gramma["parts"],
  232. "partmean"=>"",
  233. "language"=>"en",
  234. "dict_name"=>"auto",
  235. "status"=>128
  236. ));
  237. $part_list=str_getcsv($gramma["parts"],"+");
  238. foreach($part_list as $part){
  239. $parent_list[$part]=1;
  240. }
  241. }
  242. }
  243. }
  244. */
  245. if($debug){
  246. echo "parent:".count($parent_list)."<br>";
  247. //print_r($parent_list)."<br>";
  248. }
  249. if(count($parent_list)==0){
  250. break;
  251. }
  252. else{
  253. $word_list=array();
  254. foreach($parent_list as $x=>$value){
  255. array_push($word_list,$x);
  256. }
  257. }
  258. }
  259. //查询结束
  260. //删除无效数据
  261. $newOutput = array();
  262. foreach($output as $value){
  263. if($value["dict_name"]=="auto"){
  264. if(isset($dict_word_spell["{$value["parent"]}"])){
  265. array_push($newOutput,$value);
  266. }
  267. }
  268. else
  269. {
  270. array_push($newOutput,$value);
  271. }
  272. }
  273. if($debug){
  274. echo "<textarea width=\"100%\" >";
  275. echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
  276. echo "</textarea>";
  277. }
  278. if($debug){
  279. echo "生成:".count($output)."<br>";
  280. echo "有效:".count($newOutput)."<br>";
  281. }
  282. //开始匹配
  283. $counter=0;
  284. $output=array();
  285. foreach($FetchAllWord as $word){
  286. $pali=$word["real"];
  287. $type="";
  288. $gramma="";
  289. $mean="";
  290. $parent="";
  291. $parts="";
  292. $partmean="";
  293. foreach($newOutput as $dictword){
  294. if($dictword["pali"]==$pali){
  295. if($type=="" && $gramma==""){
  296. $type=$dictword["type"];
  297. $gramma=$dictword["gramma"];
  298. }
  299. if(trim($mean)=="" ){
  300. $mean=str_getcsv($dictword["mean"],"$")[0];
  301. }
  302. if($parent=="" ){
  303. $parent=$dictword["parent"];
  304. }
  305. if($parts=="" ){
  306. $parts=$dictword["parts"];
  307. }
  308. if($partmean==""){
  309. $partmean=$dictword["partmean"];
  310. }
  311. }
  312. }
  313. if($mean=="" && $parent!=""){
  314. foreach($newOutput as $parentword){
  315. if($parentword["pali"]==$parent){
  316. if($parentword["mean"]!=""){
  317. $mean=trim(str_getcsv($parentword["mean"],"$")[0]);
  318. if($mean!=""){
  319. break;
  320. }
  321. }
  322. }
  323. }
  324. }
  325. if( $type!="" ||
  326. $gramma!="" ||
  327. $mean!="" ||
  328. $parent!="" ||
  329. $parts!="" ||
  330. $partmean!=""){
  331. $counter++;
  332. }
  333. array_push($output,
  334. array("book"=>$in_book,
  335. "paragraph"=>$word["paragraph"],
  336. "num"=>$word["wid"],
  337. "pali"=>$word["real"],
  338. "type"=>$type,
  339. "gramma"=>$gramma,
  340. "mean"=>$mean,
  341. "parent"=>$parent,
  342. "parts"=>$parts,
  343. "partmean"=>$partmean,
  344. "status"=>3
  345. ));
  346. }
  347. if($debug){
  348. echo "<textarea width=\"100%\" >";
  349. }
  350. echo json_encode($output, JSON_UNESCAPED_UNICODE);
  351. if($debug){
  352. echo "</textarea>";
  353. }
  354. if($debug){
  355. echo "匹配".(($counter/count($FetchAllWord))*100)."<br>";
  356. foreach($output as $result){
  357. //echo "{$result["pali"]}-{$result["mean"]}-{$result["parent"]}<br>";
  358. }
  359. $queryTime=(microtime_float()-$time_start)*1000;
  360. echo "<div >搜索时间:$queryTime 毫秒</div>";
  361. }
  362. ?>