dict_find_auto.php 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. <?php
  2. require 'checklogin.inc';
  3. include "../public/config.php";
  4. include "../public/_pdo.php";
  5. include "../public/function.php";
  // ---- Request parameters ----------------------------------------------
  // book : identifier of the text to analyse. It is later placed inside a
  //        database file name ("p{book}_tpl.db3"), so only plain word
  //        characters are accepted to rule out path traversal.
  //        NOTE(review): assumes book ids never contain other characters - confirm.
  // para : CSV list of paragraph ids whose words should be looked up.
  // debug: when present (any value) diagnostic HTML is printed with the result.
  $in_book = (isset($_GET["book"]) && is_string($_GET["book"])) ? $_GET["book"] : "";
  $in_para = (isset($_GET["para"]) && is_string($_GET["para"])) ? $_GET["para"] : "";

  if (!preg_match('/\A[A-Za-z0-9_]+\z/', $in_book)) {
      // Missing or malformed book id: answer with an empty JSON list (the
      // script's "nothing found" reply) instead of opening an arbitrary file.
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Build the quoted value list for the SQL "IN (...)" clause.
  // Each value stays a quoted string literal as before; embedded single quotes
  // are doubled (SQLite's escape for string literals) so that request data
  // cannot terminate the literal and inject SQL.
  $para_list = str_getcsv($in_para);
  $quoted_para = array();
  foreach ($para_list as $para) {
      $quoted_para[] = "'" . str_replace("'", "''", (string)$para) . "'";
  }
  $strQueryPara = "(" . implode(",", $quoted_para) . ")";

  $debug = isset($_GET["debug"]);
  /**
   * Returns the current Unix timestamp with microsecond resolution.
   *
   * Uses the built-in float form of microtime() instead of parsing and
   * summing the "usec sec" string by hand.
   *
   * @return float Seconds since the Unix epoch, including the fractional part.
   */
  function microtime_float()
  {
      return microtime(true);
  }
  // Start of the timing window reported in debug mode.
  $time_start = microtime_float();

  // Dictionary locations, assembled from settings defined outside this block.
  $dict_file_user=$dir_user_base.$userid.$dir_dict_user.'/'; // not referenced elsewhere in the visible script
  $dict_file_sys=$dir_dict_system;
  $dict_file_third=$dir_dict_3rd;

  // open database
  global $PDO;

  // Load the words of the requested paragraphs from the book's template database.
  $db_file = "{$dir_palicanon}templet/p".$in_book."_tpl.db3";
  PDO_Connect("sqlite:$db_file");
  $query="SELECT paragraph,vri,real FROM \"main\" WHERE (\"paragraph\" in ".$strQueryPara." ) and \"real\"<>\"\" and \"type\"<>'.ctl.' ";
  if($debug){
      // Both values are derived from request parameters, so they are
      // HTML-escaped before being printed into the debug page.
      echo "filename:".htmlspecialchars($db_file, ENT_QUOTES, "UTF-8")."<br>";
      echo htmlspecialchars($query, ENT_QUOTES, "UTF-8")."<br>";
  }
  $FetchAllWord = PDO_FetchAll($query);
  $iFetch=count($FetchAllWord);
  if($iFetch==0){
      // Nothing to look up: reply with an empty JSON list and stop.
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Collect the distinct spellings ("real" column); using them as array keys
  // removes duplicates.
  $voc_list=array();
  foreach($FetchAllWord as $word){
      $voc_list[$word["real"]]=1;
  }
  if($debug){
      echo "单词表共计:".count($voc_list)."词<br>";
  }

  // Words to look up in the first dictionary round.
  $word_list=array_keys($voc_list);
  // Maximum number of lookup rounds.
  $lookup_loop=2;
  // Set (spelling => 1) of every spelling returned by a dictionary.
  $dict_word_spell=array();
  // Dictionary rows gathered over all rounds.
  $output=array();

  // Dictionaries to query, in this order. Each entry is
  // array(database file, ORDER BY clause appended to the query or "").
  $db_file_list=array(
      // user dictionary
      array($_file_db_wbw," ORDER BY rowid DESC"),
      // system dictionaries
      array($dict_file_sys."sys_regular.db"," ORDER BY confidence DESC"),
      array($dict_file_sys."sys_irregular.db",""),
      array($dict_file_sys."union.db",""),
      array($dict_file_sys."comp.db",""),
      // third-party dictionaries
      array($dict_file_third."pm.db",""),
      array($dict_file_third."bhmf.db",""),
      array($dict_file_third."shuihan.db",""),
      array($dict_file_third."concise.db",""),
      array($dict_file_third."uhan_en.db","")
  );
  // Look the words up in every configured dictionary, for at most
  // $lookup_loop rounds. Round 0 queries the words of the paragraphs; the
  // parents and factor parts found in a round become the words of the next one.
  for($i=0;$i<$lookup_loop;$i++)
  {
      // Set (word => 1) of words to query in the next round.
      $parent_list=array();

      // Quoted value list for the SQL "IN (...)" clause of this round.
      $quoted_words=array();
      foreach($word_list as $word){
          // A straight apostrophe is replaced by ’ so that it cannot
          // terminate the SQL string literal.
          $quoted_words[]="'".str_replace("'","’",$word)."'";
      }
      $strQueryWord="(".implode(",",$quoted_words).")";

      if($debug){
          echo "<h2>第{$i}轮查询:".htmlspecialchars($strQueryWord, ENT_QUOTES, "UTF-8")."</h2>";
      }

      foreach($db_file_list as $db){
          $db_file=$db[0];
          $strOrderby=$db[1];
          if($debug){
              echo "dict:$db_file<br>";
          }
          PDO_Connect("sqlite:{$db_file}");
          PDO_Execute("PRAGMA synchronous = OFF");
          PDO_Execute("PRAGMA journal_mode = WAL");
          PDO_Execute("PRAGMA foreign_keys = ON");
          PDO_Execute("PRAGMA busy_timeout = 5000");

          if($i==0){
              // Round 0 leaves out rows of the listed base/part types.
              $query = "select * from dict where \"pali\" in {$strQueryWord} AND ( type <> '.n:base.' AND type <> '.ti:base.' AND type <> '.adj:base.' AND type <> '.pron:base.' AND type <> '.v:base.' AND type <> '.part.' ) ".$strOrderby;
          }
          else{
              $query = "select * from dict where \"pali\" in {$strQueryWord} ".$strOrderby;
          }
          if($debug){
              echo htmlspecialchars($query, ENT_QUOTES, "UTF-8")."<br>";
          }

          try {
              $Fetch = PDO_FetchAll($query);
          } catch (Exception $e) {
              if($debug){
                  echo 'Caught exception: ', htmlspecialchars($e->getMessage(), ENT_QUOTES, "UTF-8"), "\n";
              }
              // A failing dictionary is skipped; drop its connection as well.
              $PDO = null;
              continue;
          }

          $iFetch=count($Fetch);
          if($debug){
              echo "count:{$iFetch}<br>";
          }

          foreach($Fetch as $one){
              $pali = $one["pali"];
              $type = $one["type"];
              $parent = $one["parent"];

              // Remember every spelling that some dictionary returned.
              $dict_word_spell["{$pali}"]=1;

              // The dictionaries do not share one schema: optional columns
              // fall back to a default, and "factors"/"factormean" are
              // accepted as alternatives to "parts"/"partmean".
              if(isset($one["parts"])){
                  $parts = $one["parts"];
              }
              else if(isset($one["factors"])){
                  $parts = $one["factors"];
              }
              else{
                  $parts = "";
              }
              if(isset($one["partmean"])){
                  $partmean = $one["partmean"];
              }
              else if(isset($one["factormean"])){
                  $partmean = $one["factormean"];
              }
              else{
                  $partmean = "";
              }

              array_push($output,array(
                  "id"=>$one["id"],
                  "guid"=>isset($one["guid"]) ? $one["guid"] : "",
                  "pali"=>$pali,
                  "type"=>$type,
                  "gramma"=>$one["gramma"],
                  "parent"=>$parent,
                  "mean"=>$one["mean"],
                  "note"=>isset($one["note"]) ? $one["note"] : "",
                  "parts"=>$parts,
                  "part_id"=>isset($one["part_id"]) ? $one["part_id"] : "",
                  "partmean"=>$partmean,
                  "status"=>isset($one["status"]) ? $one["status"] : "",
                  "dict_name"=>isset($one["dict_name"]) ? $one["dict_name"] : "",
                  "language"=>isset($one["language"]) ? $one["language"] : "en"
              ));

              // Queue the parent word for the next round (unless it is the word itself).
              if(!empty($parent) && $pali != $parent){
                  $parent_list[$parent]=1;
              }

              // Queue each '+'-separated factor for the next round.
              // NOTE(review): the SQL above writes this type as '.part.';
              // the bare "part" compared here may never match - confirm.
              if($type!="part" && isset($one["factors"])){
                  foreach(str_getcsv($one["factors"],'+') as $x){
                      if(!empty($x) && $x != $pali){
                          $parent_list[$x]=1;
                      }
                  }
              }
          }
          $PDO = null;
      }

      // An automatic stem lookup (getPaliWordBase) used to run here in round 0
      // and added rows with dict_name "auto" and status 128; it is disabled.

      if($debug){
          echo "parent:".count($parent_list)."<br>";
      }

      if(count($parent_list)==0){
          // Nothing new to look up.
          break;
      }
      $word_list=array_keys($parent_list);
  }
  // Lookup finished.
  // Drop invalid rows: a row whose dict_name is "auto" is kept only when its
  // parent is a spelling that some dictionary returned; every other row is kept.
  $newOutput = array();
  foreach($output as $entry){
      $is_auto = ($entry["dict_name"]=="auto");
      if(!$is_auto || isset($dict_word_spell["{$entry["parent"]}"])){
          $newOutput[] = $entry;
      }
  }

  if($debug){
      // Dump the surviving rows, then the row counts before/after filtering.
      echo "<textarea width=\"100%\" >";
      echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
      echo "</textarea>";
      echo "生成:".count($output)."<br>";
      echo "有效:".count($newOutput)."<br>";
  }
  // Start matching: produce one result row per word occurrence, filled with
  // the first non-empty value that the dictionary rows offer for each field.
  $counter=0;      // occurrences for which at least one field could be filled
  $output=array();
  foreach($FetchAllWord as $word){
      $pali=$word["real"];
      $type="";
      $gramma="";
      $mean="";
      $parent="";
      $parts="";
      $partmean="";

      foreach($newOutput as $dictword){
          if($dictword["pali"]!=$pali){
              continue;
          }
          if($type=="" && $gramma==""){
              $type=$dictword["type"];
              $gramma=$dictword["gramma"];
          }
          if($mean==""){
              // "mean" holds '$'-separated meanings; take the first one.
              // It is trimmed here exactly like in the parent fallback below,
              // so a whitespace-only meaning counts as "no meaning" instead
              // of blocking the fallback.
              $mean=trim((string)str_getcsv($dictword["mean"],"$")[0]);
          }
          if($parent==""){
              $parent=$dictword["parent"];
          }
          if($parts==""){
              $parts=$dictword["parts"];
          }
          if($partmean==""){
              $partmean=$dictword["partmean"];
          }
      }

      // No meaning for the word itself: borrow the first non-empty meaning
      // recorded for its parent.
      if($mean=="" && $parent!=""){
          foreach($newOutput as $parentword){
              if($parentword["pali"]==$parent && $parentword["mean"]!=""){
                  $mean=trim(str_getcsv($parentword["mean"],"$")[0]);
                  if($mean!=""){
                      break;
                  }
              }
          }
      }

      if($type!="" || $gramma!="" || $mean!="" || $parent!="" || $parts!="" || $partmean!=""){
          $counter++;
      }

      array_push($output,
          array("book"=>$in_book,
              "paragraph"=>$word["paragraph"],
              "num"=>$word["vri"],
              "pali"=>$word["real"],
              "type"=>$type,
              "gramma"=>$gramma,
              "mean"=>$mean,
              "parent"=>$parent,
              "parts"=>$parts,
              "partmean"=>$partmean,
              "status"=>3
          ));
  }
  // Send the result. In debug mode the JSON is wrapped in a textarea and
  // followed by the match percentage and the elapsed time in milliseconds.
  $json_result = json_encode($output, JSON_UNESCAPED_UNICODE);
  if(!$debug){
      echo $json_result;
  }
  else{
      echo "<textarea width=\"100%\" >";
      echo $json_result;
      echo "</textarea>";
      echo "匹配".(($counter/count($FetchAllWord))*100)."<br>";
      $queryTime=(microtime_float()-$time_start)*1000;
      echo "<div >搜索时间:$queryTime 毫秒</div>";
  }
  366. ?>