dict_find_auto.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. <?php
  2. require_once "../path.php";
  3. require_once "../public/_pdo.php";
  4. require_once "../public/function.php";
  5. require_once '../ucenter/setting_function.php';
  // Per-user settings; $user_setting["dict.lang"] is read further down when
  // deciding whether a dictionary entry's meaning may be returned.
  $user_setting = get_setting();

  // Request parameters (all via the query string):
  //   book  - identifier used to build the template database file name
  //   para  - comma-separated list of paragraph values to look up
  //   debug - when present (any value), diagnostic HTML is echoed as well
  // Missing or non-string values fall back to "" so that no variable is left
  // undefined.
  $in_book = (isset($_GET["book"]) && is_string($_GET["book"])) ? $_GET["book"] : "";
  $in_para = (isset($_GET["para"]) && is_string($_GET["para"])) ? $_GET["para"] : "";
  $debug = isset($_GET["debug"]);

  // $in_book ends up inside a file path ("/p<book>_tpl.db3"), so only plain
  // identifier characters are accepted; this blocks path traversal such as
  // "../". Without a usable book or paragraph list there is nothing to look up:
  // answer with an empty JSON list, the same reply used when no words are found.
  if (!preg_match('/\A[A-Za-z0-9_-]+\z/', $in_book) || $in_para === "") {
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Delimiter, enclosure and escape are spelled out with their default values
  // so the parsing does not depend on implicit defaults.
  $para_list = str_getcsv($in_para, ",", "\"", "\\");

  // Build the SQL list "('a','b',...)" of paragraph values. Each value is
  // embedded as a string literal, so embedded single quotes are doubled
  // (standard SQL escaping) to keep untrusted input from terminating the literal.
  $quotedParas = array();
  foreach ($para_list as $para) {
      $quotedParas[] = "'" . str_replace("'", "''", (string)$para) . "'";
  }
  $strQueryPara = "(" . implode(",", $quotedParas) . ")";
  /**
   * Current time as a float, in seconds with a fractional part.
   *
   * microtime() called without arguments returns a string of two
   * space-separated numbers; both are converted to float and added together.
   *
   * @return float sum of the two numeric parts reported by microtime()
   */
  function microtime_float()
  {
      $pieces = explode(" ", microtime());
      $fraction = (float)$pieces[0];
      $seconds = (float)$pieces[1];
      return $fraction + $seconds;
  }
  // Reference point for the elapsed-time figure printed in debug mode.
  $time_start = microtime_float();

  // Open database.
  // NOTE(review): $PDO is presumably the handle managed by the PDO_* helpers — confirm.
  global $PDO;

  // Query the word table of the requested book for the selected paragraphs,
  // skipping rows with an empty "real" spelling and rows of type '.ctl.'.
  $db_file = _DIR_PALICANON_TEMPLET_ . "/p" . $in_book . "_tpl.db3";
  PDO_Connect("sqlite:$db_file");
  $query = 'SELECT paragraph,wid,real FROM "main" WHERE ("paragraph" in ' . $strQueryPara
      . ' ) and "real"<>"" and "type"<>\'.ctl.\' ';
  if ($debug) {
      echo "filename:" . $db_file . "<br>";
      echo $query . "<br>";
  }

  $FetchAllWord = PDO_FetchAll($query);
  $iFetch = count($FetchAllWord);
  if ($iFetch === 0) {
      // No words in the selected paragraphs: reply with an empty JSON list.
      echo json_encode(array(), JSON_UNESCAPED_UNICODE);
      exit;
  }

  // Collapse the rows into a set of distinct spellings (the array keys).
  $voc_list = array();
  foreach ($FetchAllWord as $row) {
      $voc_list[$row["real"]] = 1;
  }
  if ($debug) {
      echo "单词表共计:" . count($voc_list) . "词<br>";
  }
  // End of the word-table query.

  // The distinct spellings form the word list for the first lookup pass.
  $word_list = array_keys($voc_list);
  // Upper bound on the number of lookup passes performed by the loop below.
  $lookup_loop = 2;
  // Set (keys only) of every spelling for which a dictionary row was found.
  $dict_word_spell = array();
  // Accumulated dictionary rows across all passes and dictionaries.
  $output = array();

  // Dictionaries to consult, in this order. Each entry is
  // array(database file, SQL suffix appended to the lookup query).
  $db_file_list = array(
      // User dictionary
      array(_FILE_DB_WBW_, " ORDER BY rowid DESC"),
      array(_DIR_DICT_SYSTEM_ . "/sys_regular.db", " ORDER BY confidence DESC"),
      array(_DIR_DICT_SYSTEM_ . "/sys_irregular.db", ""),
      array(_DIR_DICT_SYSTEM_ . "/union.db", ""),
      array(_DIR_DICT_SYSTEM_ . "/comp.db", ""),
      array(_DIR_DICT_3RD_ . "/pm.db", ""),
      array(_DIR_DICT_3RD_ . "/bhmf.db", ""),
      array(_DIR_DICT_3RD_ . "/shuihan.db", ""),
      array(_DIR_DICT_3RD_ . "/concise.db", ""),
      array(_DIR_DICT_3RD_ . "/uhan_en.db", ""),
  );
  // Look the current word list up in every dictionary, for at most
  // $lookup_loop passes. Each pass collects the parents and factors of the
  // rows it found; those become the word list of the next pass.
  // (A disabled, commented-out step that used getPaliWordBase() to add
  // dict_name "auto" entries after the first pass has been left out here.)
  for ($pass = 0; $pass < $lookup_loop; $pass++) {
      $parent_list = array();

      // Word query string: "('w1','w2',...)". ASCII apostrophes inside a word
      // are replaced by the typographic apostrophe before quoting.
      // $word_list is never empty at this point.
      $quotedWords = array();
      foreach ($word_list as $word) {
          $quotedWords[] = "'" . str_replace("'", "’", $word) . "'";
      }
      $strQueryWord = "(" . implode(",", $quotedWords) . ")";
      if ($debug) {
          echo "<h2>第{$pass}轮查询:{$strQueryWord}</h2>";
      }

      foreach ($db_file_list as $db) {
          list($db_file, $strOrderby) = $db;
          if ($debug) {
              echo "dict:$db_file<br>";
          }

          PDO_Connect("sqlite:{$db_file}");
          PDO_Execute("PRAGMA synchronous = OFF");
          PDO_Execute("PRAGMA journal_mode = WAL");
          PDO_Execute("PRAGMA foreign_keys = ON");
          PDO_Execute("PRAGMA busy_timeout = 5000");

          // The first pass excludes the listed ":base." types and '.part.';
          // later passes accept every type.
          $query = "select * from dict where \"pali\" in {$strQueryWord} ";
          if ($pass == 0) {
              $query .= "AND ( type <> '.n:base.' AND type <> '.ti:base.' AND type <> '.adj:base.' AND type <> '.pron:base.' AND type <> '.v:base.' AND type <> '.part.' ) ";
          }
          $query .= $strOrderby;
          if ($debug) {
              echo $query . "<br>";
          }

          try {
              $Fetch = PDO_FetchAll($query);
          } catch (Exception $e) {
              // A dictionary that cannot be queried is skipped.
              if ($debug) {
                  echo 'Caught exception: ', $e->getMessage(), "\n";
              }
              continue;
          }

          $iFetch = count($Fetch);
          if ($debug) {
              echo "count:{$iFetch}<br>";
          }

          if ($iFetch > 0) {
              foreach ($Fetch as $entry) {
                  $id = $entry["id"];
                  $guid = isset($entry["guid"]) ? $entry["guid"] : "";

                  // The language column is "lang" or "language"; default "en".
                  if (isset($entry["lang"])) {
                      $language = $entry["lang"];
                  } elseif (isset($entry["language"])) {
                      $language = $entry["language"];
                  } else {
                      $language = "en";
                  }

                  $pali = $entry["pali"];
                  $dict_word_spell[(string)$pali] = 1;
                  $type = $entry["type"];
                  $gramma = $entry["gramma"];
                  $parent = $entry["parent"];

                  // The meaning is blanked when inLangSetting() rejects the language.
                  $mean = inLangSetting($language, $user_setting["dict.lang"]) ? $entry["mean"] : "";
                  $note = isset($entry["note"]) ? $entry["note"] : "";

                  // Column name is either "parts" or "factors".
                  if (isset($entry["parts"])) {
                      $parts = $entry["parts"];
                  } elseif (isset($entry["factors"])) {
                      $parts = $entry["factors"];
                  } else {
                      $parts = "";
                  }

                  // Column name is either "partmean" or "factormean".
                  if (isset($entry["partmean"])) {
                      $partmean = $entry["partmean"];
                  } elseif (isset($entry["factormean"])) {
                      $partmean = $entry["factormean"];
                  } else {
                      $partmean = "";
                  }
                  // NOTE(review): inLangSetting() is evaluated a second time
                  // here; kept because any side effects it has are not visible.
                  if (inLangSetting($language, $user_setting["dict.lang"]) == false) {
                      $partmean = "";
                  }

                  $part_id = isset($entry["part_id"]) ? $entry["part_id"] : "";
                  $status = isset($entry["status"]) ? $entry["status"] : "";
                  $dict_name = isset($entry["dict_name"]) ? $entry["dict_name"] : "";

                  $output[] = array(
                      "id" => $id,
                      "guid" => $guid,
                      "pali" => $pali,
                      "type" => $type,
                      "gramma" => $gramma,
                      "parent" => $parent,
                      "mean" => $mean,
                      "note" => $note,
                      "parts" => $parts,
                      "part_id" => $part_id,
                      "partmean" => $partmean,
                      "status" => $status,
                      "dict_name" => $dict_name,
                      "language" => $language
                  );

                  // Queue the parent for the next pass unless it is the word itself.
                  if (!empty($parent) && $pali != $parent) {
                      $parent_list[$parent] = 1;
                  }

                  // Queue every '+'-separated factor that differs from the word.
                  if ($type != "part" && isset($entry["factors"])) {
                      $factorList = str_getcsv($entry["factors"], '+');
                      foreach ($factorList as $factor) {
                          if (!empty($factor) && $factor != $pali) {
                              $parent_list[$factor] = 1;
                          }
                      }
                  }
              }
          }

          $PDO = null;
      }

      if ($debug) {
          echo "parent:" . count($parent_list) . "<br>";
      }

      // Nothing new to look up: stop early.
      if (count($parent_list) == 0) {
          break;
      }
      $word_list = array_keys($parent_list);
  }
  // Lookup finished.
  // Remove invalid data: an entry whose dict_name is "auto" is kept only when
  // its parent was actually found in some dictionary; all other entries pass.
  $newOutput = array();
  foreach ($output as $candidate) {
      $isAuto = ($candidate["dict_name"] == "auto");
      if ($isAuto && !isset($dict_word_spell[(string)$candidate["parent"]])) {
          continue;
      }
      $newOutput[] = $candidate;
  }

  if ($debug) {
      echo "<textarea width=\"100%\" >";
      echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
      echo "</textarea>";
      // Entries produced by the lookup vs. entries that survived the filter.
      echo "生成:" . count($output) . "<br>";
      echo "有效:" . count($newOutput) . "<br>";
  }
  // Start matching: build one result row per word occurrence, filling each
  // field from the first dictionary entry that supplies a non-empty value.
  $counter = 0; // number of occurrences for which at least one field was filled
  $output = array();
  foreach ($FetchAllWord as $word) {
      $pali = $word["real"];
      $type = "";
      $gramma = "";
      $mean = "";
      $parent = "";
      $parts = "";
      $partmean = "";

      foreach ($newOutput as $dictword) {
          if ($dictword["pali"] != $pali) {
              continue;
          }
          // Type and grammar are taken together, from the same entry.
          if ($type == "" && $gramma == "") {
              $type = $dictword["type"];
              $gramma = $dictword["gramma"];
          }
          // Only the first '$'-separated meaning is used.
          if (trim($mean) == "") {
              $mean = str_getcsv($dictword["mean"], '$')[0];
          }
          if ($parent == "") {
              $parent = $dictword["parent"];
          }
          if ($parts == "") {
              $parts = $dictword["parts"];
          }
          if ($partmean == "") {
              $partmean = $dictword["partmean"];
          }
      }

      // Still no meaning: borrow the first non-blank meaning of the parent word.
      if ($mean == "" && $parent != "") {
          foreach ($newOutput as $parentword) {
              if ($parentword["pali"] == $parent && $parentword["mean"] != "") {
                  $mean = trim(str_getcsv($parentword["mean"], '$')[0]);
                  if ($mean != "") {
                      break;
                  }
              }
          }
      }

      $hasAnyField = $type != ""
          || $gramma != ""
          || $mean != ""
          || $parent != ""
          || $parts != ""
          || $partmean != "";
      if ($hasAnyField) {
          $counter++;
      }

      $output[] = array(
          "book" => $in_book,
          "paragraph" => $word["paragraph"],
          "num" => $word["wid"],
          "pali" => $word["real"],
          "type" => $type,
          "gramma" => $gramma,
          "mean" => $mean,
          "parent" => $parent,
          "parts" => $parts,
          "partmean" => $partmean,
          "status" => 3
      );
  }
  // Emit the result rows as JSON. In debug mode the JSON is wrapped in a
  // <textarea> and followed by the match percentage and the elapsed time.
  if ($debug) {
      echo "<textarea width=\"100%\" >";
  }
  echo json_encode($output, JSON_UNESCAPED_UNICODE);
  if ($debug) {
      echo "</textarea>";
      // Share of word occurrences for which at least one field was filled.
      echo "匹配" . (($counter / count($FetchAllWord)) * 100) . "<br>";
      // Elapsed time since $time_start, in milliseconds.
      $queryTime = (microtime_float() - $time_start) * 1000;
      echo "<div >搜索时间:$queryTime 毫秒</div>";
  }
  ?>