dict_find3.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. <?php
  2. //查询参考字典
  3. require_once '../public/casesuf.inc';
  4. require_once 'dict_find_un.inc';
  5. require_once 'sandhi.php';
  6. require_once "../path.php";
  7. require_once "../public/_pdo.php";
  8. require_once '../public/load_lang.php';
  9. $op=$_GET["op"];
  10. $word=mb_strtolower($_GET["word"],'UTF-8');
  11. $org_word=$word;
  12. $count_return=0;
  13. $dict_list=array();
  14. global $PDO;
  15. function myfunction($v1,$v2)
  16. {
  17. return $v1 . "+" . $v2;
  18. }
  19. /*
  20. 查找某个单词是否在现有词典出现
  21. */
  22. function isExsit($word){
  23. global $PDO;
  24. $query = "select count(*) as co from dict where \"word\" = ".$PDO->quote($word);
  25. $row=PDO_FetchOne($query);
  26. if($row[0]==0){
  27. return false;
  28. }
  29. else{
  30. return true;
  31. }
  32. }
  33. /*
  34. *自动拆分复合词
  35. *功能:将一个单词拆分为两个部分
  36. *输入:想要拆的词
  37. *输出:数组,第一个为前半部分,第二个为后半部分,前半部分是在现有字典里搜索到的。
  38. *范例:
  39. while(($split=mySplit($splitWord))!==FALSE){
  40. array_push($part,$split[0]);
  41. $splitWord=$split[1];
  42. }
  43. 循环结束后$part里放的就是拆分结果
  44. 算法:从最后一个字母开始,一次去掉一个字母,然后在现有字典里搜索剩余的部分(前半部分)
  45. 如果搜索到,就返回。第二次,将剩余的部分,也就是后半部分应用相同的算法。
  46. 直到单词长度小于5
  47. 中间考虑了连音规则:
  48. ~a+i~=~i~
  49. 在拆分的时候要补上前面的元音
  50. 有时后面的词第一个辅音会重复
  51. word+tha~=wordttha~
  52. 需要去掉后面的单词的一个辅音
  53. */
  54. function mySplit($strWord){
  55. $doubleword="kkggccjjṭṭḍḍttddppbb";
  56. $len=mb_strlen($strWord,"UTF-8");
  57. if($len>5){
  58. for($i=$len-1;$i>3;$i--){
  59. $str1=mb_substr($strWord,0,$i,"UTF-8");
  60. $str2=mb_substr($strWord,$i,NULL,"UTF-8");
  61. if(isExsit($str1)){
  62. //如果字典里存在,返回拆分结果
  63. $left2=mb_substr($str2,0,2,"UTF-8");
  64. //如果第二个部分有双辅音,去掉第一个辅音。因为巴利语中没有以双辅音开头的单词。
  65. if(mb_strpos($doubleword,$left2,0,"UTF-8")!==FALSE){
  66. $str2=mb_substr($str2,1,NULL,"UTF-8");
  67. }
  68. return array($str1,$str2);
  69. }
  70. else{
  71. //补上结尾的a再次查找
  72. $str1=$str1."a";
  73. if(isExsit($str1)){
  74. $left2=mb_substr($str2,0,2,"UTF-8");
  75. if(mb_strpos($doubleword,$left2,0,"UTF-8")!==FALSE){
  76. $str2=mb_substr($str2,1,NULL,"UTF-8");
  77. }
  78. return array($str1,$str2);
  79. }
  80. }
  81. }
  82. //如果没找到。将ā变为a后再找。因为两个a复合后会变成ā
  83. if(mb_substr($strWord,0,1,"UTF-8")=="ā"){
  84. $strWord='a'.mb_substr($strWord,1,NULL,"UTF-8");
  85. for($i=$len-1;$i>3;$i--){
  86. $str1=mb_substr($strWord,0,$i,"UTF-8");
  87. $str2=mb_substr($strWord,$i,NULL,"UTF-8");
  88. //echo "$str1 + $str2 = ";
  89. if(isExsit($str1)){
  90. //echo "match";
  91. $left2=mb_substr($str2,0,2,"UTF-8");
  92. if(mb_strpos($doubleword,$left2,0,"UTF-8")!==FALSE){
  93. $str2=mb_substr($str2,1,NULL,"UTF-8");
  94. }
  95. return array($str1,$str2);
  96. }
  97. else{
  98. $str1=$str1."a";
  99. if(isExsit($str1)){
  100. //echo "match";
  101. $left2=mb_substr($str2,0,2,"UTF-8");
  102. if(mb_strpos($doubleword,$left2,0,"UTF-8")!==FALSE){
  103. $str2=mb_substr($str2,1,NULL,"UTF-8");
  104. }
  105. return array($str1,$str2);
  106. }
  107. }
  108. }
  109. }
  110. //如果没找到将开头的e变为i再次查找
  111. if(mb_substr($strWord,0,1,"UTF-8")=="e"){
  112. $strWord='i'.mb_substr($strWord,1,NULL,"UTF-8");
  113. for($i=$len-1;$i>3;$i--){
  114. $str1=mb_substr($strWord,0,$i,"UTF-8");
  115. $str2=mb_substr($strWord,$i,NULL,"UTF-8");
  116. if(isExsit($str1)){
  117. //echo "match";
  118. $left2=mb_substr($str2,0,2,"UTF-8");
  119. if(mb_strpos($doubleword,$left2,0,"UTF-8")!==FALSE){
  120. $str2=mb_substr($str2,1,NULL,"UTF-8");
  121. }
  122. return array($str1,$str2);
  123. }
  124. else{
  125. $str1=$str1."a";
  126. if(isExsit($str1)){
  127. $left2=mb_substr($str2,0,2,"UTF-8");
  128. if(mb_strpos($doubleword,$left2,0,"UTF-8")!==FALSE){
  129. $str2=mb_substr($str2,1,NULL,"UTF-8");
  130. }
  131. return array($str1,$str2);
  132. }
  133. }
  134. }
  135. }
  136. }
  137. return(FALSE);
  138. }
  139. function mySplit2($strWord){
  140. $output = array();
  141. $len=mb_strlen($strWord,"UTF-8");
  142. if($len>2){
  143. for($i=$len-1;$i>1;$i--){
  144. foreach($sandhi as $row){
  145. if(mb_substr($strWord,$i,$row[3],"UTF-8")==$row[2]){
  146. $str1=mb_substr($strWord,0,$i-1,"UTF-8").$row[0];
  147. $str2=$row[1].mb_substr($strWord,$i+$row[2],NULL,"UTF-8");
  148. if(isExsit($str1)){
  149. array_push($output,array($str1,$str2));
  150. }
  151. }
  152. }
  153. }
  154. }
  155. return($output);
  156. }
  157. switch($op){
  158. case "pre"://预查询
  159. $dictFileName=_FILE_DB_REF_INDEX_;
  160. PDO_Connect("sqlite:$dictFileName");
  161. echo "<wordlist>";
  162. $query = "select word,count from dict where \"eword\" like ".$PDO->quote($word.'%')." OR \"word\" like ".$PDO->quote($word.'%')." limit 0,100";
  163. $Fetch = PDO_FetchAll($query);
  164. $iFetch=count($Fetch);
  165. if($iFetch>0){
  166. for($i=0;$i<$iFetch;$i++){
  167. $outXml = "<word>";
  168. $word=$Fetch[$i]["word"];
  169. $outXml = $outXml."<pali>$word</pali>";
  170. $outXml = $outXml."<count>".$Fetch[$i]["count"]."</count>";
  171. $outXml = $outXml."</word>";
  172. echo $outXml;
  173. }
  174. }
  175. echo "</wordlist>";
  176. break;
  177. case "search":
  178. $dictFileName=_FILE_DB_REF_;
  179. PDO_Connect("sqlite:$dictFileName");
  180. //直接查询
  181. $query = "select dict.dict_id,dict.mean,info.shortname from dict LEFT JOIN info ON dict.dict_id = info.id where \"word\" = ".$PDO->quote($word)." limit 0,30";
  182. $Fetch = PDO_FetchAll($query);
  183. $iFetch=count($Fetch);
  184. $count_return+=$iFetch;
  185. if($iFetch>0){
  186. for($i=0;$i<$iFetch;$i++){
  187. $mean=$Fetch[$i]["mean"];
  188. $mean = str_replace("[[","<a onclick=\"dict_jump(this)\">",$mean);
  189. $mean = str_replace("]]","</a>",$mean);
  190. $dictid=$Fetch[$i]["dict_id"];
  191. $dict_list[$dictid]=$Fetch[$i]["shortname"];
  192. $outXml = "<div class='dict_word'>";
  193. $outXml = $outXml."<a name='ref_dict_$dictid'></a>";
  194. $outXml = $outXml."<div class='dict'>".$Fetch[$i]["shortname"]."</div>";
  195. $outXml = $outXml."<div class='mean'>{$mean}</div>";
  196. $outXml = $outXml."</div>";
  197. echo $outXml;
  198. }
  199. }
  200. if(substr($word,0,1)=="_" && substr($word,-1,1)=="_"){
  201. echo "<div id='dictlist'>";
  202. foreach($dict_list as $x=>$x_value) {
  203. echo "<a href='#ref_dict_$x'>$x_value</a>";
  204. }
  205. echo "</div>";
  206. break;
  207. }
  208. //去除尾查
  209. $newWord=array();
  210. for ($row = 0; $row < count($case); $row++) {
  211. $len=mb_strlen($case[$row][1],"UTF-8");
  212. $end=mb_substr($word, 0-$len,NULL,"UTF-8");
  213. if($end==$case[$row][1]){
  214. $base=mb_substr($word, 0,mb_strlen($word,"UTF-8")-$len,"UTF-8").$case[$row][0];
  215. if($base!=$word){
  216. $gr="<a onclick=\"dict_jump(this)\">".str_replace("$","</a> &nbsp;&nbsp;<a onclick=\"dict_jump(this)\">",$case[$row][2])."</a>";
  217. if(isset($newWord[$base])){
  218. $newWord[$base] .= "<br />".$gr;
  219. }
  220. else{
  221. $newWord[$base] = $gr;
  222. }
  223. }
  224. }
  225. }
  226. if(count($newWord)>0){
  227. foreach($newWord as $x=>$x_value) {
  228. $query = "select dict.dict_id,dict.mean,info.shortname from dict LEFT JOIN info ON dict.dict_id = info.id where \"word\" = ".$PDO->quote($x)." limit 0,30";
  229. $Fetch = PDO_FetchAll($query);
  230. $iFetch=count($Fetch);
  231. $count_return+=$iFetch;
  232. if($iFetch>0){
  233. //语法信息
  234. foreach($_local->grammastr as $gr){
  235. $x_value = str_replace($gr->id,$gr->value,$x_value);
  236. }
  237. echo $x . ":<div class='dict_find_gramma'>" . $x_value . "</div>";
  238. for($i=0;$i<$iFetch;$i++){
  239. $mean=$Fetch[$i]["mean"];
  240. $dictid=$Fetch[$i]["dict_id"];
  241. $dict_list[$dictid]=$Fetch[$i]["shortname"];
  242. $outXml = "<div class='dict_word'>";
  243. $outXml = $outXml."<a name='ref_dict_$dictid'></a>";
  244. $outXml = $outXml."<div class='dict'>".$Fetch[$i]["shortname"]."</div>";
  245. $outXml = $outXml."<div class='mean'>".$mean."</div>";
  246. $outXml = $outXml."</div>";
  247. echo $outXml;
  248. }
  249. }
  250. }
  251. }
  252. //去除尾查结束
  253. //模糊查
  254. //模糊查结束
  255. //查连读词
  256. if($count_return<2){
  257. echo "Junction:<br />";
  258. $newWord=array();
  259. for ($row = 0; $row < count($un); $row++) {
  260. $len=mb_strlen($un[$row][1],"UTF-8");
  261. $end=mb_substr($word, 0-$len,NULL,"UTF-8");
  262. if($end==$un[$row][1]){
  263. $base=mb_substr($word, 0,mb_strlen($word,"UTF-8")-$len,"UTF-8").$un[$row][0];
  264. $arr_un=explode("+",$base);
  265. foreach ($arr_un as $oneword)
  266. {
  267. echo "<a onclick='dict_pre_word_click(\"$oneword\")'>$oneword</a> + ";
  268. }
  269. echo "<br />";
  270. }
  271. }
  272. }
  273. //拆复合词
  274. $splitWord=$word;
  275. $part=array();
  276. if($count_return<2)
  277. {
  278. echo "<div>Try to split comp:</div>";
  279. while(($split=mySplit($splitWord))!==FALSE){
  280. array_push($part,$split[0]);
  281. $splitWord=$split[1];
  282. }
  283. if(count($part)>0){
  284. array_push($part,$splitWord);
  285. $newPart=ltrim(array_reduce($part,"myfunction"),"+");
  286. echo "<div>{$newPart}</div>";
  287. }
  288. }
  289. echo "不满意吗?试试强力拆分。";
  290. echo "<button onclick='dict_turbo_split(\"{$word}\")'>Turbo Split</button>";
  291. //拆复合词结束
  292. //查内容
  293. if($count_return<4){
  294. $word1=$org_word;
  295. $wordInMean="%$org_word%";
  296. echo "include $org_word:<br />";
  297. $query = "select dict.dict_id,dict.word,dict.mean,info.shortname from dict LEFT JOIN info ON dict.dict_id = info.id where \"mean\" like ".$PDO->quote($wordInMean)." limit 0,30";
  298. $Fetch = PDO_FetchAll($query);
  299. $iFetch=count($Fetch);
  300. $count_return+=$iFetch;
  301. if($iFetch>0){
  302. for($i=0;$i<$iFetch;$i++){
  303. $mean=$Fetch[$i]["mean"];
  304. $pos=mb_stripos($mean,$word,0,"UTF-8");
  305. if($pos){
  306. if($pos>20){
  307. $start=$pos-20;
  308. }
  309. else{
  310. $start=0;
  311. }
  312. $newmean=mb_substr($mean,$start,100,"UTF-8");
  313. }
  314. else{
  315. $newmean=$mean;
  316. }
  317. $pos=mb_stripos($newmean,$word1,0,"UTF-8");
  318. $head=mb_substr($newmean,0,$pos,"UTF-8");
  319. $mid=mb_substr($newmean,$pos,mb_strlen($word1,"UTF-8"),"UTF-8");
  320. $end=mb_substr($newmean,$pos+mb_strlen($word1,"UTF-8"),NULL,"UTF-8");
  321. $heigh_light_mean="$head<hl>$mid</hl>$end";
  322. $outXml = "<div class='dict_word'>";
  323. $outXml = $outXml."<div class='word'>".$Fetch[$i]["word"]."</div>";
  324. $outXml = $outXml."<div class='dict'>".$Fetch[$i]["shortname"]."</div>";
  325. $outXml = $outXml."<div class='mean'>".$heigh_light_mean."</div>";
  326. $outXml = $outXml."</div>";
  327. echo $outXml;
  328. }
  329. }
  330. }
  331. echo "<div id='dictlist'>";
  332. foreach($dict_list as $x=>$x_value) {
  333. echo "<a href='#ref_dict_$x'>$x_value</a>";
  334. }
  335. echo "</div>";
  336. break;
  337. }
  338. ?>