dict_find3.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. <?php
  2. // Look up a word in the reference dictionaries
  3. require_once '../public/casesuf.inc';
  4. require_once 'dict_find_un.inc';
  5. require_once 'sandhi.php';
  6. require_once "../path.php";
  7. require_once "../public/_pdo.php";
  8. require_once '../public/load_lang.php';
  9. $op = $_GET["op"];
  10. $word = mb_strtolower($_GET["word"], 'UTF-8');
  11. $org_word = $word;
  12. $count_return = 0;
  13. $dict_list = array();
  14. global $PDO;
  15. function myfunction($v1, $v2)
  16. {
  17. return $v1 . "+" . $v2;
  18. }
  19. /*
  20. 查找某个单词是否在现有词典出现
  21. */
  22. function isExsit($word)
  23. {
  24. global $PDO;
  25. $query = "select count(*) as co from dict where \"word\" = " . $PDO->quote($word);
  26. $row = PDO_FetchOne($query);
  27. if ($row[0] == 0) {
  28. return false;
  29. } else {
  30. return true;
  31. }
  32. }
  33. /*
  34. *自动拆分复合词
  35. *功能:将一个单词拆分为两个部分
  36. *输入:想要拆的词
  37. *输出:数组,第一个为前半部分,第二个为后半部分,前半部分是在现有字典里搜索到的。
  38. *范例:
  39. while(($split=mySplit($splitWord))!==FALSE){
  40. array_push($part,$split[0]);
  41. $splitWord=$split[1];
  42. }
  43. 循环结束后$part里放的就是拆分结果
  44. 算法:从最后一个字母开始,一次去掉一个字母,然后在现有字典里搜索剩余的部分(前半部分)
  45. 如果搜索到,就返回。第二次,将剩余的部分,也就是后半部分应用相同的算法。
  46. 直到单词长度小于5
  47. 中间考虑了连音规则:
  48. ~a+i~=~i~
  49. 在拆分的时候要补上前面的元音
  50. 有时后面的词第一个辅音会重复
  51. word+tha~=wordttha~
  52. 需要去掉后面的单词的一个辅音
  53. */
  54. function mySplit($strWord)
  55. {
  56. $doubleword = "kkggccjjṭṭḍḍttddppbb";
  57. $len = mb_strlen($strWord, "UTF-8");
  58. if ($len > 5) {
  59. for ($i = $len - 1; $i > 3; $i--) {
  60. $str1 = mb_substr($strWord, 0, $i, "UTF-8");
  61. $str2 = mb_substr($strWord, $i, null, "UTF-8");
  62. if (isExsit($str1)) {
  63. //如果字典里存在,返回拆分结果
  64. $left2 = mb_substr($str2, 0, 2, "UTF-8");
  65. //如果第二个部分有双辅音,去掉第一个辅音。因为巴利语中没有以双辅音开头的单词。
  66. if (mb_strpos($doubleword, $left2, 0, "UTF-8") !== false) {
  67. $str2 = mb_substr($str2, 1, null, "UTF-8");
  68. }
  69. return array($str1, $str2);
  70. } else {
  71. //补上结尾的a再次查找
  72. $str1 = $str1 . "a";
  73. if (isExsit($str1)) {
  74. $left2 = mb_substr($str2, 0, 2, "UTF-8");
  75. if (mb_strpos($doubleword, $left2, 0, "UTF-8") !== false) {
  76. $str2 = mb_substr($str2, 1, null, "UTF-8");
  77. }
  78. return array($str1, $str2);
  79. }
  80. }
  81. }
  82. //如果没找到。将ā变为a后再找。因为两个a复合后会变成ā
  83. if (mb_substr($strWord, 0, 1, "UTF-8") == "ā") {
  84. $strWord = 'a' . mb_substr($strWord, 1, null, "UTF-8");
  85. for ($i = $len - 1; $i > 3; $i--) {
  86. $str1 = mb_substr($strWord, 0, $i, "UTF-8");
  87. $str2 = mb_substr($strWord, $i, null, "UTF-8");
  88. //echo "$str1 + $str2 = ";
  89. if (isExsit($str1)) {
  90. //echo "match";
  91. $left2 = mb_substr($str2, 0, 2, "UTF-8");
  92. if (mb_strpos($doubleword, $left2, 0, "UTF-8") !== false) {
  93. $str2 = mb_substr($str2, 1, null, "UTF-8");
  94. }
  95. return array($str1, $str2);
  96. } else {
  97. $str1 = $str1 . "a";
  98. if (isExsit($str1)) {
  99. //echo "match";
  100. $left2 = mb_substr($str2, 0, 2, "UTF-8");
  101. if (mb_strpos($doubleword, $left2, 0, "UTF-8") !== false) {
  102. $str2 = mb_substr($str2, 1, null, "UTF-8");
  103. }
  104. return array($str1, $str2);
  105. }
  106. }
  107. }
  108. }
  109. //如果没找到将开头的e变为i再次查找
  110. if (mb_substr($strWord, 0, 1, "UTF-8") == "e") {
  111. $strWord = 'i' . mb_substr($strWord, 1, null, "UTF-8");
  112. for ($i = $len - 1; $i > 3; $i--) {
  113. $str1 = mb_substr($strWord, 0, $i, "UTF-8");
  114. $str2 = mb_substr($strWord, $i, null, "UTF-8");
  115. if (isExsit($str1)) {
  116. //echo "match";
  117. $left2 = mb_substr($str2, 0, 2, "UTF-8");
  118. if (mb_strpos($doubleword, $left2, 0, "UTF-8") !== false) {
  119. $str2 = mb_substr($str2, 1, null, "UTF-8");
  120. }
  121. return array($str1, $str2);
  122. } else {
  123. $str1 = $str1 . "a";
  124. if (isExsit($str1)) {
  125. $left2 = mb_substr($str2, 0, 2, "UTF-8");
  126. if (mb_strpos($doubleword, $left2, 0, "UTF-8") !== false) {
  127. $str2 = mb_substr($str2, 1, null, "UTF-8");
  128. }
  129. return array($str1, $str2);
  130. }
  131. }
  132. }
  133. }
  134. }
  135. return (false);
  136. }
  137. function mySplit2($strWord)
  138. {
  139. $output = array();
  140. $len = mb_strlen($strWord, "UTF-8");
  141. if ($len > 2) {
  142. for ($i = $len - 1; $i > 1; $i--) {
  143. foreach ($sandhi as $row) {
  144. if (mb_substr($strWord, $i, $row[3], "UTF-8") == $row[2]) {
  145. $str1 = mb_substr($strWord, 0, $i - 1, "UTF-8") . $row[0];
  146. $str2 = $row[1] . mb_substr($strWord, $i + $row[2], null, "UTF-8");
  147. if (isExsit($str1)) {
  148. array_push($output, array($str1, $str2));
  149. }
  150. }
  151. }
  152. }
  153. }
  154. return ($output);
  155. }
  156. switch ($op) {
  157. case "pre": //预查询
  158. PDO_Connect(_FILE_DB_REF_INDEX_);
  159. echo "<wordlist>";
  160. $query = "select word,count from dict where \"eword\" like " . $PDO->quote($word . '%') . " OR \"word\" like " . $PDO->quote($word . '%') . " limit 0,100";
  161. $Fetch = PDO_FetchAll($query);
  162. $iFetch = count($Fetch);
  163. if ($iFetch > 0) {
  164. for ($i = 0; $i < $iFetch; $i++) {
  165. $outXml = "<word>";
  166. $word = $Fetch[$i]["word"];
  167. $outXml = $outXml . "<pali>$word</pali>";
  168. $outXml = $outXml . "<count>" . $Fetch[$i]["count"] . "</count>";
  169. $outXml = $outXml . "</word>";
  170. echo $outXml;
  171. }
  172. }
  173. echo "</wordlist>";
  174. break;
  175. case "search":
  176. PDO_Connect(_FILE_DB_REF_);
  177. //直接查询
  178. $query = "select dict.dict_id,dict.mean,info.shortname from dict LEFT JOIN info ON dict.dict_id = info.id where \"word\" = " . $PDO->quote($word) . " limit 0,30";
  179. $Fetch = PDO_FetchAll($query);
  180. $iFetch = count($Fetch);
  181. $count_return += $iFetch;
  182. if ($iFetch > 0) {
  183. for ($i = 0; $i < $iFetch; $i++) {
  184. $mean = $Fetch[$i]["mean"];
  185. $mean = str_replace("[[", "<a onclick=\"dict_jump(this)\">", $mean);
  186. $mean = str_replace("]]", "</a>", $mean);
  187. $dictid = $Fetch[$i]["dict_id"];
  188. $dict_list[$dictid] = $Fetch[$i]["shortname"];
  189. $outXml = "<div class='dict_word'>";
  190. $outXml = $outXml . "<a name='ref_dict_$dictid'></a>";
  191. $outXml = $outXml . "<div class='dict'>" . $Fetch[$i]["shortname"] . "</div>";
  192. $outXml = $outXml . "<div class='mean'>{$mean}</div>";
  193. $outXml = $outXml . "</div>";
  194. echo $outXml;
  195. }
  196. }
  197. if (substr($word, 0, 1) == "_" && substr($word, -1, 1) == "_") {
  198. echo "<div id='dictlist'>";
  199. foreach ($dict_list as $x => $x_value) {
  200. echo "<a href='#ref_dict_$x'>$x_value</a>";
  201. }
  202. echo "</div>";
  203. break;
  204. }
  205. //去除尾查
  206. $newWord = array();
  207. for ($row = 0; $row < count($case); $row++) {
  208. $len = mb_strlen($case[$row][1], "UTF-8");
  209. $end = mb_substr($word, 0 - $len, null, "UTF-8");
  210. if ($end == $case[$row][1]) {
  211. $base = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $len, "UTF-8") . $case[$row][0];
  212. if ($base != $word) {
  213. $gr = "<a onclick=\"dict_jump(this)\">" . str_replace("$", "</a> &nbsp;&nbsp;<a onclick=\"dict_jump(this)\">", $case[$row][2]) . "</a>";
  214. if (isset($newWord[$base])) {
  215. $newWord[$base] .= "<br />" . $gr;
  216. } else {
  217. $newWord[$base] = $gr;
  218. }
  219. }
  220. }
  221. }
  222. if (count($newWord) > 0) {
  223. foreach ($newWord as $x => $x_value) {
  224. $query = "select dict.dict_id,dict.mean,info.shortname from dict LEFT JOIN info ON dict.dict_id = info.id where \"word\" = " . $PDO->quote($x) . " limit 0,30";
  225. $Fetch = PDO_FetchAll($query);
  226. $iFetch = count($Fetch);
  227. $count_return += $iFetch;
  228. if ($iFetch > 0) {
  229. //语法信息
  230. foreach ($_local->grammastr as $gr) {
  231. $x_value = str_replace($gr->id, $gr->value, $x_value);
  232. }
  233. echo $x . ":<div class='dict_find_gramma'>" . $x_value . "</div>";
  234. for ($i = 0; $i < $iFetch; $i++) {
  235. $mean = $Fetch[$i]["mean"];
  236. $dictid = $Fetch[$i]["dict_id"];
  237. $dict_list[$dictid] = $Fetch[$i]["shortname"];
  238. $outXml = "<div class='dict_word'>";
  239. $outXml = $outXml . "<a name='ref_dict_$dictid'></a>";
  240. $outXml = $outXml . "<div class='dict'>" . $Fetch[$i]["shortname"] . "</div>";
  241. $outXml = $outXml . "<div class='mean'>" . $mean . "</div>";
  242. $outXml = $outXml . "</div>";
  243. echo $outXml;
  244. }
  245. }
  246. }
  247. }
  248. //去除尾查结束
  249. //模糊查
  250. //模糊查结束
  251. //查连读词
  252. if ($count_return < 2) {
  253. echo "Junction:<br />";
  254. $newWord = array();
  255. for ($row = 0; $row < count($un); $row++) {
  256. $len = mb_strlen($un[$row][1], "UTF-8");
  257. $end = mb_substr($word, 0 - $len, null, "UTF-8");
  258. if ($end == $un[$row][1]) {
  259. $base = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $len, "UTF-8") . $un[$row][0];
  260. $arr_un = explode("+", $base);
  261. foreach ($arr_un as $oneword) {
  262. echo "<a onclick='dict_pre_word_click(\"$oneword\")'>$oneword</a> + ";
  263. }
  264. echo "<br />";
  265. }
  266. }
  267. }
  268. //拆复合词
  269. $splitWord = $word;
  270. $part = array();
  271. if ($count_return < 2) {
  272. echo "<div>Try to split comp:</div>";
  273. while (($split = mySplit($splitWord)) !== false) {
  274. array_push($part, $split[0]);
  275. $splitWord = $split[1];
  276. }
  277. if (count($part) > 0) {
  278. array_push($part, $splitWord);
  279. $newPart = ltrim(array_reduce($part, "myfunction"), "+");
  280. echo "<div>{$newPart}</div>";
  281. }
  282. }
  283. echo "不满意吗?试试强力拆分。";
  284. echo "<button onclick='dict_turbo_split(\"{$word}\")'>Turbo Split</button>";
  285. //拆复合词结束
  286. //查内容
  287. if ($count_return < 4) {
  288. $word1 = $org_word;
  289. $wordInMean = "%$org_word%";
  290. echo "include $org_word:<br />";
  291. $query = "select dict.dict_id,dict.word,dict.mean,info.shortname from dict LEFT JOIN info ON dict.dict_id = info.id where \"mean\" like " . $PDO->quote($wordInMean) . " limit 0,30";
  292. $Fetch = PDO_FetchAll($query);
  293. $iFetch = count($Fetch);
  294. $count_return += $iFetch;
  295. if ($iFetch > 0) {
  296. for ($i = 0; $i < $iFetch; $i++) {
  297. $mean = $Fetch[$i]["mean"];
  298. $pos = mb_stripos($mean, $word, 0, "UTF-8");
  299. if ($pos) {
  300. if ($pos > 20) {
  301. $start = $pos - 20;
  302. } else {
  303. $start = 0;
  304. }
  305. $newmean = mb_substr($mean, $start, 100, "UTF-8");
  306. } else {
  307. $newmean = $mean;
  308. }
  309. $pos = mb_stripos($newmean, $word1, 0, "UTF-8");
  310. $head = mb_substr($newmean, 0, $pos, "UTF-8");
  311. $mid = mb_substr($newmean, $pos, mb_strlen($word1, "UTF-8"), "UTF-8");
  312. $end = mb_substr($newmean, $pos + mb_strlen($word1, "UTF-8"), null, "UTF-8");
  313. $heigh_light_mean = "$head<hl>$mid</hl>$end";
  314. $outXml = "<div class='dict_word'>";
  315. $outXml = $outXml . "<div class='word'>" . $Fetch[$i]["word"] . "</div>";
  316. $outXml = $outXml . "<div class='dict'>" . $Fetch[$i]["shortname"] . "</div>";
  317. $outXml = $outXml . "<div class='mean'>" . $heigh_light_mean . "</div>";
  318. $outXml = $outXml . "</div>";
  319. echo $outXml;
  320. }
  321. }
  322. }
  323. echo "<div id='dictlist'>";
  324. foreach ($dict_list as $x => $x_value) {
  325. echo "<a href='#ref_dict_$x'>$x_value</a>";
  326. }
  327. echo "</div>";
  328. break;
  329. }