dict_find_one.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. <?php
  2. require_once "../path.php";
  3. require_once "../public/_pdo.php";
  4. require_once "../public/function.php";
  5. require_once '../ucenter/setting_function.php';
  6. require_once "../redis/function.php";
  7. $redis = redis_connect();
  8. if (isset($_GET["book"])) {
  9. $in_book = $_GET["book"];
  10. }
  11. if (isset($_GET["paragraph"])) {
  12. $in_para = $_GET["paragraph"];
  13. }
  14. if (isset($_GET["sn"])) {
  15. $in_sn = $_GET["sn"];
  16. }
  17. if (isset($_GET["type"])) {
  18. $type = $_GET["type"];
  19. } else {
  20. $type = "wbw";
  21. }
  22. if (isset($_GET["dict_name"])) {
  23. $dict_name = $_GET["dict_name"];
  24. } else {
  25. $dict_name = "";
  26. }
  27. if ($type == "part") {
  28. $lookup_loop = 3;
  29. } else {
  30. $lookup_loop = 3;
  31. }
  32. if (isset($_GET["deep"])) {
  33. $lookup_loop = $_GET["deep"];
  34. } else {
  35. $lookup_loop = 3;
  36. }
  37. $in_word = $_GET["word"];
  38. if (isset($_GET["debug"])) {
  39. $debug = true;
  40. } else {
  41. $debug = false;
  42. }
  43. if (mb_strlen($in_word) == 0) {
  44. exit;
  45. }
  46. function microtime_float()
  47. {
  48. list($usec, $sec) = explode(" ", microtime());
  49. return ((float) $usec + (float) $sec);
  50. }
  51. $time_start = microtime_float();
  52. $user_setting = get_setting();
  53. //open database
  54. global $PDO;
  55. $word_list = str_getcsv($in_word);
  56. $dict_word_spell = array();
  57. $output = array();
  58. $db_file_list = array();
  59. //词典列表
  60. if ($dict_name == "") {
  61. $db_file_list[] = array(_FILE_DB_TERM_,"dict://term",true);
  62. $db_file_list[] = array(_FILE_DB_WBW1_,"dict://user",true);
  63. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/sys_regular.db","dict://regular",true);
  64. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/sys_irregular.db","dict://irregular",true);
  65. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/union.db","dict://union",true);
  66. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/comp.db","dict://comp",true);
  67. $db_file_list[] = array( _DIR_DICT_3RD_ . "/pm.db","dict://pm",true);
  68. $db_file_list[] = array( _DIR_DICT_3RD_ . "/bhmf.db","dict://bhmf",true);
  69. $db_file_list[] = array( _DIR_DICT_3RD_ . "/shuihan.db","dict://shuihan",true);
  70. $db_file_list[] = array( _DIR_DICT_3RD_ . "/concise.db","dict://concise",true);
  71. $db_file_list[] = array( _DIR_DICT_3RD_ . "/uhan_en.db","dict://uhan_en",true);
  72. } else {
  73. $dict_list = str_getcsv($dict_name, ',');
  74. foreach ($dict_list as $dict) {
  75. $db_file_list[] = array( $dict,"");
  76. }
  77. }
  78. $_dict_db = array();
  79. foreach ($db_file_list as $db_file) {
  80. try {
  81. if ($redis && !empty($db_file[1])) {
  82. $dbh=null;
  83. }
  84. else{
  85. $dbh = new PDO("sqlite:" . $db_file[0], "", "");
  86. $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  87. }
  88. $_dict_db[] = array("file" => $db_file[0], "dbh" => $dbh,"redis"=>$db_file[1],"static"=>$db_file[2]);
  89. } catch (PDOException $e) {
  90. if ($debug) {
  91. print "Error!: " . $e->getMessage() . "<br/>";
  92. }
  93. }
  94. }
  95. $lookuped=array();
  96. for ($i = 0; $i < $lookup_loop; $i++) {
  97. $parent_list = array();
  98. # 记录已经查过的词,下次就不查了
  99. $newWordList = array();
  100. foreach ($word_list as $lsWord) {
  101. if(!isset($lookuped[$lsWord]) && !empty($lsWord)){
  102. $newWordList[]=$lsWord;
  103. $lookuped[$lsWord]=1;
  104. }
  105. }
  106. if(count($newWordList)==0){
  107. break;
  108. }
  109. $word_list = $newWordList;
  110. # 记录已经查过的词结束
  111. $strQueryWord = "("; //单词查询字串
  112. foreach ($word_list as $word) {
  113. $word = str_replace("'", "’", $word);
  114. $strQueryWord .= "'{$word}',";
  115. }
  116. $strQueryWord = mb_substr($strQueryWord, 0, mb_strlen($strQueryWord, "UTF-8") - 1, "UTF-8");
  117. $strQueryWord .= ")";
  118. if ($debug) {
  119. echo "<h2>第" . ($i + 1) . "轮查询:" . count($word_list) . "</h2>";
  120. }
  121. foreach ($_dict_db as $db_file) {
  122. if ($debug) {
  123. echo "dict connect:{$db_file["file"]}<br>";
  124. }
  125. if ($i == 0) {
  126. $query = "SELECT * from dict where \"pali\" in $strQueryWord ORDER BY id DESC";
  127. } else {
  128. $query = "SELECT * from dict where \"pali\" in $strQueryWord AND ( type <> '.n.' AND type <> '.ti.' AND type <> '.adj.' AND type <> '.pron.' AND type <> '.v.' ) ORDER BY id DESC";
  129. }
  130. if ($debug) {
  131. echo $query . "<br>";
  132. }
  133. $Fetch = array();
  134. if ($redis && !empty($db_file["redis"])) {
  135. if ($debug) {
  136. echo "<spen style='color:green;'>redis</spen>:{$db_file["redis"]}<br>";
  137. }
  138. foreach ($word_list as $word) {
  139. $wordData = $redis->hGet($db_file["redis"],$word);
  140. if($wordData){
  141. if(!empty($wordData)){
  142. $arrWord = json_decode($wordData,true);
  143. foreach ($arrWord as $one) {
  144. # code...
  145. if(count($one)==14){
  146. $Fetch[] = array("id"=>$one[0],
  147. "pali"=>$one[1],
  148. "type"=>$one[2],
  149. "gramma"=>$one[3],
  150. "parent"=>$one[4],
  151. "mean"=>$one[5],
  152. "note"=>$one[6],
  153. "parts"=>$one[7],
  154. "partmean"=>$one[8],
  155. "status"=>$one[9],
  156. "confidence"=>$one[10],
  157. "dict_name"=>$one[12],
  158. "lang"=>$one[13],
  159. );
  160. }
  161. else{
  162. $Fetch[] = array("id"=>$one[0],
  163. "pali"=>$one[1],
  164. "type"=>$one[2],
  165. "gramma"=>$one[3],
  166. "parent"=>$one[4],
  167. "mean"=>$one[5],
  168. "note"=>$one[6],
  169. "parts"=>$one[7],
  170. "partmean"=>$one[8],
  171. "status"=>$one[9],
  172. "confidence"=>$one[10],
  173. "dict_name"=>$one[12],
  174. "lang"=>"en"
  175. );
  176. }
  177. }
  178. }
  179. }
  180. else{
  181. # 没找到就不找了
  182. }
  183. }
  184. }
  185. else{
  186. if ($debug) {
  187. echo "<spen style='color:red;'>db query</spen>:{$db_file["file"]}<br>";
  188. }
  189. if ($db_file["dbh"]) {
  190. try {
  191. $stmt = $db_file["dbh"]->query($query);
  192. if ($stmt) {
  193. $Fetch = $stmt->fetchAll(PDO::FETCH_ASSOC);
  194. } else {
  195. $Fetch = array();
  196. if ($debug) {
  197. echo "无效的Statement句柄";
  198. }
  199. }
  200. } catch (PDOException $e) {
  201. if ($debug) {
  202. print "Error!: " . $e->getMessage() . "<br/>";
  203. }
  204. $Fetch = array();
  205. }
  206. } else {
  207. $Fetch = array();
  208. if ($debug) {
  209. echo "无效的数据库句柄";
  210. }
  211. }
  212. }
  213. //$Fetch = PDO_FetchAll($query);
  214. $iFetch = count($Fetch);
  215. if ($debug) {
  216. echo "count:$iFetch<br>";
  217. }
  218. if ($iFetch > 0) {
  219. foreach ($Fetch as $one) {
  220. $id = $one["id"];
  221. if (isset($one["guid"])) {
  222. $guid = $one["guid"];
  223. } else {
  224. $guid = "";
  225. }
  226. if (isset($one["lang"])) {
  227. $language = $one["lang"];
  228. } else if (isset($one["language"])) {
  229. $language = $one["language"];
  230. } else {
  231. $language = "en";
  232. }
  233. $pali = $one["pali"];
  234. $dict_word_spell["{$pali}"] = 1;
  235. $type = $one["type"];
  236. $gramma = $one["gramma"];
  237. $parent = $one["parent"];
  238. //$mean = $one["mean"];
  239. if (inLangSetting($language, $user_setting["dict.lang"])) {
  240. $mean = $one["mean"];
  241. } else {
  242. $mean = "";
  243. }
  244. $note = $one["note"];
  245. if (isset($one["factors"])) {
  246. $parts = $one["factors"];
  247. } else if (isset($one["parts"])) {
  248. $parts = $one["parts"];
  249. } else {
  250. $parts = "";
  251. }
  252. if (isset($one["factormean"])) {
  253. $partmean = $one["factormean"];
  254. } else if (isset($one["partmean"])) {
  255. $partmean = $one["partmean"];
  256. } else {
  257. $partmean = "";
  258. }
  259. if (inLangSetting($language, $user_setting["dict.lang"]) == false) {
  260. $partmean = "";
  261. }
  262. $status = $one["status"];
  263. if (isset($one["confidence"])) {
  264. $confidence = $one["confidence"];
  265. } else {
  266. $confidence = 100;
  267. }
  268. if (isset($one["dict_name"])) {
  269. $dict_name = $one["dict_name"];
  270. } else {
  271. $dict_name = "";
  272. }
  273. array_push($output, array(
  274. "id" => $id,
  275. "guid" => $guid,
  276. "pali" => $pali,
  277. "type" => $type,
  278. "gramma" => $gramma,
  279. "parent" => $parent,
  280. "mean" => $mean,
  281. "note" => $note,
  282. "parts" => $parts,
  283. "partmean" => $partmean,
  284. "status" => $status,
  285. "confidence" => $confidence,
  286. "dict_name" => $dict_name,
  287. "language" => $language,
  288. ));
  289. //将语基插入下次查询的列表
  290. if (!empty($parent)) {
  291. if ($pali != $parent) {
  292. $parent_list[$parent] = 1;
  293. }
  294. }
  295. //将拆分插入下次查询的列表
  296. if ($type != ".part.") {
  297. if (!empty($parts)) {
  298. $wordparts = str_getcsv($parts, '+');
  299. foreach ($wordparts as $x) {
  300. if (!empty($x)) {
  301. if ($x != $pali) {
  302. $parent_list[$x] = 1;
  303. }
  304. }
  305. }
  306. }
  307. }
  308. }
  309. }
  310. }
  311. /*
  312. if($i==0){
  313. //自动查找单词词干
  314. $word_base=getPaliWordBase($in_word);
  315. foreach($word_base as $x=>$infolist){
  316. foreach($infolist as $gramma){
  317. array_push($output,
  318. array("pali"=>$in_word,
  319. "parent"=>$x,
  320. "type"=>$gramma["type"],
  321. "gramma"=>$gramma["gramma"],
  322. "parts"=>$gramma["parts"],
  323. "partmean"=>"",
  324. "mean"=>"",
  325. "language"=>"en",
  326. "dict_name"=>"auto",
  327. "status"=>128
  328. ));
  329. $part_list=str_getcsv($gramma["parts"],"+");
  330. foreach($part_list as $part){
  331. $parent_list[$part]=1;
  332. }
  333. }
  334. }
  335. }
  336. */
  337. if ($debug) {
  338. echo "parent:" . count($parent_list) . "<br>";
  339. print_r($parent_list) . "<br>";
  340. }
  341. if (count($parent_list) == 0) {
  342. break;
  343. } else {
  344. $word_list = array();
  345. foreach ($parent_list as $x => $value) {
  346. array_push($word_list, $x);
  347. }
  348. }
  349. }
  350. //删除无效数据
  351. $newOutput = array();
  352. foreach ($output as $value) {
  353. if ($value["dict_name"] == "auto") {
  354. if (isset($dict_word_spell["{$value["parent"]}"])) {
  355. array_push($newOutput, $value);
  356. }
  357. } else {
  358. array_push($newOutput, $value);
  359. }
  360. }
  361. if ($debug) {
  362. echo "<textarea width=\"100%\" >";
  363. }
  364. echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
  365. if ($debug) {
  366. echo "</textarea>";
  367. }
  368. if ($debug) {
  369. echo "生成:" . count($output) . "<br>";
  370. echo "有效:" . count($newOutput) . "<br>";
  371. foreach ($newOutput as $result) {
  372. echo "{$result["pali"]}-{$result["parent"]}-{$result["mean"]}<br>";
  373. }
  374. $queryTime = (microtime_float() - $time_start) * 1000;
  375. echo "<div >搜索时间:$queryTime 毫秒</div>";
  376. }