<?php
/*
 * dict_find_one.php
 *
 * Dictionary lookup endpoint. Reads its parameters from the query string:
 *   word       (required) comma-separated list of words to look up
 *   dict_name  (optional) comma-separated list of dictionary database files
 *   deep       (optional) maximum number of lookup rounds, default 3
 *   type       (optional) defaults to "wbw"
 *   debug      (optional) when present, diagnostic HTML is printed
 *   book, paragraph, sn (optional) captured but not read in the code visible here
 */
require_once "../path.php";
require_once "../public/_pdo.php";
require_once "../public/function.php";
require_once '../ucenter/setting_function.php';
require_once "../redis/function.php";

// The lookup code further down only uses the cache when $redis is truthy;
// assign false here to bypass the cache while debugging.
$redis = redis_connect();

// Optional context parameters: each variable is only defined when the
// corresponding parameter was supplied.
if (isset($_GET["book"])) {
    $in_book = $_GET["book"];
}
if (isset($_GET["paragraph"])) {
    $in_para = $_GET["paragraph"];
}
if (isset($_GET["sn"])) {
    $in_sn = $_GET["sn"];
}

$type = isset($_GET["type"]) ? $_GET["type"] : "wbw";

// Array-style parameters (dict_name[]=...) are treated as absent because the
// value is later parsed as a CSV string.
$dict_name = (isset($_GET["dict_name"]) && is_string($_GET["dict_name"])) ? $_GET["dict_name"] : "";

// Number of lookup rounds. Only a numeric "deep" overrides the default, and it
// is truncated to an integer so the loop bound is always a plain int.
$lookup_loop = 3;
if (isset($_GET["deep"]) && is_numeric($_GET["deep"])) {
    $lookup_loop = (int) $_GET["deep"];
}

// A missing or non-string "word" is handled like an empty one.
$in_word = (isset($_GET["word"]) && is_string($_GET["word"])) ? $_GET["word"] : "";

$debug = isset($_GET["debug"]);

// Nothing to look up: end the request without output.
if (mb_strlen($in_word) == 0) {
    exit;
}
  /**
   * Current Unix timestamp in seconds, including the fractional
   * (microsecond) part.
   *
   * @return float
   */
  function microtime_float()
  {
      // microtime(true) returns the float directly; no string parsing needed.
      return microtime(true);
  }
  // Start the timer used by the debug timing report and load the user's settings.
  $time_start = microtime_float();
  $user_setting = get_setting();

  // NOTE(review): $PDO is not referenced in the code visible here; the
  // declaration is kept in case an including scope relies on it.
  global $PDO;

  $word_list = str_getcsv($in_word);
  $dict_word_spell = array(); // spellings (pali column) seen in any result row
  $output = array();          // accumulated result rows

  // Each entry is array(database file, redis hash key, static flag).
  // An empty redis key means the database is queried directly.
  $db_file_list = array();
  if ($dict_name == "") {
      // Default set: the user dictionary first, then system and third-party ones.
      $db_file_list[] = array(_FILE_DB_WBW1_, "dict://user", false);

      $systemDicts = array(
          "sys_regular.db" => "dict://regular",
          "sys_irregular.db" => "dict://irregular",
          "union.db" => "dict://union",
          "comp.db" => "dict://comp",
      );
      foreach ($systemDicts as $fileName => $redisKey) {
          $db_file_list[] = array(_DIR_DICT_SYSTEM_ . "/" . $fileName, $redisKey, true);
      }

      $thirdPartyDicts = array("pm", "bhmf", "shuihan", "concise", "uhan_en");
      foreach ($thirdPartyDicts as $dictId) {
          $db_file_list[] = array(_DIR_DICT_3RD_ . "/" . $dictId . ".db", "dict://" . $dictId, true);
      }
  } else {
      // NOTE(review): these paths come straight from the request and are
      // opened as SQLite files below — confirm callers are trusted or
      // restrict the accepted locations.
      $dict_list = str_getcsv($dict_name, ',');
      foreach ($dict_list as $dict) {
          // The third element is supplied explicitly so that reading index 2
          // below never hits an undefined offset.
          $db_file_list[] = array($dict, "", false);
      }
  }

  // Open one connection per dictionary; a dictionary that cannot be opened
  // is skipped (the error is shown only in debug mode).
  $_dict_db = array();
  foreach ($db_file_list as $db_file) {
      try {
          $dbh = new PDO("sqlite:" . $db_file[0], "", "");
          $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
          $_dict_db[] = array(
              "file" => $db_file[0],
              "dbh" => $dbh,
              "redis" => $db_file[1],
              "static" => $db_file[2],
          );
      } catch (PDOException $e) {
          if ($debug) {
              print "Error!: " . $e->getMessage() . "<br/>";
          }
      }
  }
  // Multi-round lookup. Round 1 looks up the requested words; every later
  // round looks up the parents and component parts found in the previous
  // round. The loop ends after $lookup_loop rounds, or earlier when there is
  // nothing new to look up.

  // Words that were already looked up, so that later rounds skip them.
  $lookuped = array();

  // Position of each field inside a cached Redis row (a JSON array).
  // Index 11 is not read.
  $redisColumns = array(
      "id" => 0,
      "pali" => 1,
      "type" => 2,
      "gramma" => 3,
      "parent" => 4,
      "mean" => 5,
      "note" => 6,
      "parts" => 7,
      "partmean" => 8,
      "status" => 9,
      "confidence" => 10,
      "dict_name" => 12,
      "lang" => 13,
  );

  // Upper bound on the words bound into one IN (...) list; SQLite limits the
  // number of bound parameters per statement, so long lists are chunked.
  $maxBoundWords = 500;

  for ($i = 0; $i < $lookup_loop; $i++) {
      $parent_list = array();

      // Keep only non-empty words that have not been looked up yet.
      $newWordList = array();
      foreach ($word_list as $lsWord) {
          if (!isset($lookuped[$lsWord]) && !empty($lsWord)) {
              $newWordList[] = $lsWord;
              $lookuped[$lsWord] = 1;
          }
      }
      if (count($newWordList) == 0) {
          break;
      }
      $word_list = $newWordList;

      // The SQL lookup searches with every ASCII apostrophe mapped to ’
      // (the Redis lookup below uses the word unchanged).
      $queryWords = array();
      foreach ($word_list as $word) {
          $queryWords[] = str_replace("'", "’", $word);
      }
      $wordChunks = array_chunk($queryWords, $maxBoundWords);

      // From the second round on, rows of these types are excluded by the SQL query.
      if ($i == 0) {
          $typeFilter = "";
      } else {
          $typeFilter = " AND ( type <> '.n.' AND type <> '.ti.' AND type <> '.adj.' AND type <> '.pron.' AND type <> '.v.' )";
      }

      if ($debug) {
          echo "<h2>第" . ($i + 1) . "轮查询:" . count($word_list) . "</h2>";
      }

      foreach ($_dict_db as $db_file) {
          if ($debug) {
              echo "dict connect:{$db_file["file"]}<br>";
          }

          $Fetch = array();
          if ($redis && !empty($db_file["redis"])) {
              // Cached dictionary: one hash lookup per word.
              // NOTE(review): the type filter applied by the SQL path in later
              // rounds is not applied to cached rows — confirm this is intended.
              if ($debug) {
                  echo "<spen style='color:green;'>redis</spen>:{$db_file["redis"]}<br>";
              }
              foreach ($word_list as $word) {
                  $wordData = $redis->hGet($db_file["redis"], $word);
                  if (!$wordData) {
                      // Cache miss: nothing is fetched from the database here
                      // (a database fallback that refilled the cache is disabled).
                      continue;
                  }
                  $arrWord = json_decode($wordData, true);
                  if (!is_array($arrWord)) {
                      // Malformed cache entry: ignore it rather than iterate a non-array.
                      continue;
                  }
                  foreach ($arrWord as $cached) {
                      if (!is_array($cached)) {
                          continue;
                      }
                      $row = array();
                      foreach ($redisColumns as $field => $index) {
                          $row[$field] = isset($cached[$index]) ? $cached[$index] : null;
                      }
                      $Fetch[] = $row;
                  }
              }
          } else {
              if ($debug) {
                  echo "<spen style='color:red;'>db query</spen>:{$db_file["file"]}<br>";
              }
              if ($db_file["dbh"]) {
                  try {
                      foreach ($wordChunks as $chunk) {
                          // Words are passed as bound parameters, never
                          // spliced into the SQL text.
                          $placeholders = implode(",", array_fill(0, count($chunk), "?"));
                          $query = "SELECT * from dict where \"pali\" in ({$placeholders}){$typeFilter} ORDER BY id DESC";
                          if ($debug) {
                              echo $query . "<br>";
                          }
                          $stmt = $db_file["dbh"]->prepare($query);
                          if ($stmt && $stmt->execute($chunk)) {
                              $Fetch = array_merge($Fetch, $stmt->fetchAll(PDO::FETCH_ASSOC));
                          } else if ($debug) {
                              echo "无效的Statement句柄";
                          }
                      }
                      if (count($wordChunks) > 1) {
                          // Restore the overall "id DESC" order across chunks.
                          usort($Fetch, function ($a, $b) {
                              if ($a["id"] == $b["id"]) {
                                  return 0;
                              }
                              return ($a["id"] < $b["id"]) ? 1 : -1;
                          });
                      }
                  } catch (PDOException $e) {
                      if ($debug) {
                          print "Error!: " . $e->getMessage() . "<br/>";
                      }
                      $Fetch = array();
                  }
              } else {
                  if ($debug) {
                      echo "无效的数据库句柄";
                  }
              }
          }

          $iFetch = count($Fetch);
          if ($debug) {
              echo "count:$iFetch<br>";
          }

          foreach ($Fetch as $one) {
              $id = $one["id"];
              $guid = isset($one["guid"]) ? $one["guid"] : "";

              // Language code: first two characters of "lang" (cached rows)
              // or "language" (database rows), defaulting to "en".
              if (isset($one["lang"])) {
                  $language = substr($one["lang"], 0, 2);
              } else if (isset($one["language"])) {
                  $language = substr($one["language"], 0, 2);
              } else {
                  $language = "en";
              }

              $pali = $one["pali"];
              $dict_word_spell["{$pali}"] = 1;

              // Row-local names are used so the request parameters $type and
              // $dict_name are not overwritten by row data.
              $entryType = $one["type"];
              $gramma = $one["gramma"];
              $parent = $one["parent"];
              $note = $one["note"];

              // Database rows use "factors"/"factormean"; cached rows use
              // "parts"/"partmean".
              if (isset($one["factors"])) {
                  $parts = $one["factors"];
              } else if (isset($one["parts"])) {
                  $parts = $one["parts"];
              } else {
                  $parts = "";
              }
              if (isset($one["factormean"])) {
                  $partmean = $one["factormean"];
              } else if (isset($one["partmean"])) {
                  $partmean = $one["partmean"];
              } else {
                  $partmean = "";
              }

              // Meanings are blanked when the row's language is not accepted
              // by the user's "dict.lang" setting.
              if (inLangSetting($language, $user_setting["dict.lang"])) {
                  $mean = $one["mean"];
              } else {
                  $mean = "";
                  $partmean = "";
              }

              $status = $one["status"];
              $confidence = isset($one["confidence"]) ? $one["confidence"] : 100;
              $entryDictName = isset($one["dict_name"]) ? $one["dict_name"] : "";

              $output[] = array(
                  "id" => $id,
                  "guid" => $guid,
                  "pali" => $pali,
                  "type" => $entryType,
                  "gramma" => $gramma,
                  "parent" => $parent,
                  "mean" => $mean,
                  "note" => $note,
                  "parts" => $parts,
                  "partmean" => $partmean,
                  "status" => $status,
                  "confidence" => $confidence,
                  "dict_name" => $entryDictName,
                  "language" => $language,
              );

              // Queue the parent (base form) for the next round.
              if (!empty($parent) && $pali != $parent) {
                  $parent_list[$parent] = 1;
              }

              // Queue the "+"-separated component parts for the next round,
              // except for rows that are themselves of type ".part.".
              if ($entryType != ".part." && !empty($parts)) {
                  $wordparts = str_getcsv($parts, '+');
                  foreach ($wordparts as $x) {
                      if (!empty($x) && $x != $pali) {
                          $parent_list[$x] = 1;
                      }
                  }
              }
          }
      }

      // Automatic stem detection, which added rows with dict_name "auto" in
      // the first round, is disabled.

      if ($debug) {
          echo "parent:" . count($parent_list) . "<br>";
          print_r($parent_list);
          echo "<br>";
      }

      if (count($parent_list) == 0) {
          break;
      }
      // The queued parents and parts become the next round's word list.
      $word_list = array_keys($parent_list);
  }
  // Drop invalid rows: a row whose dict_name is "auto" is kept only when its
  // parent spelling was seen in some result row; all other rows are kept.
  $newOutput = array();
  foreach ($output as $row) {
      if ($row["dict_name"] == "auto" && !isset($dict_word_spell["{$row["parent"]}"])) {
          continue;
      }
      $newOutput[] = $row;
  }

  $encoded = json_encode($newOutput, JSON_UNESCAPED_UNICODE);
  if (!$debug) {
      // Normal mode: the response body is the JSON document only.
      echo $encoded;
  } else {
      // Debug mode: show the JSON in a textarea, followed by row counts,
      // a one-line summary per row and the elapsed time in milliseconds.
      echo "<textarea width=\"100%\" >";
      echo $encoded;
      echo "</textarea>";
      echo "生成:" . count($output) . "<br>";
      echo "有效:" . count($newOutput) . "<br>";
      foreach ($newOutput as $entry) {
          echo "{$entry["pali"]}-{$entry["parent"]}-{$entry["mean"]}<br>";
      }
      $queryTime = (microtime_float() - $time_start) * 1000;
      echo "<div >搜索时间:$queryTime 毫秒</div>";
  }