dict_find_one.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. <?php
  2. include("../log/pref_log.php");
  3. require_once __DIR__."/../config.php";
  4. require_once "../public/_pdo.php";
  5. require_once "../public/function.php";
  6. require_once '../ucenter/setting_function.php';
  7. require_once "../redis/function.php";
  8. $redis = redis_connect();
  9. if (isset($_GET["book"])) {
  10. $in_book = $_GET["book"];
  11. }
  12. if (isset($_GET["paragraph"])) {
  13. $in_para = $_GET["paragraph"];
  14. }
  15. if (isset($_GET["sn"])) {
  16. $in_sn = $_GET["sn"];
  17. }
  18. if (isset($_GET["type"])) {
  19. $type = $_GET["type"];
  20. } else {
  21. $type = "wbw";
  22. }
  23. if (isset($_GET["dict_name"])) {
  24. $dict_name = $_GET["dict_name"];
  25. } else {
  26. $dict_name = "";
  27. }
  28. if ($type == "part") {
  29. $lookup_loop = 3;
  30. } else {
  31. $lookup_loop = 3;
  32. }
  33. if (isset($_GET["deep"])) {
  34. $lookup_loop = $_GET["deep"];
  35. } else {
  36. $lookup_loop = 3;
  37. }
  38. $in_word = $_GET["word"];
  39. if (isset($_GET["debug"])) {
  40. $debug = true;
  41. } else {
  42. $debug = false;
  43. }
  44. if (mb_strlen($in_word) == 0) {
  45. echo json_encode(array(), JSON_UNESCAPED_UNICODE);
  46. exit;
  47. }
  48. function microtime_float()
  49. {
  50. list($usec, $sec) = explode(" ", microtime());
  51. return ((float) $usec + (float) $sec);
  52. }
  53. $time_start = microtime_float();
  54. $user_setting = get_setting();
  55. //open database
  56. global $PDO;
  57. $word_list = str_getcsv($in_word);
  58. $dict_word_spell = array();
  59. $output = array();
  60. $db_file_list = array();
  61. //词典列表
  62. if ($dict_name == "") {
  63. $db_file_list[] = array(_FILE_DB_TERM_,"dict://term",true);
  64. $db_file_list[] = array(_FILE_DB_WBW1_,Redis["prefix"]."dict/user",true);
  65. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/sys_regular.db","dict://regular",true);
  66. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/sys_irregular.db","dict://irregular",true);
  67. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/union.db","dict://union",true);
  68. $db_file_list[] = array( _DIR_DICT_SYSTEM_ . "/comp.db","dict://comp",true);
  69. $db_file_list[] = array( _DIR_DICT_3RD_ . "/pm.db","dict://pm",true);
  70. $db_file_list[] = array( _DIR_DICT_3RD_ . "/bhmf.db","dict://bhmf",true);
  71. $db_file_list[] = array( _DIR_DICT_3RD_ . "/shuihan.db","dict://shuihan",true);
  72. $db_file_list[] = array( _DIR_DICT_3RD_ . "/concise.db","dict://concise",true);
  73. $db_file_list[] = array( _DIR_DICT_3RD_ . "/uhan_en.db","dict://uhan_en",true);
  74. $db_file_list[] = array( _DIR_DICT_3RD_ . "/uhan_en.db","dict://uhausein",true);
  75. } else {
  76. $dict_list = str_getcsv($dict_name, ',');
  77. foreach ($dict_list as $dict) {
  78. $db_file_list[] = array( $dict,"",false);
  79. }
  80. }
  81. $_dict_db = array();
  82. foreach ($db_file_list as $db_file) {
  83. try {
  84. if ($redis && !empty($db_file[1])) {
  85. $dbh=null;
  86. }
  87. else{
  88. $dbh = new PDO("sqlite:" . $db_file[0], "", "");
  89. $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  90. }
  91. $_dict_db[] = array("file" => $db_file[0], "dbh" => $dbh,"redis"=>$db_file[1],"static"=>$db_file[2]);
  92. } catch (PDOException $e) {
  93. if ($debug) {
  94. print "Error!: " . $e->getMessage() . "<br/>";
  95. }
  96. }
  97. }
  98. $lookuped=array();
  99. for ($i = 0; $i < $lookup_loop; $i++) {
  100. $parent_list = array();
  101. # 记录已经查过的词,下次就不查了
  102. $newWordList = array();
  103. foreach ($word_list as $lsWord) {
  104. if(!isset($lookuped[$lsWord]) && !empty($lsWord)){
  105. $newWordList[]=$lsWord;
  106. $lookuped[$lsWord]=1;
  107. }
  108. }
  109. if(count($newWordList)==0){
  110. break;
  111. }
  112. $word_list = $newWordList;
  113. # 记录已经查过的词结束
  114. $strQueryWord = "("; //单词查询字串
  115. foreach ($word_list as $word) {
  116. $word = str_replace("'", "’", $word);
  117. $strQueryWord .= "'{$word}',";
  118. }
  119. $strQueryWord = mb_substr($strQueryWord, 0, mb_strlen($strQueryWord, "UTF-8") - 1, "UTF-8");
  120. $strQueryWord .= ")";
  121. if ($debug) {
  122. echo "<h2>第" . ($i + 1) . "轮查询:" . count($word_list) . "</h2>";
  123. }
  124. foreach ($_dict_db as $db_file) {
  125. if ($debug) {
  126. echo "dict connect:{$db_file["file"]}<br>";
  127. }
  128. if ($i == 0) {
  129. $query = "SELECT * from dict where \"pali\" in $strQueryWord ORDER BY id DESC";
  130. } else {
  131. $query = "SELECT * from dict where \"pali\" in $strQueryWord AND ( type <> '.n.' AND type <> '.ti.' AND type <> '.adj.' AND type <> '.pron.' AND type <> '.v.' ) ORDER BY id DESC";
  132. }
  133. if ($debug) {
  134. echo $query . "<br>";
  135. }
  136. $Fetch = array();
  137. if ($redis && !empty($db_file["redis"])) {
  138. if ($debug) {
  139. echo "<spen style='color:green;'>redis</spen>:{$db_file["redis"]}<br>";
  140. }
  141. foreach ($word_list as $word) {
  142. $wordData = $redis->hGet($db_file["redis"],$word);
  143. if($wordData){
  144. if(!empty($wordData)){
  145. $arrWord = json_decode($wordData,true);
  146. foreach ($arrWord as $one) {
  147. # code...
  148. if(count($one)==14){
  149. $Fetch[] = array("id"=>$one[0],
  150. "pali"=>$one[1],
  151. "type"=>$one[2],
  152. "gramma"=>$one[3],
  153. "parent"=>$one[4],
  154. "mean"=>$one[5],
  155. "note"=>$one[6],
  156. "parts"=>$one[7],
  157. "partmean"=>$one[8],
  158. "status"=>$one[9],
  159. "confidence"=>$one[10],
  160. "dict_name"=>$one[12],
  161. "lang"=>$one[13],
  162. );
  163. }
  164. else{
  165. $Fetch[] = array("id"=>$one[0],
  166. "pali"=>$one[1],
  167. "type"=>$one[2],
  168. "gramma"=>$one[3],
  169. "parent"=>$one[4],
  170. "mean"=>$one[5],
  171. "note"=>$one[6],
  172. "parts"=>$one[7],
  173. "partmean"=>$one[8],
  174. "status"=>$one[9],
  175. "confidence"=>$one[10],
  176. "dict_name"=>$one[12],
  177. "lang"=>"en"
  178. );
  179. }
  180. }
  181. }
  182. }
  183. else{
  184. # 没找到就不找了
  185. }
  186. }
  187. }
  188. else{
  189. if ($debug) {
  190. echo "<spen style='color:red;'>db query</spen>:{$db_file["file"]}<br>";
  191. }
  192. if ($db_file["dbh"]) {
  193. try {
  194. $stmt = $db_file["dbh"]->query($query);
  195. if ($stmt) {
  196. $Fetch = $stmt->fetchAll(PDO::FETCH_ASSOC);
  197. } else {
  198. $Fetch = array();
  199. if ($debug) {
  200. echo "无效的Statement句柄";
  201. }
  202. }
  203. } catch (PDOException $e) {
  204. if ($debug) {
  205. print "Error!: " . $e->getMessage() . "<br/>";
  206. }
  207. $Fetch = array();
  208. }
  209. } else {
  210. $Fetch = array();
  211. if ($debug) {
  212. echo "无效的数据库句柄";
  213. }
  214. }
  215. }
  216. $iFetch = count($Fetch);
  217. if ($debug) {
  218. echo "count:$iFetch<br>";
  219. }
  220. if ($iFetch > 0) {
  221. foreach ($Fetch as $one) {
  222. $id = $one["id"];
  223. if (isset($one["guid"])) {
  224. $guid = $one["guid"];
  225. } else {
  226. $guid = "";
  227. }
  228. if (isset($one["lang"])) {
  229. $language = $one["lang"];
  230. } else if (isset($one["language"])) {
  231. $language = $one["language"];
  232. } else {
  233. $language = "en";
  234. }
  235. $pali = $one["pali"];
  236. $dict_word_spell["{$pali}"] = 1;
  237. $type = $one["type"];
  238. $gramma = $one["gramma"];
  239. $parent = $one["parent"];
  240. //$mean = $one["mean"];
  241. if (inLangSetting($language, $user_setting["dict.lang"])) {
  242. $mean = $one["mean"];
  243. } else {
  244. $mean = "";
  245. }
  246. $note = $one["note"];
  247. if (isset($one["factors"])) {
  248. $parts = $one["factors"];
  249. } else if (isset($one["parts"])) {
  250. $parts = $one["parts"];
  251. } else {
  252. $parts = "";
  253. }
  254. if (isset($one["factormean"])) {
  255. $partmean = $one["factormean"];
  256. } else if (isset($one["partmean"])) {
  257. $partmean = $one["partmean"];
  258. } else {
  259. $partmean = "";
  260. }
  261. if (inLangSetting($language, $user_setting["dict.lang"]) == false) {
  262. $partmean = "";
  263. }
  264. $status = $one["status"];
  265. if (isset($one["confidence"])) {
  266. $confidence = $one["confidence"];
  267. } else {
  268. $confidence = 100;
  269. }
  270. if (isset($one["dict_name"])) {
  271. $dict_name = $one["dict_name"];
  272. } else {
  273. $dict_name = "";
  274. }
  275. array_push($output, array(
  276. "id" => $id,
  277. "guid" => $guid,
  278. "pali" => $pali,
  279. "type" => $type,
  280. "gramma" => $gramma,
  281. "parent" => $parent,
  282. "mean" => $mean,
  283. "note" => $note,
  284. "parts" => $parts,
  285. "partmean" => $partmean,
  286. "status" => $status,
  287. "confidence" => $confidence,
  288. "dict_name" => $dict_name,
  289. "language" => $language,
  290. ));
  291. //将语基插入下次查询的列表
  292. if (!empty($parent)) {
  293. if ($pali != $parent) {
  294. $parent_list[$parent] = 1;
  295. }
  296. }
  297. //将拆分插入下次查询的列表
  298. if ($type != ".part.") {
  299. if (!empty($parts)) {
  300. $wordparts = str_getcsv($parts, '+');
  301. foreach ($wordparts as $x) {
  302. if (!empty($x)) {
  303. if ($x != $pali) {
  304. $parent_list[$x] = 1;
  305. }
  306. }
  307. }
  308. }
  309. }
  310. }
  311. }
  312. }
  313. /*
  314. if($i==0){
  315. //自动查找单词词干
  316. $word_base=getPaliWordBase($in_word);
  317. foreach($word_base as $x=>$infolist){
  318. foreach($infolist as $gramma){
  319. array_push($output,
  320. array("pali"=>$in_word,
  321. "parent"=>$x,
  322. "type"=>$gramma["type"],
  323. "gramma"=>$gramma["gramma"],
  324. "parts"=>$gramma["parts"],
  325. "partmean"=>"",
  326. "mean"=>"",
  327. "language"=>"en",
  328. "dict_name"=>"auto",
  329. "status"=>128
  330. ));
  331. $part_list=str_getcsv($gramma["parts"],"+");
  332. foreach($part_list as $part){
  333. $parent_list[$part]=1;
  334. }
  335. }
  336. }
  337. }
  338. */
  339. if ($debug) {
  340. echo "parent:" . count($parent_list) . "<br>";
  341. print_r($parent_list) . "<br>";
  342. }
  343. if (count($parent_list) == 0) {
  344. break;
  345. } else {
  346. $word_list = array();
  347. foreach ($parent_list as $x => $value) {
  348. array_push($word_list, $x);
  349. }
  350. }
  351. }
  352. //删除无效数据
  353. $newOutput = array();
  354. foreach ($output as $value) {
  355. if ($value["dict_name"] == "auto") {
  356. if (isset($dict_word_spell["{$value["parent"]}"])) {
  357. array_push($newOutput, $value);
  358. }
  359. } else {
  360. array_push($newOutput, $value);
  361. }
  362. }
  363. if ($debug) {
  364. echo "<textarea width=\"100%\" >";
  365. }
  366. echo json_encode($newOutput, JSON_UNESCAPED_UNICODE);
  367. if ($debug) {
  368. echo "</textarea>";
  369. }
  370. if ($debug) {
  371. echo "生成:" . count($output) . "<br>";
  372. echo "有效:" . count($newOutput) . "<br>";
  373. foreach ($newOutput as $result) {
  374. echo "{$result["pali"]}-{$result["parent"]}-{$result["mean"]}<br>";
  375. }
  376. $queryTime = (microtime_float() - $time_start) * 1000;
  377. echo "<div >搜索时间:$queryTime 毫秒</div>";
  378. }
  379. PrefLog();