wbw_analyse.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. <?php
  2. require_once "../path.php";
  3. require_once "../public/_pdo.php";
  4. require_once '../public/load_lang.php';
  5. require_once '../public/function.php';
  // Setup: open the word-by-word (wbw) source query and the udict destination,
  // start one transaction on udict, and prepare the INSERT that every later
  // section of this script reuses through $stmt.

  // $PDO is the handle used for the wbw query below; presumably it is populated
  // by PDO_Connect() from the included ../public/_pdo.php (not visible here).
  global $PDO;
  PDO_Connect("" . _FILE_DB_USER_WBW_);

  // Source rows: every record of the wbw table.
  $query = "SELECT * from wbw where 1";
  $sth = $PDO->prepare($query);
  $sth->execute();

  // Destination database. Errors are reported as PHP warnings, so failing calls
  // return false instead of throwing.
  $udict = new PDO("" . _FILE_DB_USER_DICT_, "", "", [PDO::ATTR_PERSISTENT => true]);
  $udict->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);

  /* Begin a transaction; this turns off autocommit */
  $udict->beginTransaction();

  // Placeholder order: userid, pali, book, paragraph, wid, type, data,
  // confidence, lang, modify_time.
  $query = "INSERT INTO udict ('userid',
  'pali',
  'book',
  'paragraph',
  'wid',
  'type',
  'data',
  'confidence',
  'lang',
  'modify_time')
  VALUES ( ? , ? , ? , ? , ? , ? , ? , ? , ? , ? )";
  $stmt = $udict->prepare($query);
  if ($stmt === false) {
      // Under ERRMODE_WARNING a failed prepare() yields false. Continuing would
      // only end in "Call to a member function execute() on bool", so report
      // the connection-level error, drop the open transaction and stop.
      $error = $udict->errorInfo();
      $udict->rollBack();
      echo "error - $error[2] <br>";
      exit;
  }

  // Row counter used by the processing loops that follow.
  $i = 0;
  // Export the per-word annotations stored in each wbw row into udict.
  // Each row's "data" column holds an XML fragment; every <word> element in it
  // contributes one udict row per recognised child element.

  // Child element name => udict type id. Elements not listed here are ignored.
  $wbwTypeIds = [
      'type' => 1,
      'gramma' => 2,
      'mean' => 3,
      'org' => 4,
      'om' => 5,
      'parent' => 6,
  ];
  // Elements whose language is detected from the text itself via
  // getLanguageCode(); every other element is stored with lang 'pali'.
  $wbwDetectedLangKeys = ['mean', 'om'];
  // Placeholder values that are never exported.
  $wbwIgnoredValues = ["?", "", ".ctl.", ".a.", " ", "_un_auto_factormean_", "_un_auto_mean_"];

  while ($result = $sth->fetch(PDO::FETCH_ASSOC)) {
      try {
          // Wrap the fragment in a single root so it parses as one document.
          $xmlString = "<root>" . $result["data"] . "</root>";
          $xmlWord = simplexml_load_string($xmlString);
          if ($xmlWord === false) {
              // Name the offending row instead of failing later with an
              // anonymous "member function xpath() on bool" error.
              throw new RuntimeException(
                  "invalid wbw xml (book " . $result["book"]
                  . ", paragraph " . $result["paragraph"]
                  . ", wid " . $result["wid"] . ")"
              );
          }

          foreach ($xmlWord->xpath('//word') as $word) {
              $pali = (string) $word->real;
              foreach ($word as $key => $value) {
                  $strValue = (string) $value;

                  // Skip placeholders and values starting with "[a]".
                  if (in_array($strValue, $wbwIgnoredValues, true)
                      || mb_substr($strValue, 0, 3, "UTF-8") === "[a]") {
                      continue;
                  }
                  if (!isset($wbwTypeIds[$key])) {
                      continue;
                  }

                  $lang = in_array($key, $wbwDetectedLangKeys, true)
                      ? getLanguageCode($strValue)
                      : 'pali';

                  $stmt->execute([
                      $result["owner"],
                      $pali,
                      $result["book"],
                      $result["paragraph"],
                      $result["wid"],
                      $wbwTypeIds[$key],
                      $strValue,
                      100, // fixed confidence for wbw-derived entries
                      $lang,
                      $result["modify_time"],
                  ]);
              }
          }
      } catch (Throwable $e) {
          // One bad row must not abort the whole export: report it and move on.
          echo "Captured Throwable: " . $e->getMessage();
      }
      $i++;
  }
  // Other dictionaries: copy every row of each dictionary database's "dict"
  // table into udict through the shared prepared INSERT ($stmt).
  $db_file_list = [
      _DIR_DICT_SYSTEM_ . "/sys_regular.db",
      _DIR_DICT_SYSTEM_ . "/sys_irregular.db",
      _DIR_DICT_SYSTEM_ . "/union.db",
      _DIR_DICT_SYSTEM_ . "/comp.db",
      _DIR_DICT_3RD_ . "/pm.db",
      _DIR_DICT_3RD_ . "/bhmf.db",
      _DIR_DICT_3RD_ . "/shuihan.db",
      _DIR_DICT_3RD_ . "/concise.db",
      _DIR_DICT_3RD_ . "/uhan_en.db",
  ];

  // Single-valued dict columns, exported in this order after the meanings:
  // [column name, udict type id, true when the row's own "lang" is stored
  // (otherwise the language is 'pali')].
  $dictFieldSpecs = [
      ['type', 1, false],
      ['gramma', 2, false],
      ['parts', 4, false],
      ['partmean', 5, true],
      ['parent', 6, false],
  ];

  foreach ($db_file_list as $db_file) {
      // $debug is not assigned anywhere in this file; !empty() keeps the
      // script quiet when the included files do not define it either.
      if (!empty($debug)) {
          echo "dict connect:$db_file<br>";
      }

      $dbh = new PDO($db_file, "", "", [PDO::ATTR_PERSISTENT => true]);
      $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
      $query = "select * from dict where 1";
      $sth = $dbh->prepare($query);
      $sth->execute();

      // Counts the rows read from the current database only.
      $i = 0;
      while ($result = $sth->fetch(PDO::FETCH_ASSOC)) {
          // "mean" packs several meanings separated by '$'; each non-empty
          // one becomes its own type-3 row.
          if (!empty($result["mean"])) {
              foreach (explode('$', $result["mean"]) as $value) {
                  // Strip surrounding whitespace, NUL, dots and backslashes
                  // (the mask contains a literal backslash followed by a dot).
                  $word = trim($value, " \t\n\r\0\x0B\\.");
                  if (!empty($word)) {
                      $stmt->execute([
                          $result["dict_name"],
                          $result["pali"],
                          0,
                          0,
                          0,
                          3,
                          $word,
                          $result["confidence"],
                          $result["lang"],
                          1,
                      ]);
                  }
              }
          }

          // Remaining columns map one-to-one onto a udict row each.
          foreach ($dictFieldSpecs as $spec) {
              list($column, $iType, $useRowLang) = $spec;
              if (empty($result[$column])) {
                  continue;
              }
              $stmt->execute([
                  $result["dict_name"],
                  $result["pali"],
                  0, // book: not applicable to dictionary entries
                  0, // paragraph
                  0, // wid
                  $iType,
                  $result[$column],
                  $result["confidence"],
                  $useRowLang ? $result["lang"] : 'pali',
                  1, // modify_time
              ]);
          }

          $i++;
      }
  }
  /* Commit the changes */
  $udict->commit();

  // Report the outcome. Only the state left by the most recent use of $stmt is
  // inspected here; earlier failures surface as PHP warnings (ERRMODE_WARNING).
  if (!$stmt) {
      // The INSERT could not be prepared: the reason is on the connection handle.
      $error = $udict->errorInfo();
      echo "error - $error[2] <br>";
  } elseif (!in_array($stmt->errorCode(), [null, '00000'], true)) {
      // SQLSTATE is a string; compare it strictly. A loose "!= 0" treats codes
      // such as 'HY000' as equal to 0 before PHP 8 and hides the failure.
      // Statement-level errors are only available from the statement itself.
      $error = $stmt->errorInfo();
      echo "error - $error[2] <br>";
  } else {
      // $i is the row count of the last dictionary database processed.
      echo "updata index $i recorders.";
  }