wbw_analyse.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  <?php
  /*
   * Word-by-word (WBW) analysis database: data analysis.
   *
   * Reads every row of the user WBW table and every row of the `dict` table in
   * a fixed list of dictionary files, breaks each row into single-field
   * records and inserts them into the `udict` table. All inserts share one
   * transaction, which is committed at the end even when some inserts failed
   * (failures are reported, not rolled back).
   *
   * `type` codes written to udict:
   *   1 = type, 2 = gramma, 3 = mean,
   *   4 = org (WBW) / parts (dict), 5 = om (WBW) / partmean (dict), 6 = parent
   */
  require_once "../path.php";
  require_once "../public/_pdo.php";
  require_once '../public/load_lang.php';
  require_once '../public/function.php';

  global $PDO;

  // $debug is not assigned anywhere in this script; default it to off unless
  // one of the included files already defined it.
  $debug = isset($debug) ? $debug : false;

  // Source 1: the user WBW table.
  PDO_Connect("" . _FILE_DB_USER_WBW_);
  $query = "SELECT * from " . _TABLE_USER_WBW_ . " where 1";
  $sth = $PDO->prepare($query);
  $sth->execute();

  // Target: the user dictionary database.
  $udict = new PDO("" . _FILE_DB_USER_DICT_, "", "", array(PDO::ATTR_PERSISTENT => true));
  $udict->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);

  /* Begin a transaction, turning off autocommit. */
  $udict->beginTransaction();

  $query = "INSERT INTO udict ('userid', 'pali', 'book', 'paragraph', 'wid', "
      . "'type', 'data', 'confidence', 'lang', 'modify_time') "
      . "VALUES ( ? , ? , ? , ? , ? , ? , ? , ? , ? , ? )";
  $stmt = $udict->prepare($query);
  if ($stmt === false) {
      // In ERRMODE_WARNING prepare() returns false instead of throwing.
      // Nothing can be inserted without the statement, so report and stop.
      $error = $udict->errorInfo();
      $udict->rollBack();
      echo "error - $error[2] <br>";
      exit;
  }

  $recordCount = 0;  // records successfully inserted, across all sources
  $failedCount = 0;  // inserts for which execute() returned false
  $lastError = null; // errorInfo() of the most recent failed insert

  // Runs the prepared INSERT for one record and keeps the counters up to date.
  // The error is read from the statement itself, because a failed execute()
  // is not reflected in the connection's errorInfo().
  $insertRecord = function (array $record) use ($stmt, &$recordCount, &$failedCount, &$lastError) {
      if ($stmt->execute($record)) {
          $recordCount++;
      } else {
          $failedCount++;
          $lastError = $stmt->errorInfo();
      }
  };

  // Placeholder values that are never copied into udict.
  $ignoredValues = array("?", "", ".ctl.", ".a.", " ", "_un_auto_factormean_", "_un_auto_mean_");
  // Child element name of <word> => udict type code.
  $wbwFieldTypes = array('type' => 1, 'gramma' => 2, 'mean' => 3, 'org' => 4, 'om' => 5, 'parent' => 6);
  // Fields whose language is detected from the value; all others are 'pali'.
  $wbwTranslatedFields = array('mean', 'om');

  while ($result = $sth->fetch(PDO::FETCH_ASSOC)) {
      try {
          // The data column holds a sequence of XML fragments; wrap them in a
          // single root element so they parse as one document.
          $xmlWord = simplexml_load_string("<root>" . $result["data"] . "</root>");
          if ($xmlWord === false) {
              throw new RuntimeException(
                  "invalid XML in wbw data (book " . $result["book"]
                  . ", paragraph " . $result["paragraph"]
                  . ", wid " . $result["wid"] . ")"
              );
          }

          foreach ($xmlWord->xpath('//word') as $word) {
              $pali = (string) $word->real;
              foreach ($word as $key => $value) {
                  if (!isset($wbwFieldTypes[$key])) {
                      continue;
                  }
                  $strValue = (string) $value;
                  if (in_array($strValue, $ignoredValues, true)
                      || mb_substr($strValue, 0, 3, "UTF-8") === "[a]"
                  ) {
                      continue;
                  }
                  $lang = in_array($key, $wbwTranslatedFields, true)
                      ? getLanguageCode($strValue)
                      : 'pali';
                  $insertRecord(array(
                      $result["owner"],
                      $pali,
                      $result["book"],
                      $result["paragraph"],
                      $result["wid"],
                      $wbwFieldTypes[$key],
                      $strValue,
                      100,
                      $lang,
                      $result["modify_time"],
                  ));
              }
          }
      } catch (Throwable $e) {
          // Best effort: one bad row must not stop the whole import.
          echo "Captured Throwable: " . $e->getMessage();
      }
  }

  // Source 2: other dictionaries.
  $db_file_list = array(
      _DIR_DICT_SYSTEM_ . "/sys_regular.db",
      _DIR_DICT_SYSTEM_ . "/sys_irregular.db",
      _DIR_DICT_SYSTEM_ . "/union.db",
      _DIR_DICT_SYSTEM_ . "/comp.db",
      _DIR_DICT_3RD_ . "/pm.db",
      _DIR_DICT_3RD_ . "/bhmf.db",
      _DIR_DICT_3RD_ . "/shuihan.db",
      _DIR_DICT_3RD_ . "/concise.db",
      _DIR_DICT_3RD_ . "/uhan_en.db",
  );
  // dict column => udict type code, for the columns copied as a single value.
  // ("mean" is handled separately because it holds several '$'-separated values.)
  $dictFieldTypes = array('type' => 1, 'gramma' => 2, 'parts' => 4, 'partmean' => 5, 'parent' => 6);

  foreach ($db_file_list as $db_file) {
      if ($debug) {
          echo "dict connect:$db_file<br>";
      }
      $dbh = new PDO("" . $db_file, "", "", array(PDO::ATTR_PERSISTENT => true));
      $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
      $sth = $dbh->prepare("select * from dict where 1");
      if ($sth === false) {
          // Skip a dictionary that cannot be queried instead of failing on a
          // method call on false.
          $error = $dbh->errorInfo();
          echo "error - $error[2] <br>";
          continue;
      }
      $sth->execute();

      while ($result = $sth->fetch(PDO::FETCH_ASSOC)) {
          // Dictionary rows have no book/paragraph/wid; those columns are 0
          // and modify_time is the constant 1.
          if (!empty($result["mean"])) {
              foreach (explode('$', $result["mean"]) as $meaning) {
                  // Strip surrounding whitespace and dots. (A "\." in a
                  // double-quoted string is a backslash plus a dot, which
                  // would also strip backslashes.)
                  $meaning = trim($meaning, " \t\n\r\0\x0B.");
                  if (!empty($meaning)) {
                      $insertRecord(array(
                          $result["dict_name"],
                          $result["pali"],
                          0,
                          0,
                          0,
                          3,
                          $meaning,
                          $result["confidence"],
                          $result["lang"],
                          1,
                      ));
                  }
              }
          }

          foreach ($dictFieldTypes as $column => $iType) {
              if (empty($result[$column])) {
                  continue;
              }
              // Only partmean carries the row's language; the rest are 'pali'.
              $lang = ($column === 'partmean') ? $result["lang"] : 'pali';
              $insertRecord(array(
                  $result["dict_name"],
                  $result["pali"],
                  0,
                  0,
                  0,
                  $iType,
                  $result[$column],
                  $result["confidence"],
                  $lang,
                  1,
              ));
          }
      }
  }

  /* Commit the changes. */
  $udict->commit();

  if ($failedCount > 0) {
      // Report the most recent insert failure.
      echo "error - $lastError[2] <br>";
  } else {
      echo "updata index $recordCount recorders.";
  }