db_insert_index.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. <?php
  2. require_once "install_head.php";
  3. ?>
  4. <!DOCTYPE html>
  5. <html>
  6. <head>
  7. </head>
  8. <body>
  9. <h2>Insert to Index</h2>
  10. <p><a href="index.php">Home</a></p>
  11. <?php
  include "./_pdo.php";
  include "../path.php";

  // Without a "from" parameter there is nothing to process yet: render the
  // range form and stop this script.
  if (!isset($_GET["from"])) {
  ?>
  <form action="db_insert_index.php" method="get">
  From: <input type="text" name="from" value="0"><br>
  To: <input type="text" name="to" value="216"><br>
  <input type="submit">
  </form>
  <?php
      return;
  }

  // Both bounds arrive from the query string. They are used as array indexes,
  // in arithmetic and in generated HTML/JavaScript, so force them to integers
  // here instead of carrying raw request strings through the script.
  // A negative start index is clamped to 0.
  $from = max(0, (int) $_GET["from"]);
  // A missing "to" becomes 0, which the file-list code further down treats as
  // "up to the last listed file".
  $to = isset($_GET["to"]) ? (int) $_GET["to"] : 0;

  $dirLog = _DIR_LOG_ . "/";            // directory that receives insert_index.log
  $dirDb = "db/";
  $dirXmlBase = _DIR_PALI_CSV_ . "/";   // base directory of the per-book CSV word lists
  $filelist = array();                  // rows of filelist.csv
  $fileNums = 0;                        // number of rows loaded into $filelist
  $log = "";                            // text appended to the log file at the end of the run

  // $from is an integer at this point, so it is safe to print inside markup.
  echo "<h2>$from</h2>";
  // Words that already exist in the wordindex table.
  $g_wordCounter = 0;        // highest id currently used in the word table
  $g_wordIndexCounter = 0;
  $iAllWordIndex = array();  // wordindex id => array(word, count, normal, bold)
  $sAllWord = array();       // word => wordindex id

  // Words first seen during this run.
  $wordindex_max_index = 0;  // highest id currently used in the wordindex table
  $aNewWordIndex = array();  // new wordindex id => array(word, count, normal, bold)
  $sNewWord = array();       // word => new wordindex id

  // Has no effect when this script runs at top level.
  // NOTE(review): presumably kept for the case where the file is included from
  // inside a function - confirm.
  global $dbh_word_index;

  // Word-index database: persistent connection, errors reported as warnings.
  $dns = "" . _FILE_DB_WORD_INDEX_;
  $dbh_word_index = new PDO($dns, "", "", array(PDO::ATTR_PERSISTENT => true));
  $dbh_word_index->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);

  // Rows come back ordered by id descending, so the first row carries the
  // largest id; an empty table leaves the counter at 0.
  $query = "select id from wordindex where 1 order by id DESC ";
  $stmt = $dbh_word_index->prepare($query);
  $stmt->execute(array());
  $id = $stmt->fetch(PDO::FETCH_ASSOC);
  $wordindex_max_index = ($id === false) ? 0 : $id["id"];

  // Word database, reached through the shared $PDO handle that is available
  // after PDO_Connect(); the same "largest id" probe is repeated there.
  $db_file = _FILE_DB_PALI_INDEX_;
  PDO_Connect("$db_file");
  $query = "select id from word where 1 order by id DESC ";
  $stmt = $PDO->prepare($query);
  $stmt->execute(array());
  $id = $stmt->fetch(PDO::FETCH_ASSOC);
  $g_wordCounter = ($id === false) ? 0 : $id["id"];
  /**
   * Fetch the wordindex row whose "word" column equals the given word.
   *
   * Runs against the script-wide $dbh_word_index PDO handle.
   *
   * @param string $word Word bound to the query placeholder.
   * @return array|false The first matching row as an associative array, or
   *                     false when fetch() yields no row.
   */
  function dict_lookup($word)
  {
      global $dbh_word_index;

      $statement = $dbh_word_index->prepare('select * from wordindex where "word" = ? ');
      $statement->execute(array($word));
      $row = $statement->fetch(PDO::FETCH_ASSOC);

      return $row;
  }
  /**
   * Replace the lowercase diacritic letters listed below with plain ASCII
   * letters; every other character is returned unchanged.
   *
   * @param string $strIn Text to convert.
   * @return string Copy of $strIn with the listed letters replaced.
   */
  function getWordEn($strIn)
  {
      // Diacritic letter => ASCII replacement. str_replace() with arrays applies
      // the pairs one after another, in this order.
      static $asciiFor = array(
          "ā" => "a",
          "ī" => "i",
          "ū" => "u",
          "ṅ" => "n",
          "ñ" => "n",
          "ṭ" => "t",
          "ḍ" => "d",
          "ṇ" => "n",
          "ḷ" => "l",
          "ṃ" => "m",
      );

      return str_replace(array_keys($asciiFor), array_values($asciiFor), $strIn);
  }
  // Load filelist.csv into $filelist, one parsed row per entry, counting the
  // rows in $fileNums. Each row is read into a temporary first so that the
  // terminating `false` returned by fgetcsv() is never stored as a bogus last
  // entry, and the handle is closed once the file has been read.
  $handle = fopen("filelist.csv", 'r');
  if ($handle !== false) {
      while (($row = fgetcsv($handle, 0, ',')) !== false) {
          $filelist[$fileNums] = $row;
          $fileNums++;
      }
      fclose($handle);
  } else {
      echo "can not open filelist.csv<br>";
  }

  // A "to" of 0, or one past the end of the list, means "through the last file".
  if ($to == 0 || $to >= $fileNums) {
      $to = $fileNums - 1;
  }
  // Only the file at index $from is handled per request; the end of this page
  // redirects to from+1, which replaces the former loop
  // for($iFile=$from;$iFile<=$to;$iFile++).
  $iFile = $from;
  {
      // Column 1 of the file-list row is the base name used for the .htm
      // name, the sub-directory and the CSV file.
      $outputFileNameHead = $filelist[$iFile][1];
      $FileName = $outputFileNameHead . ".htm";
      $csvPath = $dirXmlBase . $outputFileNameHead . "/" . $outputFileNameHead . ".csv";

      echo "doing:" . $FileName . "<br>";
      $log = $log . "$iFile,$FileName,open\r\n";

      // Read the word list; the first CSV row is skipped.
      $arrInserString = array();
      $irow = 0;
      if (($fp = fopen($csvPath, "r")) !== false) {
          while (($data = fgetcsv($fp, 0, ',')) !== false) {
              $irow++;
              if ($irow > 1) {
                  $arrInserString[] = $data;
              }
          }
          fclose($fp);
          echo "单词表load:" . $csvPath . "<br>";
      } else {
          echo "can not open csv file. filename=" . $csvPath;
      }

      // Insert all words of this file inside one transaction.
      $PDO->beginTransaction();
      $query = "INSERT INTO word ('id','book','paragraph','wordindex','bold') VALUES (?,?,?,?,?)";
      $stmt = $PDO->prepare($query);
      $count = 0;
      $book = "";
      $paragraph = "";
      // errorInfo() of the first failure, or null while everything succeeded.
      $error = null;

      if ($stmt === false) {
          // prepare() failed: there is no statement to execute, so report the
          // connection-level error instead of calling a method on false.
          $error = $PDO->errorInfo();
      } else {
          foreach ($arrInserString as $oneParam) {
              // Column 5 holds the word. Rows that are too short to have it
              // (e.g. blank lines) or whose word is empty are skipped.
              if (!isset($oneParam[5]) || $oneParam[5] == "") {
                  continue;
              }

              $g_wordCounter++;
              $book = substr($oneParam[2], 1); // column 2 without its first character
              $paragraph = $oneParam[3];
              $word = $oneParam[5];
              // Column 15 marks bold words with "bld"; a missing column counts as not bold.
              $bold = (isset($oneParam[15]) && $oneParam[15] == "bld") ? 1 : 0;

              // Counter slots: [0] word, [1] total count, [2] normal count, [3] bold count.
              if (isset($sAllWord[$word])) {
                  // Existing word that was already loaded from the database during this run.
                  $wordindex = $sAllWord[$word];
                  $iAllWordIndex[$wordindex][1]++;
                  if ($bold == 1) {
                      $iAllWordIndex[$wordindex][3]++;
                  } else {
                      $iAllWordIndex[$wordindex][2]++;
                  }
              } elseif (isset($sNewWord[$word])) {
                  // New word that was already registered during this run.
                  $wordindex = $sNewWord[$word];
                  $aNewWordIndex[$wordindex][1]++;
                  if ($bold == 1) {
                      $aNewWordIndex[$wordindex][3]++;
                  } else {
                      $aNewWordIndex[$wordindex][2]++;
                  }
              } elseif (($lookup = dict_lookup($word)) !== false) {
                  // Found in the database: seed the counters from the stored row
                  // and add this occurrence.
                  $wordindex = $lookup["id"];
                  $sAllWord[$word] = $wordindex;
                  $iAllWordIndex[$wordindex][0] = $word;
                  $iAllWordIndex[$wordindex][1] = $lookup["count"] + 1;
                  if ($bold == 1) {
                      $iAllWordIndex[$wordindex][2] = $lookup["normal"];
                      $iAllWordIndex[$wordindex][3] = $lookup["bold"] + 1;
                  } else {
                      $iAllWordIndex[$wordindex][2] = $lookup["normal"] + 1;
                      $iAllWordIndex[$wordindex][3] = $lookup["bold"];
                  }
              } else {
                  // Not in the database either: allocate the next wordindex id.
                  $wordindex = $wordindex_max_index + 1;
                  $sNewWord[$word] = $wordindex;
                  $aNewWordIndex[$wordindex][0] = $word;
                  $aNewWordIndex[$wordindex][1] = 1;
                  if ($bold == 1) {
                      $aNewWordIndex[$wordindex][2] = 0;
                      $aNewWordIndex[$wordindex][3] = 1;
                  } else {
                      $aNewWordIndex[$wordindex][2] = 1;
                      $aNewWordIndex[$wordindex][3] = 0;
                  }
                  $wordindex_max_index++;
              }

              $newWord = array($g_wordCounter, $book, $paragraph, $wordindex, $bold);
              // Remember the first failure: a later successful execute would
              // otherwise reset the statement's error state.
              if ($stmt->execute($newWord) === false && $error === null) {
                  $error = $stmt->errorInfo();
              }
              $count++;
          }
      }

      // Commit whatever was inserted; the in-memory counters built above are
      // written to the word index afterwards regardless.
      $PDO->commit();

      if ($error !== null) {
          echo "error - $error[2] <br>";
          $log .= "$from, $FileName, error, $error[2] \r\n";
      } else {
          echo "updata $count recorders.<br />";
          $log .= "updata $count recorders.\r\n";
      }
  }
  // Update the word index table.
  // Step 1: insert the words that were first seen during this run, inside one
  // transaction.
  $dbh_word_index->beginTransaction();
  $query = "INSERT INTO wordindex ('id','word','word_en','count','normal','bold','is_base','len') VALUES ( ? , ? , ? , ? , ? , ? , ? , ? )";
  $stmt = $dbh_word_index->prepare($query);
  echo "INSERT:" . count($aNewWordIndex) . "words<br>";

  // errorInfo() of the first failure, or null while everything succeeded.
  $error = null;
  if ($stmt === false) {
      // prepare() failed, so the error lives on the connection handle.
      $error = $dbh_word_index->errorInfo();
  } else {
      foreach ($aNewWordIndex as $wIndex => $info) {
          // $info: [0] word, [1] total count, [2] normal count, [3] bold count.
          $newWord = array(
              $wIndex,
              $info[0],
              getWordEn($info[0]),
              $info[1],
              $info[2],
              $info[3],
              0,                             // is_base
              mb_strlen($info[0], "UTF-8"),  // len, in characters
          );
          // Statement failures are reported on the statement, not on the
          // connection; keep the first one.
          if ($stmt->execute($newWord) === false && $error === null) {
              $error = $stmt->errorInfo();
          }
      }
  }

  // Commit the inserts.
  $dbh_word_index->commit();

  if ($error !== null) {
      echo "error - $error[2] <br>";
      $log .= "$from, $FileName, error, $error[2] \r\n";
  } else {
      echo "updata iword recorders.<br />";
      $log .= "updata iword recorders.\r\n";
  }
  // Step 2: write the new counters of the words that already existed in the
  // word index, inside one transaction.
  $dbh_word_index->beginTransaction();
  $query = "UPDATE wordindex SET count = ? , normal = ? , bold = ? where id = ? ";
  $stmt = $dbh_word_index->prepare($query);
  echo "UPDATE:" . count($iAllWordIndex) . "words<br>";

  // errorInfo() of the first failure, or null while everything succeeded.
  $error = null;
  if ($stmt === false) {
      // prepare() failed, so the error lives on the connection handle.
      $error = $dbh_word_index->errorInfo();
  } else {
      foreach ($iAllWordIndex as $wIndex => $info) {
          // $info: [1] total count, [2] normal count, [3] bold count; $wIndex is the row id.
          $newWord = array(
              $info[1],
              $info[2],
              $info[3],
              $wIndex,
          );
          // Statement failures are reported on the statement, not on the
          // connection; keep the first one.
          if ($stmt->execute($newWord) === false && $error === null) {
              $error = $stmt->errorInfo();
          }
      }
  }

  // Commit the updates.
  $dbh_word_index->commit();

  if ($error !== null) {
      echo "error - $error[2] <br>";
      $log .= "$from, $FileName, error, $error[2] \r\n";
  } else {
      echo "updata iword recorders.<br />";
      $log .= "updata iword recorders.\r\n";
  }
  // Append this run's log text to insert_index.log. If the log cannot be
  // opened, say so and carry on: passing the failed handle to fwrite() would
  // abort the page before the redirect to the next file is written.
  $myLogFile = fopen($dirLog . "insert_index.log", "a");
  if ($myLogFile !== false) {
      fwrite($myLogFile, $log);
      fclose($myLogFile);
  } else {
      echo "can not open log file. filename=" . $dirLog . "insert_index.log<br>";
  }
  270. ?>
  271. <?php
  // Decide whether the batch is finished or the page should reload itself for
  // the next file. Both bounds are forced to integers first: they originate
  // from the query string and are used in arithmetic and written into an
  // inline script.
  $currentFile = (int) $from;
  $lastFile = (int) $to;
  $nextFile = $currentFile + 1;

  if ($currentFile >= $lastFile) {
      echo "<h2>齐活!功德无量!all done!</h2>";
  } else {
      // Client-side redirect to the same page for the next file index.
      echo "<script>";
      echo "window.location.assign(\"db_insert_index.php?from=" . $nextFile . "&to=" . $lastFile . "\")";
      echo "</script>";
      // Progress line: next index and column 0 of its file-list row.
      echo "正在载入:" . $nextFile . "——" . $filelist[$nextFile][0];
  }
  280. ?>
  281. </body>
  282. </html>