db_insert_index.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. <?php
  2. /*
  3. 用csv 单词列表文件更新 wordindex and word
  4. 可以用 db_insert_word_from_csv.php取代
  5. */
  6. require_once "install_head.php";
  /**
   * Look up one word in the word-index table.
   *
   * Uses the global PDO handle $dbh_word_index, which the calling script
   * must have opened before the first call.
   *
   * @param string $word Exact spelling to match against the "word" column.
   * @return array|false The first matching row as an associative array, or
   *                     false when no row matches.
   * @throws RuntimeException When the lookup statement cannot be prepared.
   */
  function dict_lookup($word)
  {
      global $dbh_word_index;

      // Read from the same table that the rest of this script inserts into
      // and updates (_TABLE_WORD_INDEX_) instead of a hard-coded table name.
      $query = "select * from " . _TABLE_WORD_INDEX_ . " where \"word\" = ? ";
      $stmt = $dbh_word_index->prepare($query);
      if ($stmt === false) {
          // The handle may run in warning mode, where prepare() returns false
          // instead of throwing; fail with a clear message in that case.
          throw new RuntimeException("dict_lookup: cannot prepare word index lookup");
      }

      $stmt->execute(array($word));
      $row = $stmt->fetch(PDO::FETCH_ASSOC);
      // Only the first row is needed; release the cursor before the next query.
      $stmt->closeCursor();

      return $row;
  }
  /**
   * Replace the lower-case diacritic letters listed below with their plain
   * ASCII base letters; every other character is left untouched.
   *
   * @param string $strIn Text to convert.
   * @return string The converted text.
   */
  function getWordEn($strIn)
  {
      // diacritic letter => plain letter, applied in this order
      $plainLetters = array(
          "ā" => "a",
          "ī" => "i",
          "ū" => "u",
          "ṅ" => "n",
          "ñ" => "n",
          "ṭ" => "t",
          "ḍ" => "d",
          "ṇ" => "n",
          "ḷ" => "l",
          "ṃ" => "m",
      );

      return str_replace(array_keys($plainLetters), array_values($plainLetters), $strIn);
  }
  30. ?>
  31. <!DOCTYPE html>
  32. <html>
  33. <head>
  34. </head>
  35. <body>
  36. <h2>Insert to Index</h2>
  37. <p><a href="index.php">Home</a></p>
  <?php
  include "./_pdo.php";
  include "../config.php";

  // Without a "from" parameter there is nothing to process yet:
  // show the range form and stop.
  if (!isset($_GET["from"])) {
  ?>
  <form action="db_insert_index.php" method="get">
  From: <input type="text" name="from" value="0"><br>
  To: <input type="text" name="to" value="216"><br>
  <input type="submit">
  </form>
  <?php
      return;
  }

  // Both values are printed into the page, used as array indexes and used in
  // arithmetic further down, so reduce them to non-negative integers here
  // instead of carrying raw request strings around.
  // A missing "to" becomes 0, which the range check further down replaces
  // with the index of the last listed file.
  $from = max(0, (int) $_GET["from"]);
  $to = isset($_GET["to"]) ? max(0, (int) $_GET["to"]) : 0;

  $dirLog = _DIR_LOG_ . "/";
  $dirDb = "db/";
  $dirXmlBase = _DIR_PALI_CSV_ . "/";

  $filelist = array(); // rows of filelist.csv
  $fileNums = 0;       // number of rows read from filelist.csv
  $log = "";           // text appended to the log file at the end of the run

  echo "<h2>$from</h2>";
  // Words that already exist in the index table.
  $g_wordCounter = 0;        // highest id currently used in the word table
  $g_wordIndexCounter = 0;
  $iAllWordIndex = array();  // index id => array(word, count, normal, bold)
  $sAllWord = array();       // word => index id

  // Words added during this run.
  $wordindex_max_index = 0;  // highest id currently used in the index table
  $aNewWordIndex = array();  // index id => array(word, count, normal, bold)
  $sNewWord = array();       // word => index id

  // Reads the highest id of a table, or 0 when the table is empty.
  // LIMIT 1 keeps the driver from producing the whole ordered table when
  // only the first row is consumed.
  $fetchMaxId = function ($db, $table) {
      $maxStmt = $db->prepare("SELECT id from " . $table . " where true order by id DESC LIMIT 1");
      $maxStmt->execute(array());
      $row = $maxStmt->fetch(PDO::FETCH_ASSOC);
      $maxStmt->closeCursor();

      return ($row === false) ? 0 : (int) $row["id"];
  };

  // Connection to the word-index database (also used by dict_lookup()).
  global $dbh_word_index;
  $dns = _FILE_DB_WORD_INDEX_;
  $dbh_word_index = new PDO($dns, _DB_USERNAME_, _DB_PASSWORD_, array(PDO::ATTR_PERSISTENT => true));
  $dbh_word_index->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  $wordindex_max_index = $fetchMaxId($dbh_word_index, _TABLE_WORD_INDEX_);

  // Connection to the word database; PDO_Connect() provides the $PDO handle.
  $db_file = _FILE_DB_PALI_INDEX_;
  PDO_Connect($db_file,_DB_USERNAME_,_DB_PASSWORD_);
  $g_wordCounter = $fetchMaxId($PDO, _TABLE_WORD_);
  // Load the list of source files; each row of filelist.csv describes one file.
  $handle = fopen("filelist.csv", 'r');
  if ($handle === false) {
      // Nothing can be processed without the list, so stop here instead of
      // continuing with an empty list.
      echo "can not open filelist.csv<br>";
      return;
  }
  $filelist = array();
  $fileNums = 0;
  // Read into a temporary first so the end-of-file marker (false) is not
  // stored as an extra list entry.
  while (($row = fgetcsv($handle, 0, ',')) !== false) {
      $filelist[$fileNums] = $row;
      $fileNums++;
  }
  fclose($handle);

  // "to" of 0, or beyond the end of the list, means: up to the last file.
  if ($to == 0 || $to >= $fileNums) {
      $to = $fileNums - 1;
  }

  // The requested start index must point at a listed file.
  if (!isset($filelist[$from])) {
      echo "file index out of range<br>";
      return;
  }
  // Import the word list of one source file (the one at index $from) into the
  // word table and collect the per-word counters for the index table.
  //
  // CSV columns used, as read below: [2] book id with a one-character prefix,
  // [3] paragraph, [5] the word, [15] a style marker where "bld" means bold.
  //
  // Results left for the following steps:
  //   $aNewWordIndex / $sNewWord  - words not yet in the index table
  //   $iAllWordIndex / $sAllWord  - words already in the index table
  //   each entry is array(0 => word, 1 => count, 2 => normal, 3 => bold)
  $iFile = $from;
  $outputFileNameHead = $filelist[$iFile][1];
  $FileName = $outputFileNameHead . ".htm";
  $dirXml = $outputFileNameHead . "/";
  $csvPath = $dirXmlBase . $dirXml . $outputFileNameHead . ".csv";

  echo "doing:" . $FileName . "<br>";
  $log = $log . "$iFile,$FileName,open\r\n";

  // Read all data rows of the word list; the first row is the header.
  $arrInserString = array();
  $irow = 0;
  if (($fp = fopen($csvPath, "r")) !== false) {
      while (($data = fgetcsv($fp, 0, ',')) !== false) {
          $irow++;
          if ($irow > 1) {
              $arrInserString[] = $data;
          }
      }
      fclose($fp);
      echo "单词表load:" . $csvPath . "<br>";
  } else {
      echo "can not open csv file. filename=" . $csvPath;
  }

  // Insert every word occurrence inside one transaction.
  $PDO->beginTransaction();
  $query = "INSERT INTO "._TABLE_WORD_." ( id , book , paragraph , wordindex , bold ) VALUES (?,?,?,?,?)";
  $stmt = $PDO->prepare($query);

  $count = 0;          // rows inserted successfully
  $insertError = null; // errorInfo() of the first insert that failed

  // When the statement could not be prepared nothing is inserted and no
  // counters are changed; the failure is reported below.
  if ($stmt !== false) {
      foreach ($arrInserString as $oneParam) {
          // Rows may be shorter than expected (for example blank lines).
          $word = isset($oneParam[5]) ? $oneParam[5] : "";
          if ($word == "") {
              continue;
          }

          $g_wordCounter++;
          $book = substr($oneParam[2], 1);
          $paragraph = $oneParam[3];
          $bold = (isset($oneParam[15]) && $oneParam[15] == "bld") ? 1 : 0;
          // Counter slot touched by this occurrence: 3 = bold, 2 = normal.
          $slot = ($bold == 1) ? 3 : 2;

          if (isset($sAllWord[$word])) {
              // Already known to exist in the index table.
              $wordindex = $sAllWord[$word];
              $iAllWordIndex[$wordindex][1]++;
              $iAllWordIndex[$wordindex][$slot]++;
          } else if (isset($sNewWord[$word])) {
              // Added earlier in this run.
              $wordindex = $sNewWord[$word];
              $aNewWordIndex[$wordindex][1]++;
              $aNewWordIndex[$wordindex][$slot]++;
          } else if (($lookup = dict_lookup($word)) !== false) {
              // Found in the index table: start from the stored counters.
              $wordindex = $lookup["id"];
              $sAllWord[$word] = $wordindex;
              $iAllWordIndex[$wordindex] = array(
                  0 => $word,
                  1 => $lookup["count"] + 1,
                  2 => $lookup["normal"],
                  3 => $lookup["bold"],
              );
              $iAllWordIndex[$wordindex][$slot]++;
          } else {
              // Not in the index table either: allocate the next index id.
              $wordindex_max_index++;
              $wordindex = $wordindex_max_index;
              $sNewWord[$word] = $wordindex;
              $aNewWordIndex[$wordindex] = array(0 => $word, 1 => 1, 2 => 0, 3 => 0);
              $aNewWordIndex[$wordindex][$slot] = 1;
          }

          $newWord = array($g_wordCounter, $book, $paragraph, $wordindex, $bold);
          if ($stmt->execute($newWord) === false) {
              // Keep the first failure; a later successful insert would
              // otherwise hide it from the check after the loop.
              if ($insertError === null) {
                  $insertError = $stmt->errorInfo();
              }
          } else {
              $count++;
          }
      }
  }

  // Commit the changes.
  $PDO->commit();

  if ($stmt === false) {
      $error = PDO_ErrorInfo();
  } else {
      $error = $insertError;
  }
  if ($error !== null) {
      echo "error - $error[2] <br>";
      $log .= "$from, $FileName, error, $error[2] \r\n";
  } else {
      echo "updata $count recorders.<br />";
      $log .= "updata $count recorders.\r\n";
  }
  // Update the word-index table.
  // Step 1: insert the words that were seen for the first time,
  // all inside one transaction.
  $dbh_word_index->beginTransaction();
  // Column names are written as plain identifiers, like in the UPDATE
  // statement of this script; single-quoted names are string literals in
  // standard SQL.
  $query = "INSERT INTO "._TABLE_WORD_INDEX_." (id, word, word_en, count, normal, bold, is_base, len) VALUES ( ? , ? , ? , ? , ? , ? , ? , ? )";
  $stmt = $dbh_word_index->prepare($query);
  echo "INSERT:" . count($aNewWordIndex) . "words<br>";

  $indexInsertError = null; // errorInfo() of the first failure
  if ($stmt === false) {
      $indexInsertError = $dbh_word_index->errorInfo();
  } else {
      foreach ($aNewWordIndex as $wIndex => $info) {
          $newWord = array(
              $wIndex,                      // id
              $info[0],                     // word
              getWordEn($info[0]),          // word without diacritics
              $info[1],                     // count
              $info[2],                     // normal
              $info[3],                     // bold
              0,                            // is_base
              mb_strlen($info[0], "UTF-8"), // len, in characters
          );
          // Statement errors are only visible on the statement handle, and a
          // later success would overwrite them, so capture the first one.
          if ($stmt->execute($newWord) === false && $indexInsertError === null) {
              $indexInsertError = $stmt->errorInfo();
          }
      }
  }

  // Commit the changes.
  $dbh_word_index->commit();

  if ($indexInsertError !== null) {
      $error = $indexInsertError;
      echo "error - $error[2] <br>";
      $log .= "$from, $FileName, error, $error[2] \r\n";
  } else {
      echo "updata iword recorders.<br />";
      $log .= "updata iword recorders.\r\n";
  }
  // Step 2: write the new counters of the words that already existed,
  // all inside one transaction.
  $dbh_word_index->beginTransaction();
  $query = "UPDATE "._TABLE_WORD_INDEX_." SET count = ? , normal = ? , bold = ? where id = ? ";
  $stmt = $dbh_word_index->prepare($query);
  echo "UPDATE:" . count($iAllWordIndex) . "words<br>";

  $indexUpdateError = null; // errorInfo() of the first failure
  if ($stmt === false) {
      $indexUpdateError = $dbh_word_index->errorInfo();
  } else {
      foreach ($iAllWordIndex as $wIndex => $info) {
          $newWord = array(
              $info[1], // count
              $info[2], // normal
              $info[3], // bold
              $wIndex,  // id of the row to update
          );
          // Statement errors are only visible on the statement handle, and a
          // later success would overwrite them, so capture the first one.
          if ($stmt->execute($newWord) === false && $indexUpdateError === null) {
              $indexUpdateError = $stmt->errorInfo();
          }
      }
  }

  // Commit the changes.
  $dbh_word_index->commit();

  if ($indexUpdateError !== null) {
      $error = $indexUpdateError;
      echo "error - $error[2] <br>";
      $log .= "$from, $FileName, error, $error[2] \r\n";
  } else {
      echo "updata iword recorders.<br />";
      $log .= "updata iword recorders.\r\n";
  }
  // Append the collected log text to the log file. If the file cannot be
  // opened, say so on the page instead of writing to an invalid handle.
  $myLogFile = fopen($dirLog . "insert_index.log", "a");
  if ($myLogFile !== false) {
      fwrite($myLogFile, $log);
      fclose($myLogFile);
  } else {
      echo "can not open log file. filename=" . $dirLog . "insert_index.log<br>";
  }
  273. ?>
  <?php
  // Either report completion or send the browser on to the next file.
  // Both indexes originate from the request and are written into an inline
  // script, so only their integer values are used here.
  $currentIndex = (int) $from;
  $lastIndex = (int) $to;

  if ($currentIndex >= $lastIndex) {
      echo "<h2>齐活!功德无量!all done!</h2>";
  } else {
      $nextIndex = $currentIndex + 1;
      echo "<script>";
      echo "window.location.assign(\"db_insert_index.php?from=" . $nextIndex . "&to=" . $lastIndex . "\")";
      echo "</script>";
      echo "正在载入:" . $nextIndex . "——" . $filelist[$nextIndex][0];
  }
  ?>
  284. </body>
  285. </html>