db_insert_index.php 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. <?php
  2. require_once "install_head.php";
  3. ?>
  4. <!DOCTYPE html>
  5. <html>
  6. <head>
  7. </head>
  8. <body>
  9. <h2>Insert to Index</h2>
  10. <p><a href="index.php">Home</a></p>
  11. <?php
  12. include "./_pdo.php";
  13. include "../path.php";
  14. if(isset($_GET["from"])==false){
  15. ?>
  16. <form action="db_insert_index.php" method="get">
  17. From: <input type="text" name="from" value="0"><br>
  18. To: <input type="text" name="to" value="216"><br>
  19. <input type="submit">
  20. </form>
  21. <?php
  22. return;
  23. }
  24. $from=$_GET["from"];
  25. $to=$_GET["to"];
  26. $dirLog=_DIR_LOG_."/";
  27. $dirDb="db/";
  28. $dirXmlBase=_DIR_PALI_CSV_."/";
  29. $filelist=array();
  30. $fileNums=0;
  31. $log="";
  32. echo "<h2>$from</h2>";
  33. //已经存在的词
  34. $g_wordCounter=0;
  35. $g_wordIndexCounter=0;
  36. $iAllWordIndex=array();
  37. $sAllWord=array();
  38. //新加入的词
  39. $wordindex_max_index = 0;
  40. $aNewWordIndex = array(); //词内容
  41. $sNewWord = array(); //词头索引
  42. global $dbh_word_index;
  43. $dns = "sqlite:"._FILE_DB_WORD_INDEX_;
  44. $dbh_word_index = new PDO($dns, "", "",array(PDO::ATTR_PERSISTENT=>true));
  45. $dbh_word_index->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  46. $query = "select id from wordindex where 1 order by id DESC ";
  47. $stmt = $dbh_word_index->prepare($query);
  48. $stmt->execute(array());
  49. $id = $stmt->fetch(PDO::FETCH_ASSOC);
  50. if($id === FALSE){
  51. $wordindex_max_index = 0;
  52. }
  53. else{
  54. $wordindex_max_index =$id["id"];
  55. }
  56. $db_file = _FILE_DB_PALI_INDEX_;
  57. PDO_Connect("sqlite:$db_file");
  58. $query = "select id from word where 1 order by id DESC ";
  59. $stmt = $PDO->prepare($query);
  60. $stmt->execute(array());
  61. $id = $stmt->fetch(PDO::FETCH_ASSOC);
  62. if($id === FALSE){
  63. $g_wordCounter = 0;
  64. }
  65. else{
  66. $g_wordCounter = $id["id"];
  67. }
  68. function dict_lookup($word){
  69. global $dbh_word_index;
  70. $query = "select * from wordindex where \"word\" = ? ";
  71. $stmt = $dbh_word_index->prepare($query);
  72. $stmt->execute(array($word));
  73. return $stmt->fetch(PDO::FETCH_ASSOC);
  74. }
  75. function getWordEn($strIn){
  76. $out=$strIn;
  77. $out=str_replace("ā","a",$out);
  78. $out=str_replace("ī","i",$out);
  79. $out=str_replace("ū","u",$out);
  80. $out=str_replace("ṅ","n",$out);
  81. $out=str_replace("ñ","n",$out);
  82. $out=str_replace("ṭ","t",$out);
  83. $out=str_replace("ḍ","d",$out);
  84. $out=str_replace("ṇ","n",$out);
  85. $out=str_replace("ḷ","l",$out);
  86. $out=str_replace("ṃ","m",$out);
  87. return($out);
  88. }
  89. if(($handle=fopen("filelist.csv",'r'))!==FALSE){
  90. while(($filelist[$fileNums]=fgetcsv($handle,0,','))!==FALSE){
  91. $fileNums++;
  92. }
  93. }
  94. if($to==0 || $to>=$fileNums) $to=$fileNums-1;
  95. //for($iFile=$from;$iFile<=$to;$iFile++)
  96. $iFile=$from;
  97. {
  98. $FileName=$filelist[$iFile][1].".htm";
  99. $fileId=$filelist[$iFile][0];
  100. $inputFileName=$FileName;
  101. $outputFileNameHead=$filelist[$iFile][1];
  102. $bookId=$filelist[$iFile][2];
  103. $dirXml=$outputFileNameHead."/";
  104. $xmlfile = $inputFileName;
  105. echo "doing:".$xmlfile."<br>";
  106. $log=$log."$iFile,$FileName,open\r\n";
  107. $arrInserString=array();
  108. // 打开文件并读取数据
  109. $irow=0;
  110. if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead.".csv", "r"))!==FALSE){
  111. while(($data=fgetcsv($fp,0,','))!==FALSE){
  112. $irow++;
  113. if($irow>1){
  114. $params=$data;
  115. $arrInserString[]=$params;
  116. }
  117. }
  118. fclose($fp);
  119. echo "单词表load:".$dirXmlBase.$dirXml.$outputFileNameHead.".csv<br>";
  120. }
  121. else{
  122. echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead.".csv";
  123. }
  124. // 开始一个事务,关闭自动提交
  125. $PDO->beginTransaction();
  126. $query="INSERT INTO word ('id','book','paragraph','wordindex','bold') VALUES (?,?,?,?,?)";
  127. $stmt = $PDO->prepare($query);
  128. $count=0;
  129. $count1=0;
  130. $sen="";
  131. $sen1="";
  132. $sen_en="";
  133. $sen_count=0;
  134. $book="";
  135. $paragraph="";
  136. foreach($arrInserString as $oneParam){
  137. if($oneParam[5]!=""){
  138. $g_wordCounter++;
  139. $book=substr($oneParam[2],1);
  140. $paragraph=$oneParam[3];
  141. $word=$oneParam[5];
  142. if($oneParam[15]=="bld" ){
  143. $bold=1;
  144. }
  145. else{
  146. $bold=0;
  147. }
  148. if(isset($sAllWord[$word])){
  149. //已经存在的词
  150. $wordindex=$sAllWord[$word];
  151. $iAllWordIndex[$wordindex][1]++;
  152. if($bold==1){
  153. $iAllWordIndex[$wordindex][3]++;
  154. }
  155. else{
  156. $iAllWordIndex[$wordindex][2]++;
  157. }
  158. }
  159. else if(isset($sNewWord[$word])){
  160. //是新家入的词
  161. $wordindex=$sNewWord[$word];
  162. $aNewWordIndex[$wordindex][1]++;
  163. if($bold==1){
  164. $aNewWordIndex[$wordindex][3]++;
  165. }
  166. else{
  167. $aNewWordIndex[$wordindex][2]++;
  168. }
  169. }
  170. else if(($lookup=dict_lookup($word)) !== FALSE){
  171. //在数据库中找到
  172. $wordindex=$lookup["id"];
  173. $sAllWord[$word]=$wordindex;
  174. $iAllWordIndex[$wordindex][0]=$word;
  175. $iAllWordIndex[$wordindex][1]=$lookup["count"] + 1;//all word count
  176. if($bold==1){
  177. $iAllWordIndex[$wordindex][2] = $lookup["normal"] ;
  178. $iAllWordIndex[$wordindex][3] = $lookup["bold"] + 1;
  179. }
  180. else{
  181. $iAllWordIndex[$wordindex][2] = $lookup["normal"] + 1;
  182. $iAllWordIndex[$wordindex][3] = $lookup["bold"] ;
  183. }
  184. }
  185. else{
  186. //数据库里也没找到 怎么办呢?我想呀想 想呀想
  187. $wordindex=$wordindex_max_index + 1;
  188. $sNewWord[$word]=$wordindex;
  189. $aNewWordIndex[$wordindex][0]=$word;
  190. $aNewWordIndex[$wordindex][1]=1;//all word count
  191. if($bold==1){
  192. $aNewWordIndex[$wordindex][2]=0;
  193. $aNewWordIndex[$wordindex][3]=1;
  194. }
  195. else{
  196. $aNewWordIndex[$wordindex][2]=1;
  197. $aNewWordIndex[$wordindex][3]=0;
  198. }
  199. $wordindex_max_index++;
  200. }
  201. $newWord=array($g_wordCounter,$book,$paragraph,$wordindex,$bold);
  202. $stmt->execute($newWord);
  203. $count++;
  204. }
  205. }
  206. // 提交更改
  207. $PDO->commit();
  208. if (!$stmt || ($stmt && $stmt->errorCode() != 0)) {
  209. $error = PDO_ErrorInfo();
  210. echo "error - $error[2] <br>";
  211. $log.="$from, $FileName, error, $error[2] \r\n";
  212. }
  213. else{
  214. echo "updata $count recorders.<br />";
  215. $log.="updata $count recorders.\r\n";
  216. }
  217. }
  218. //更新单词索引表
  219. //首先插入新的词
  220. // 开始一个事务,关闭自动提交
  221. $dbh_word_index->beginTransaction();
  222. $query="INSERT INTO wordindex ('id','word','word_en','count','normal','bold','is_base','len') VALUES ( ? , ? , ? , ? , ? , ? , ? , ? )";
  223. $stmt = $dbh_word_index->prepare($query);
  224. echo "INSERT:".count($aNewWordIndex)."words<br>";
  225. foreach($aNewWordIndex as $wIndex => $info){
  226. $wordindex=$iword;
  227. $newWord=array(
  228. $wIndex,
  229. $info[0],
  230. getWordEn($info[0]),
  231. $info[1],
  232. $info[2],
  233. $info[3],
  234. 0,
  235. mb_strlen($info[0],"UTF-8")
  236. );
  237. $stmt->execute($newWord);
  238. }
  239. // 提交更改
  240. $dbh_word_index->commit();
  241. if (!$stmt || ($stmt && $stmt->errorCode() != 0)) {
  242. $error = $dbh_word_index->errorInfo();
  243. echo "error - $error[2] <br>";
  244. $log.="$from, $FileName, error, $error[2] \r\n";
  245. }
  246. else{
  247. echo "updata iword recorders.<br />";
  248. $log.="updata iword recorders.\r\n";
  249. }
  250. //然后修改已经有的词
  251. // 开始一个事务,关闭自动提交
  252. $dbh_word_index->beginTransaction();
  253. $query="UPDATE wordindex SET count = ? , normal = ? , bold = ? where id = ? ";
  254. $stmt = $dbh_word_index->prepare($query);
  255. echo "UPDATE:".count($iAllWordIndex)."words<br>";
  256. foreach($iAllWordIndex as $wIndex => $info){
  257. $wordindex=$iword;
  258. $newWord=array(
  259. $info[1],
  260. $info[2],
  261. $info[3],
  262. $wIndex
  263. );
  264. $stmt->execute($newWord);
  265. }
  266. // 提交更改
  267. $dbh_word_index->commit();
  268. if (!$stmt || ($stmt && $stmt->errorCode() != 0)) {
  269. $error = $dbh_word_index->errorInfo();
  270. echo "error - $error[2] <br>";
  271. $log.="$from, $FileName, error, $error[2] \r\n";
  272. }
  273. else{
  274. echo "updata iword recorders.<br />";
  275. $log.="updata iword recorders.\r\n";
  276. }
  277. $myLogFile = fopen($dirLog."insert_index.log", "a");
  278. fwrite($myLogFile, $log);
  279. fclose($myLogFile);
  280. ?>
  281. <?php
  282. if($from>=$to){
  283. echo "<h2>齐活!功德无量!all done!</h2>";
  284. }
  285. else{
  286. echo "<script>";
  287. echo "window.location.assign(\"db_insert_index.php?from=".($from+1)."&to=".$to."\")";
  288. echo "</script>";
  289. echo "正在载入:".($from+1)."——".$filelist[$from+1][0];
  290. }
  291. ?>
  292. </body>
  293. </html>