db_insert_index_csv.php 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. <?php
  2. require_once "install_head.php";
  3. include "./_pdo.php";
  4. if (PHP_SAPI == "cli") {
  5. echo $argc;
  6. if($argc>=3){
  7. $from=$argv[1];
  8. $to=$argv[2];
  9. echo "From: {$from} To:{$to}";
  10. }
  11. else if($argc>=1){
  12. $from=0;
  13. $to = 216;
  14. echo "生成全部217本书";
  15. }
  16. else{
  17. echo "参数错误";
  18. exit;
  19. }
  20. }
  21. else{
  22. echo "<!DOCTYPE html><html><head></head>";
  23. echo "<body><h2>Insert to Index</h2>";
  24. if(isset($_GET["from"])==false){
  25. echo '<form action="db_insert_index_csv.php" method="get">';
  26. echo 'From: <input type="text" name="from" value="0"><br>';
  27. echo 'To: <input type="text" name="to" value="216"><br>';
  28. echo '<input type="submit">';
  29. echo '</form>';
  30. exit;
  31. }
  32. else{
  33. $from=$_GET["from"];
  34. $to=$_GET["to"];
  35. }
  36. }
  37. $g_wordCounter=0;
  38. $g_wordIndexCounter=0;
  39. $iAllWordIndex=array();
  40. $sAllWord=array();
  41. $dirLog=_DIR_LOG_."/";
  42. $dirXmlBase=_DIR_PALI_CSV_."/";
  43. $filelist=array();
  44. $fileNums=0;
  45. $log="";
  46. echo "<h2>$from</h2>";
  47. function getWordEn($strIn){
  48. $out=$strIn;
  49. $out=str_replace("ā","a",$out);
  50. $out=str_replace("ī","i",$out);
  51. $out=str_replace("ū","u",$out);
  52. $out=str_replace("ṅ","n",$out);
  53. $out=str_replace("ñ","n",$out);
  54. $out=str_replace("ṭ","t",$out);
  55. $out=str_replace("ḍ","d",$out);
  56. $out=str_replace("ṇ","n",$out);
  57. $out=str_replace("ḷ","l",$out);
  58. $out=str_replace("ṃ","m",$out);
  59. return($out);
  60. }
  61. if(($handle=fopen("filelist.csv",'r'))!==FALSE){
  62. while(($filelist[$fileNums]=fgetcsv($handle,0,','))!==FALSE){
  63. $fileNums++;
  64. }
  65. }
  66. if($to==0 || $to>=$fileNums) $to=$fileNums-1;
  67. for($iFile=$from;$iFile<=$to;$iFile++){
  68. echo "<h3>{$iFile}</h3>";
  69. $FileName=$filelist[$iFile][1].".htm";
  70. $fileId=$filelist[$iFile][0];
  71. $inputFileName=$FileName;
  72. $outputFileNameHead=$filelist[$iFile][1];
  73. $bookId=$filelist[$iFile][2];
  74. $dirXml=$outputFileNameHead."/";
  75. $xmlfile = $inputFileName;
  76. echo "doing:".$xmlfile."<br>";
  77. $log=$log."$iFile,$FileName,open\r\n";
  78. $arrInserString=array();
  79. // 打开文件并读取数据
  80. $irow=0;
  81. if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead.".csv", "r"))!==FALSE){
  82. while(($data=fgetcsv($fp,0,','))!==FALSE){
  83. $irow++;
  84. if($irow>1){
  85. $params=$data;
  86. $arrInserString[count($arrInserString)]=$params;
  87. }
  88. }
  89. fclose($fp);
  90. echo "单词表load:".$dirXmlBase.$dirXml.$outputFileNameHead.".csv<br>";
  91. }
  92. else{
  93. echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead.".csv";
  94. }
  95. if(($fpoutput=fopen(_DIR_CSV_PALI_CANON_WORD_."/{$iFile}_words.csv", "w"))!==FALSE){
  96. //$query="INSERT INTO word ('id','book','paragraph','wordindex','bold') VALUES (?,?,?,?,?)";
  97. $count=0;
  98. $count1=0;
  99. $sen="";
  100. $sen1="";
  101. $sen_en="";
  102. $sen_count=0;
  103. $book="";
  104. $paragraph="";
  105. foreach($arrInserString as $oneParam){
  106. if($oneParam[5]!=""){
  107. $g_wordCounter++;
  108. $book=substr($oneParam[2],1);
  109. $paragraph=$oneParam[3];
  110. $word=$oneParam[5];
  111. if($oneParam[15]=="bld" ){
  112. $bold=1;
  113. }
  114. else{
  115. $bold=0;
  116. }
  117. if(isset($sAllWord[$word])){
  118. $wordindex=$sAllWord[$word];
  119. $iAllWordIndex[$wordindex][1]++;
  120. if($bold==1){
  121. $iAllWordIndex[$wordindex][3]++;
  122. }
  123. else{
  124. $iAllWordIndex[$wordindex][2]++;
  125. }
  126. }
  127. else{
  128. $wordindex=$g_wordIndexCounter;
  129. $sAllWord[$word]=$g_wordIndexCounter;
  130. $iAllWordIndex[$g_wordIndexCounter][0]=$word;
  131. $iAllWordIndex[$g_wordIndexCounter][1]=1;//all word count
  132. if($bold==1){
  133. $iAllWordIndex[$g_wordIndexCounter][2]=0;
  134. $iAllWordIndex[$g_wordIndexCounter][3]=1;
  135. }
  136. else{
  137. $iAllWordIndex[$g_wordIndexCounter][2]=1;
  138. $iAllWordIndex[$g_wordIndexCounter][3]=0;
  139. }
  140. $g_wordIndexCounter++;
  141. }
  142. $newWord=array($g_wordCounter,$book,$paragraph,$wordindex,$bold);
  143. fputcsv($fpoutput,$newWord);
  144. $count++;
  145. }
  146. }
  147. fclose($fpoutput);
  148. }
  149. else{
  150. echo "open file false";
  151. }
  152. }
  153. //$query="INSERT INTO wordindex ('id','word','word_en','count','normal','bold','is_base','len') VALUES (?,?,?,?,?,?,?,?)";
  154. if(($fpoutput=fopen(_DIR_CSV_PALI_CANON_WORD_INDEX_."/0.csv", "w"))!==FALSE){
  155. echo count($iAllWordIndex)."words<br>";
  156. for($iword=0;$iword<count($iAllWordIndex);$iword++){
  157. if(($iword % 10000)==0){
  158. fclose($fpoutput);
  159. $fpoutput=fopen(_DIR_CSV_PALI_CANON_WORD_INDEX_."/" . ($iword/10000) . ".csv", "w");
  160. }
  161. $wordindex=$iword;
  162. $newWord=array($wordindex,$iAllWordIndex[$iword][0],getWordEn($iAllWordIndex[$iword][0]),$iAllWordIndex[$iword][1],$iAllWordIndex[$iword][2],$iAllWordIndex[$iword][3],0,mb_strlen($iAllWordIndex[$iword][0],"UTF-8"));
  163. fputcsv($fpoutput,$newWord);
  164. }
  165. fclose($fpoutput);
  166. }
  167. else{
  168. echo "can not open file ";
  169. }
  170. $myLogFile = fopen($dirLog."insert_index.log", "a");
  171. fwrite($myLogFile, $log);
  172. fclose($myLogFile);
  173. echo "<h2>齐活!功德无量!all done!</h2>";
  174. ?>
  175. </body>
  176. </html>