ソースを参照

提取cs6段落信息

visuddhinanda 5 年 前
コミット
7443f0df7e
2 ファイル変更215 行追加41 行削除
  1. 42 41
      app/install/db_collact_para_bookid.php
  2. 173 0
      app/install/page_index.php

+ 42 - 41
app/install/db_collact_para_bookid.php

@@ -1,30 +1,21 @@
 <?php
-require_once "install_head.php";
-?>
-<!DOCTYPE html>
-<html>
-<head>
-</head>
-<body>
-<h2>提取para?_??的内容</h2>
-<p><a href="index.php">Home</a></p>
-<?php
-
-$thisFileName=basename(__FILE__);
-
-if(isset($_GET["from"])==false){
-?>
-<form action="<?php echo $thisFileName;?>" method="get">
-From: <input type="text" name="from"><br>
-To: <input type="text" name="to"><br>
-<input type="submit">
-</form>
-<?php
-return;
+# Export the cs6 paragraph numbers from the already-split Tipitaka data
+require_once '../public/_pdo.php';
+require_once '../path.php';
+if ($argc < 3){
+	echo "无效的参数 ";
+	exit;
+}
+$from = (int)$argv[1];
+$to =(int)$argv[2];
+if($from<1){
+	$from = 1;
 }
+if($to>217){
+	$to = 217;
+}
+
 
-$from=$_GET["from"];
-$to=$_GET["to"];
 $filelist=array();
 $fileNums=0;
 $log="";
@@ -37,13 +28,12 @@ if(($handle=fopen("filelist.csv",'r'))!==FALSE){
 		$fileNums++;
 	}
 }
-if($to==0 || $to>=$fileNums) $to=$fileNums-1;
 
-$outputFile = fopen("xml/book_link.csv", "a") or die("Unable to open file!");
+$outputFile = fopen(_DIR_PALI_CSV_."/book_cs6_para.csv", "w") or die("Unable to open file!");
 $aBook = array();
 
-for($iFile=$from;$iFile<$to;$iFile++){
-	echo "<h2>$iFile</h2>";
+for($iFile=$from-1;$iFile<=$to-1;$iFile++){
+	echo "doing $iFile ";
 	$FileName=$filelist[$iFile][1].".htm";
 	$fileId=$filelist[$iFile][0];
 
@@ -53,14 +43,13 @@ for($iFile=$from;$iFile<$to;$iFile++){
 	$vriParNum=0;
 	$wordOrder=1;
 
-	$dirXmlBase="xml/";
+	$dirXmlBase=_DIR_PALI_CSV_."/";
 	$dirXml=$outputFileNameHead."/";
 
 	$currParNum="";
 
 	$xmlfile = $inputFileName;
-	echo "doing:".$xmlfile."<br>";
-	$log=$log."$from,$FileName,open\r\n";
+	# $log=$log."$from,$FileName,open\r\n";
 
 
 	// Open the file and read the data
@@ -71,19 +60,35 @@ for($iFile=$from;$iFile<$to;$iFile++){
 	if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead.".csv", "r"))!==FALSE){
 		while(($data=fgetcsv($fp,0,','))!==FALSE){
 			if($data[7]==".a."){
-				if(substr($data[4],0,4)=="para"){
+				if(stripos($data[4],"para")!==false){
 					if($bookid=stristr($data[4],"_")){
 						$bookid=substr($bookid,1);
-						$aBook["{$bookid}"]=1;
+						$paraString = stristr($data[4],"_",true);
+						$paraBegin = stripos($paraString,"para")+4;
+						$paraNum = explode("-",substr($paraString,$paraBegin));
+						$count++;
+						$output = array();
+						$output[] = substr($data[2],1);
+						$output[] = $data[3];
+						$output[] = $bookid;
+
+						foreach ($paraNum as $key => $value) {
+							# append every paragraph number found after "para" (split on "-")
+							$output[] = $value;
+						}
+						if(count($paraNum)==1){
+							$output[] = $paraNum[0];
+						}
+						fputcsv($outputFile,$output);
 					}
 				}
-			}			
+			}
 		}
 		fclose($fp);
-		echo "单词表load:".$dirXmlBase.$dirXml.$outputFileNameHead.".csv<br>";
+		echo "$count \n";
 	}
 	else{
-		echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead.".csv";
+		echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead.".csv \n";
 	}
 }
 
@@ -94,12 +99,8 @@ for($iFile=$from;$iFile<$to;$iFile++){
 	*/
 	fclose($outputFile);
 	
-	echo "count:".count($aBook)."<br>";
-	foreach($aBook as $x=>$value){
-		echo "{$x}<br>";
-	}
 
-	echo "<h2>齐活!功德无量!all done!</h2>";
+	echo "齐活!功德无量!all done! \n";
 
 ?>
 </body>

+ 173 - 0
app/install/page_index.php

@@ -0,0 +1,173 @@
+<?php 
+/*
+Export the cs6 paragraph numbers from the already-split Tipitaka data.
+
+NOTE(review): the code further down rebuilds the 'main' table of a
+<bookId>_tpl.db3 SQLite file from a per-book CSV word list, so the
+description above may be a leftover from another script - confirm.
+
+NOTE(review): the form below submits "from"/"to" to db_insert_templet.php,
+while this script only continues past the form when a "run" query
+parameter is present - confirm that this is intended.
+*/
+
+?>
+<!DOCTYPE html>
+<html>
+<head>
+</head>
+<body>
+<h2>Export Paragraph No.</h2>
+<p>用拆分好的三藏数据 导出cs6段落编号</p>
+<?php
+require_once '../public/_pdo.php';
+require_once '../path.php';
+if(isset($_GET["run"])==false){
+?>
+<form action="db_insert_templet.php" method="get">
+From: <input type="text" value="0" name="from"><br>
+To: <input type="text" value="216" name="to"><br>
+<input type="submit">
+</form>
+<?php
+return;
+}
+
+// Index range of filelist.csv rows handled by this run. The values are fixed
+// here; the query string is not consulted.
+$from=0;
+$to=216;
+$filelist=array();
+$fileNums=0;
+$log="";
+echo "<h2>$from</h2>";
+
+// Load filelist.csv. Columns used below: 0 = file id, 1 = file name stem, 2 = book id.
+$handle=fopen("filelist.csv",'r');
+if($handle===FALSE){
+	// Without the list there is no book id, and continuing would drop and
+	// recreate a table in a database whose name is built from a null value.
+	echo "can not open csv file. filename=filelist.csv";
+	return;
+}
+// Read into a temporary first so that the terminating FALSE of fgetcsv()
+// is not stored as a bogus last element of $filelist.
+while(($oneFile=fgetcsv($handle,0,','))!==FALSE){
+	$filelist[$fileNums]=$oneFile;
+	$fileNums++;
+}
+fclose($handle);
+if($to==0 || $to>=$fileNums) $to=$fileNums-1;
+
+// Stop early when the requested row is missing or incomplete (e.g. empty list or blank line).
+if(!isset($filelist[$from][0],$filelist[$from][1],$filelist[$from][2])){
+	echo "can not find file $from in filelist.csv";
+	return;
+}
+
+$FileName=$filelist[$from][1].".htm";
+$fileId=$filelist[$from][0];
+
+$dirLog=_DIR_LOG_;
+
+$dirDb=_DIR_PALICANON_TEMPLET_;
+$inputFileName=$FileName;
+$outputFileNameHead=$filelist[$from][1];
+$bookId=$filelist[$from][2];
+$vriParNum=0;
+$wordOrder=1;
+
+// The per-book CSV lives at <_DIR_PALI_CSV_>/<name>/<name>.csv
+$dirXmlBase=_DIR_PALI_CSV_."/";
+$dirXml=$outputFileNameHead."/";
+
+// Working variables kept as they were; nothing further down in this script reads them.
+$currChapter="";
+$currParNum="";
+$arrAllWords[0]=array("id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un","style","vri","sya","si","ka","pi","pa","kam");
+$g_wordCounter=0;
+
+$arrUnWords[0]=array("id","word","type","gramma","parent","mean","note","part","partmean","cf","state","delete","tag","len");
+$g_unWordCounter=0;
+
+$arrUnPart[0]="word";
+$g_unPartCounter=-1;
+
+/* Statistics with punctuation removed */
+$arrAllPaliWordsCount=array();
+$g_paliWordCounter=0;
+$g_wordCounterInSutta=0;
+$g_paliWordCountCounter=0;
+
+
+$xmlfile = $inputFileName;
+echo "doing:".$xmlfile."<br>";
+$log=$log."$from,$FileName,open\r\n";
+
+// Rows waiting to be inserted; filled while the CSV is read further down.
+$arrInserString=array();
+// One SQLite template database per book: <_DIR_PALICANON_TEMPLET_>/<bookId>_tpl.db3
+$db_file = $dirDb."/".$bookId.'_tpl.db3';
+PDO_Connect("sqlite:$db_file");
+
+// Rebuild the table from scratch on every run.
+PDO_Execute("DROP TABLE IF EXISTS main;");
+$query="CREATE TABLE 'main' ( 'id' TEXT PRIMARY KEY NOT NULL, 
+							'book' INTEGER, 
+							'paragraph' INTEGER, 
+							'wid' INTEGER, 
+							'word' TEXT, 
+							'real' TEXT, 
+							'type' TEXT, 
+							'gramma' TEXT, 
+							'part' TEXT, 
+							'style' TEXT)";
+$stmt = @PDO_Execute($query);
+// SQLSTATE codes are strings. A loose "!= 0" turns alphanumeric codes such as
+// "HY000" into 0 on PHP 7 and so misses the failure; compare with the
+// success code PDO::ERR_NONE ("00000") instead.
+$sqlState = $stmt ? $stmt->errorCode() : null;
+if (!$stmt || ($sqlState !== null && $sqlState !== PDO::ERR_NONE)) {
+	$error = PDO_ErrorInfo();
+	print_r($error[2]);
+	// Logged in the same way as an index creation failure.
+	$log=$log."$from, $FileName, error, $error[2] \r\n";
+}
+PDO_Execute("DROP INDEX IF EXISTS search;");
+
+$query="CREATE INDEX 'search' ON \"main\" (\"book\", \"paragraph\", \"wid\" ASC)";
+$stmt = @PDO_Execute($query);
+$sqlState = $stmt ? $stmt->errorCode() : null;
+if (!$stmt || ($sqlState !== null && $sqlState !== PDO::ERR_NONE)) {
+	$error = PDO_ErrorInfo();
+	print_r($error[2]);
+	$log=$log."$from, $FileName, error, $error[2] \r\n";
+}
+
+// Read the word list CSV of the current book and queue one parameter row per record.
+$csvFile=$dirXmlBase.$dirXml.$outputFileNameHead.".csv";
+$fp=fopen($csvFile, "r");
+if($fp===FALSE){
+	echo "can not open csv file. filename=".$csvFile;
+}
+else{
+	// CSV columns:
+	// id,wid,book,paragraph,word,real,type,gramma,mean,note,part,partmean,bmc,bmt,un,style,vri,sya,si,ka,pi,pa,kam
+	for($data=fgetcsv($fp,0,',');$data!==FALSE;$data=fgetcsv($fp,0,',')){
+		// Picked in the column order of the INSERT statement:
+		// id, book, paragraph, wid, word, real, type, gramma, part, style
+		$arrInserString[]=array(
+			$data[0],
+			mb_substr($data[2],1), // book column without its first character
+			$data[3],
+			$data[16],
+			$data[4],
+			$data[5],
+			$data[6],
+			$data[7],
+			$data[10],
+			$data[15]
+		);
+	}
+	fclose($fp);
+	echo "单词表load:".$csvFile."<br>";
+}
+
+// Insert all queued rows inside one transaction (auto-commit is off meanwhile).
+// Every execute() is checked: the first failure stops the loop and the whole
+// batch is rolled back, instead of committing and then looking only at the
+// status of the last statement.
+$PDO->beginTransaction();
+$query="INSERT INTO main ('id','book','paragraph','wid','word','real','type','gramma','part','style') VALUES (?,?,?,?,?,?,?,?,?,?)";
+$stmt = $PDO->prepare($query);
+// prepare() may return false; calling execute() on it would be a fatal error.
+$insertOk = ($stmt !== false);
+if($insertOk){
+	foreach($arrInserString as $oneParam){
+		if($stmt->execute($oneParam) === false){
+			$insertOk = false;
+			break;
+		}
+	}
+}
+if($insertOk){
+	// Commit the changes
+	$PDO->commit();
+	$count=count($arrInserString);
+	echo "updata $count recorders.";
+}
+else{
+	// Read the error details before rollBack() can replace them.
+	$error = $stmt ? $stmt->errorInfo() : $PDO->errorInfo();
+	$PDO->rollBack();
+	echo "error - $error[2] <br>";
+
+	$log=$log."$from, $FileName, error, $error[2] \r\n";
+}
+
+	// Append the messages collected during this run to the install log.
+	// A log that cannot be opened is reported but must not abort the page:
+	// passing false to fwrite()/fclose() is a fatal TypeError on PHP 8.
+	$myLogFile = fopen($dirLog."insert_db.log", "a");
+	if($myLogFile!==FALSE){
+		fwrite($myLogFile, $log);
+		fclose($myLogFile);
+	}
+	else{
+		echo "can not open log file. filename=".$dirLog."insert_db.log";
+	}
+?>
+
+
+<?php 
+// Either report completion or hand over to the next entry of the file list.
+$nextFile=$from+1;
+if($from!=$to){
+	// Client-side redirect that continues the batch with the following index.
+	echo "<script>";
+	echo "window.location.assign(\"db_insert_templet.php?from=".$nextFile."&to=".$to."\")";
+	echo "</script>";
+	echo "正在载入:".$nextFile."——".$filelist[$nextFile][0];
+}
+else{
+	echo "<h2>齐活!功德无量!all done!</h2>";
+}
+?>
+</body>
+</html>