Browse Source

将原来的 www csv 导入改为 cli

visuddhinanda 4 years ago
parent
commit
0e2bcce39a

+ 2 - 2
app/admin/word_index_weight_refresh.php

@@ -2,8 +2,8 @@
 /*
 计算单词权重
  */
-require_once '../path.php';
-require_once './word_index_weight_table.php';
+require_once __DIR__.'/../path.php';
+require_once __DIR__.'/word_index_weight_table.php';
 
 if (isset($_GET["from"])) {
     $from = (int)$_GET["from"];

+ 101 - 0
app/install/db_insert_bookword_from_csv_cli.php

@@ -0,0 +1,101 @@
+<?php
+/*
+ * CLI import of per-book word counts.
+ *
+ * For every book in the requested range this script reads
+ * _DIR_CSV_PALI_CANON_WORD_/{n}_words.csv (n is the zero-based book index),
+ * counts how often each word index (CSV column 3) occurs, and replaces the
+ * rows of that book (CSV column 1) in _TABLE_BOOK_WORD_ with the new counts.
+ *
+ * Usage: php db_insert_bookword_from_csv_cli.php from to
+ *   from / to: first and last book number (1-217). to = 0 means "up to the
+ *   last entry of filelist.csv".
+ */
+require_once __DIR__."/../path.php";
+require_once __DIR__.'/../public/_pdo.php';
+
+
+echo "Insert Pali Text To DB".PHP_EOL;
+
+if ($argc != 3) {
+    echo "help".PHP_EOL;
+    echo $argv[0]." from to".PHP_EOL;
+    echo "from = 1-217".PHP_EOL;
+    echo "to = 1-217".PHP_EOL;
+    exit;
+}
+$_from = (int) $argv[1];
+$_to = (int) $argv[2];
+if ($_from < 1) {
+    // Book numbers are 1-based; anything lower would map to a negative file index.
+    $_from = 1;
+}
+if ($_to > 217) {
+    $_to = 217;
+}
+
+
+$dirLog = _DIR_LOG_ . "/";
+
+$filelist = array();
+$fileNums = 0;
+$log = "";
+
+$dns = _FILE_DB_BOOK_WORD_;
+$dbh_word_index = new PDO($dns, _DB_USERNAME_, _DB_PASSWORD_, array(PDO::ATTR_PERSISTENT => true));
+$dbh_word_index->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+
+// Resolve filelist.csv next to this script so the CLI works from any
+// working directory.
+$handle = fopen(__DIR__."/filelist.csv", 'r');
+if ($handle !== false) {
+    while (($row = fgetcsv($handle, 0, ',')) !== false) {
+        $filelist[$fileNums] = $row;
+        $fileNums++;
+    }
+    fclose($handle);
+} else {
+    echo "error - can not open ".__DIR__."/filelist.csv".PHP_EOL;
+}
+if ($_to == 0 || $_to >= $fileNums) {
+    $_to = $fileNums;
+}
+
+for ($from = $_from - 1; $from < $_to; $from++) {
+    echo "doing ".($from+1).PHP_EOL;
+
+    $csvName = _DIR_CSV_PALI_CANON_WORD_ . "/{$from}_words.csv";
+    $fpoutput = fopen($csvName, "r");
+    if ($fpoutput === false) {
+        // Without the CSV there is no book id and nothing to insert.
+        echo "error - can not open $csvName".PHP_EOL;
+        $log .= "$from, $csvName, error, can not open file \r\n";
+        continue;
+    }
+
+    // Reset per-book state so nothing leaks in from the previous iteration.
+    $bookword = array();
+    $book = null;
+    $count = 0;
+    while (($data = fgetcsv($fpoutput, 0, ',')) !== false) {
+        if (!isset($data[1], $data[3])) {
+            // Blank or truncated line: fgetcsv returns array(null) for blank lines.
+            continue;
+        }
+        $book = $data[1];
+        if (isset($bookword[$data[3]])) {
+            $bookword[$data[3]]++;
+        } else {
+            $bookword[$data[3]] = 1;
+        }
+        $count++;
+    }
+    fclose($fpoutput);
+
+    if ($book === null) {
+        echo "error - no word rows in $csvName".PHP_EOL;
+        $log .= "$from, $csvName, error, no word rows \r\n";
+        continue;
+    }
+
+    // Replace the book's rows atomically: the delete and all inserts share
+    // one transaction, and any failure rolls the book back to its old state.
+    $error = null;
+    $dbh_word_index->beginTransaction();
+
+    // Remove the previous counts of this book.
+    $query = "DELETE FROM "._TABLE_BOOK_WORD_." WHERE book = ?";
+    $stmt = $dbh_word_index->prepare($query);
+    if ($stmt === false) {
+        $error = $dbh_word_index->errorInfo();
+    } elseif ($stmt->execute(array($book)) === false) {
+        $error = $stmt->errorInfo();
+    }
+
+    if ($error === null) {
+        $query = "INSERT INTO "._TABLE_BOOK_WORD_." (book , wordindex , count) VALUES ( ? , ? , ?  )";
+        $stmt = $dbh_word_index->prepare($query);
+        if ($stmt === false) {
+            $error = $dbh_word_index->errorInfo();
+        } else {
+            foreach ($bookword as $key => $value) {
+                if ($stmt->execute(array($book, $key, $value)) === false) {
+                    // Remember the failing statement's own error and stop.
+                    $error = $stmt->errorInfo();
+                    break;
+                }
+            }
+        }
+    }
+
+    if ($error !== null) {
+        $dbh_word_index->rollBack();
+        echo "error - $error[2]".PHP_EOL;
+        $log .= "$from, $csvName, error, $error[2] \r\n";
+    } else {
+        $dbh_word_index->commit();
+        echo "updata $count recorders.".PHP_EOL;
+        $log .= "updata $count recorders.\r\n";
+    }
+
+    /*
+    $myLogFile = fopen($dirLog . "insert_index.log", "a");
+    fwrite($myLogFile, $log);
+    fclose($myLogFile);
+    */
+}
+echo "齐活!功德无量!all done!".PHP_EOL;
+
+?>

+ 146 - 0
app/install/db_insert_palitext_cli.php

@@ -0,0 +1,146 @@
+<?php
+/*
+ * CLI import of the Pali text paragraph table.
+ *
+ * For every book in the requested range this script
+ *   1. reads the book's HTML file from _DIR_PALI_HTML_ and keeps the lines
+ *      that start with "<p",
+ *   2. reads {name}/{name}_pali.csv from _DIR_PALI_CSV_ (first row skipped)
+ *      and puts the n-th kept HTML line into column 5 of the n-th data row,
+ *   3. replaces the rows of that book in _TABLE_PALI_TEXT_.
+ *
+ * Usage: php db_insert_palitext_cli.php from to
+ *   from / to: first and last book number (1-217).
+ */
+require_once __DIR__."/../path.php";
+require_once __DIR__.'/../public/_pdo.php';
+
+
+echo "Insert Pali Text To DB".PHP_EOL;
+
+if ($argc != 3) {
+    echo "help".PHP_EOL;
+    echo $argv[0]." from to".PHP_EOL;
+    echo "from = 1-217".PHP_EOL;
+    echo "to = 1-217".PHP_EOL;
+    exit;
+}
+$_from = (int) $argv[1];
+$_to = (int) $argv[2];
+if ($_from < 1) {
+    // Book numbers are 1-based; anything lower would map to a negative index.
+    $_from = 1;
+}
+if ($_to > 217) {
+    $_to = 217;
+}
+
+
+$to = $_to;
+
+$filelist = array();
+$fileNums = 0;
+$log = "";
+echo "doing $_from".PHP_EOL;
+
+// Resolve filelist.csv next to this script so the CLI works from any
+// working directory.
+$handle = fopen(__DIR__."/filelist.csv", 'r');
+if ($handle !== false) {
+    while (($row = fgetcsv($handle, 0, ',')) !== false) {
+        $filelist[$fileNums] = $row;
+        $fileNums++;
+    }
+    fclose($handle);
+} else {
+    echo "error - can not open ".__DIR__."/filelist.csv".PHP_EOL;
+}
+// NOTE(review): the sibling *_from_csv_cli scripts clamp to $fileNums, this
+// one to $fileNums - 1 (kept as in the original) - confirm which is intended.
+if ($to == 0 || $to >= $fileNums) {
+    $to = $fileNums - 1;
+}
+
+PDO_Connect(_FILE_DB_PALITEXT_,_DB_USERNAME_,_DB_PASSWORD_);
+
+for ($from = $_from - 1; $from < $to; $from++) {
+    if (!isset($filelist[$from][1])) {
+        echo "error - no filelist entry for index $from".PHP_EOL;
+        $log = $log . "$from, , error, no filelist entry \r\n";
+        continue;
+    }
+
+    $book = $from + 1;
+    $outputFileNameHead = $filelist[$from][1];
+    $FileName = $outputFileNameHead . ".htm";
+
+    $dirLog = _DIR_LOG_ . "/";
+    $htmlPath = _DIR_PALI_HTML_ . "/" . $FileName;
+    $csvPath = _DIR_PALI_CSV_ . "/" . $outputFileNameHead . "/" . $outputFileNameHead . "_pali.csv";
+
+    echo "doing:" . $FileName . PHP_EOL;
+    $log = $log . "$from,$FileName,open\r\n";
+
+    // Load the VRI HTML file; only lines beginning with "<p" are kept.
+    $pali_text_array = array();
+    if (($fpPaliText = fopen($htmlPath, "r")) !== false) {
+        while (($data = fgets($fpPaliText)) !== false) {
+            if (substr($data, 0, 2) === "<p") {
+                $pali_text_array[] = $data;
+            }
+        }
+        fclose($fpPaliText);
+        echo "pali text load:" . $htmlPath . PHP_EOL;
+    } else {
+        // Best effort, as before: the import goes on without HTML lines.
+        echo "can not pali text file. filename=" . $htmlPath . PHP_EOL;
+    }
+
+    // Load the paragraph CSV; row 0 is skipped as a header.
+    $arrInserString = array();
+    $inputRow = 0;
+    if (($fp = fopen($csvPath, "r")) !== false) {
+        while (($data = fgetcsv($fp, 0, ',')) !== false) {
+            if ($data === array(null)) {
+                // Blank line: not a paragraph, must not consume an HTML line.
+                continue;
+            }
+            if ($inputRow > 0) {
+                if (($inputRow - 1) < count($pali_text_array)) {
+                    $data[5] = $pali_text_array[$inputRow - 1];
+                }
+                $arrInserString[] = $data;
+            }
+            $inputRow++;
+        }
+        fclose($fp);
+        echo "单词表load:" . $csvPath . PHP_EOL;
+    } else {
+        echo "can not open csv file. filename=" . $csvPath . PHP_EOL;
+        continue;
+    }
+
+    if (($inputRow - 1) != count($pali_text_array)) {
+        $log = $log . "$from, $FileName,error,文件行数不匹配 inputRow=$inputRow pali_text_array=" . count($pali_text_array) . " \r\n";
+        echo "line count error".PHP_EOL;
+    }
+
+    // Replace the book atomically: delete and inserts share one transaction,
+    // and any failure rolls the book back to its previous rows.
+    $error = null;
+    $PDO->beginTransaction();
+    try {
+        // Remove the old rows of this book.
+        $query = "DELETE FROM "._TABLE_PALI_TEXT_." WHERE book=?";
+        $stmt = $PDO->prepare($query);
+        if ($stmt === false) {
+            $error = $PDO->errorInfo();
+        } elseif ($stmt->execute(array($book)) === false) {
+            $error = $stmt->errorInfo();
+        }
+
+        if ($error === null) {
+            $query = "INSERT INTO "._TABLE_PALI_TEXT_." ( book , paragraph , level , class , toc , text , html , lenght ) VALUES ( ? , ? , ? , ? , ? , ? , ? , ? )";
+            $stmt = $PDO->prepare($query);
+            if ($stmt === false) {
+                $error = $PDO->errorInfo();
+            } else {
+                foreach ($arrInserString as $oneParam) {
+                    // Only rows with level < 100 copy their text into toc.
+                    if ($oneParam[3] < 100) {
+                        $toc = $oneParam[6];
+                    } else {
+                        $toc = "";
+                    }
+                    $newData = array(
+                        $book,
+                        $oneParam[2],
+                        $oneParam[3],
+                        $oneParam[4],
+                        $toc,
+                        $oneParam[6],
+                        $oneParam[5],
+                        mb_strlen($oneParam[6], "UTF-8"),
+                    );
+                    if ($stmt->execute($newData) === false) {
+                        $error = $stmt->errorInfo();
+                        break;
+                    }
+                }
+            }
+        }
+    } catch (PDOException $e) {
+        // The error mode chosen by PDO_Connect is not visible here, so
+        // exceptions are handled as well as false returns.
+        $error = array(null, null, $e->getMessage());
+    }
+
+    if ($error !== null) {
+        $PDO->rollBack();
+        echo "error - $error[2]".PHP_EOL;
+
+        $log = $log . "$from, $FileName, error, $error[2] \r\n";
+    } else {
+        $PDO->commit();
+        $count = count($arrInserString);
+        echo "updata $count recorders.".PHP_EOL;
+    }
+    /*
+    $myLogFile = fopen($dirLog . "db_insert_palitext.log", "a");
+    fwrite($myLogFile, $log);
+    fclose($myLogFile);
+    */
+}
+echo "all done!".PHP_EOL;
+
+
+
+?>

+ 84 - 0
app/install/db_insert_word_from_csv_cli.php

@@ -0,0 +1,84 @@
+<?php
+/*
+ * CLI import of the word table.
+ *
+ * For every book in the requested range this script reads
+ * _DIR_CSV_PALI_CANON_WORD_/{n}_words.csv (n is the zero-based book index)
+ * and replaces the rows of book n+1 in _TABLE_WORD_. Each CSV row is bound
+ * as-is to the columns sn, book, paragraph, wordindex, bold.
+ *
+ * Usage: php db_insert_word_from_csv_cli.php from to
+ *   from / to: first and last book number (1-217). to = 0 means "up to the
+ *   last entry of filelist.csv".
+ */
+require_once __DIR__."/../path.php";
+require_once __DIR__.'/../public/_pdo.php';
+
+
+echo "Insert Word To DB".PHP_EOL;
+
+if ($argc != 3) {
+    echo "help".PHP_EOL;
+    echo $argv[0]." from to".PHP_EOL;
+    echo "from = 1-217".PHP_EOL;
+    echo "to = 1-217".PHP_EOL;
+    exit;
+}
+$_from = (int) $argv[1];
+$_to = (int) $argv[2];
+if ($_from < 1) {
+    // Book numbers are 1-based; anything lower would map to a negative file index.
+    $_from = 1;
+}
+if ($_to > 217) {
+    $_to = 217;
+}
+
+$dirLog = _DIR_LOG_ . "/";
+
+$filelist = array();
+$fileNums = 0;
+$log = "";
+
+
+$dns = _FILE_DB_PALI_INDEX_;
+$dbh_word_index = new PDO($dns, _DB_USERNAME_, _DB_PASSWORD_, array(PDO::ATTR_PERSISTENT => true));
+$dbh_word_index->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+
+// Resolve filelist.csv next to this script so the CLI works from any
+// working directory.
+$handle = fopen(__DIR__."/filelist.csv", 'r');
+if ($handle !== false) {
+    while (($row = fgetcsv($handle, 0, ',')) !== false) {
+        $filelist[$fileNums] = $row;
+        $fileNums++;
+    }
+    fclose($handle);
+} else {
+    echo "error - can not open ".__DIR__."/filelist.csv".PHP_EOL;
+}
+// The original assigned the clamp to $to while the loop reads $_to, so the
+// upper bound was never applied.
+if ($_to == 0 || $_to >= $fileNums) {
+    $_to = $fileNums;
+}
+
+for ($from = $_from - 1; $from < $_to; $from++) {
+    echo "doing ".($from+1).PHP_EOL;
+
+    // Open the CSV first: a missing file must not wipe the existing rows.
+    $csvName = _DIR_CSV_PALI_CANON_WORD_ . "/{$from}_words.csv";
+    $fpoutput = fopen($csvName, "r");
+    if ($fpoutput === false) {
+        echo "error - can not open $csvName".PHP_EOL;
+        $log .= "$from, $csvName, error, can not open file \r\n";
+        continue;
+    }
+
+    // Delete and inserts share one transaction; any failure restores the
+    // book's previous rows.
+    $error = null;
+    $count = 0;
+    $dbh_word_index->beginTransaction();
+
+    // Remove the old rows of this book.
+    $query = "DELETE FROM "._TABLE_WORD_." WHERE book = ?";
+    $stmt = $dbh_word_index->prepare($query);
+    if ($stmt === false) {
+        $error = $dbh_word_index->errorInfo();
+    } elseif ($stmt->execute(array($from + 1)) === false) {
+        $error = $stmt->errorInfo();
+    }
+
+    if ($error === null) {
+        $query = "INSERT INTO "._TABLE_WORD_." ( sn , book , paragraph , wordindex , bold ) VALUES (?,?,?,?,?)";
+        $stmt = $dbh_word_index->prepare($query);
+        if ($stmt === false) {
+            $error = $dbh_word_index->errorInfo();
+        } else {
+            while (($data = fgetcsv($fpoutput, 0, ',')) !== false) {
+                if ($data === array(null)) {
+                    // fgetcsv reports a blank line as array(null); skip it.
+                    continue;
+                }
+                if ($stmt->execute($data) === false) {
+                    // Keep the failing statement's own error and stop.
+                    $error = $stmt->errorInfo();
+                    break;
+                }
+                $count++;
+            }
+        }
+    }
+    fclose($fpoutput);
+
+    if ($error !== null) {
+        $dbh_word_index->rollBack();
+        echo "error - $error[2] ".PHP_EOL;
+        $log .= "$from, $csvName, error, $error[2] \r\n";
+    } else {
+        $dbh_word_index->commit();
+        echo "updata $count recorders.".PHP_EOL;
+        $log .= "updata $count recorders.\r\n";
+    }
+    /*
+    $myLogFile = fopen($dirLog . "insert_index.log", "a");
+    fwrite($myLogFile, $log);
+    fclose($myLogFile);
+    */
+}
+echo "齐活!功德无量!all done!".PHP_EOL;
+
+?>

+ 64 - 0
app/install/db_insert_wordindex_from_csv_cli.php

@@ -0,0 +1,64 @@
+<?php
+/*
+ * CLI import of the word index table.
+ *
+ * Empties _TABLE_WORD_INDEX_ and then loads every regular file found in
+ * _DIR_CSV_PALI_CANON_WORD_INDEX_. Each CSV row is bound as-is to the columns
+ * id, word, word_en, count, normal, bold, is_base, len. Every file is loaded
+ * in its own transaction.
+ *
+ * Usage: php db_insert_wordindex_from_csv_cli.php
+ */
+require_once __DIR__."/../path.php";
+require_once __DIR__.'/../public/_pdo.php';
+
+
+echo "Insert Word Index To DB".PHP_EOL;
+
+
+$dirLog = _DIR_LOG_ . "/";
+$log = "";
+
+$dns = _FILE_DB_WORD_INDEX_;
+$dbh_word_index = new PDO($dns, _DB_USERNAME_, _DB_PASSWORD_, array(PDO::ATTR_PERSISTENT => true));
+$dbh_word_index->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+
+// List the input files before touching the table: if the directory cannot be
+// read, the existing index must stay intact.
+$scan = scandir(_DIR_CSV_PALI_CANON_WORD_INDEX_);
+if ($scan === false) {
+    echo "error - can not read directory "._DIR_CSV_PALI_CANON_WORD_INDEX_.PHP_EOL;
+    exit(1);
+}
+
+// Remove all existing rows.
+$query = "DELETE FROM "._TABLE_WORD_INDEX_." WHERE true";
+$stmt = $dbh_word_index->prepare($query);
+if ($stmt === false || $stmt->execute() === false) {
+    $error = ($stmt === false) ? $dbh_word_index->errorInfo() : $stmt->errorInfo();
+    echo "error - $error[2]".PHP_EOL;
+    exit(1);
+}
+
+foreach ($scan as $entry) {
+    $filename = _DIR_CSV_PALI_CANON_WORD_INDEX_."/".$entry;
+    if (!is_file($filename)) {
+        // Skips ".", ".." and sub-directories.
+        continue;
+    }
+    echo "doing ".$filename.PHP_EOL;
+
+    $fpoutput = fopen($filename, "r");
+    if ($fpoutput === false) {
+        echo "open file error".PHP_EOL;
+        continue;
+    }
+
+    // One transaction per file; a failing row rolls back that file only.
+    $error = null;
+    $count = 0;
+    $dbh_word_index->beginTransaction();
+    $query = "INSERT INTO "._TABLE_WORD_INDEX_." (id , word , word_en , count , normal , bold , is_base , len ) VALUES (?,?,?,?,?,?,?,?)";
+    $stmt = $dbh_word_index->prepare($query);
+    if ($stmt === false) {
+        $error = $dbh_word_index->errorInfo();
+    } else {
+        while (($data = fgetcsv($fpoutput, 0, ',')) !== false) {
+            if ($data === array(null)) {
+                // fgetcsv reports a blank line as array(null); skip it.
+                continue;
+            }
+            if ($stmt->execute($data) === false) {
+                // Keep the failing statement's own error and stop.
+                $error = $stmt->errorInfo();
+                break;
+            }
+            $count++;
+        }
+    }
+    fclose($fpoutput);
+
+    if ($error !== null) {
+        $dbh_word_index->rollBack();
+        echo "error - $error[2]".PHP_EOL;
+        $log .= "$filename, error, $error[2] \r\n";
+    } else {
+        $dbh_word_index->commit();
+        echo "updata $count recorders.".PHP_EOL;
+        $log .= "updata $count recorders.\r\n";
+    }
+}
+
+echo "齐活!功德无量!all done!".PHP_EOL;
+
+
+
+?>
+

+ 227 - 0
app/install/db_update_palitext_cli.php

@@ -0,0 +1,227 @@
+<?php
+/*
+ * CLI update of chapter/heading information in the Pali text table.
+ *
+ * For every book in the requested range this script reads
+ * _DIR_PALI_TITLE_/{book}_pali.csv (first row skipped), loads the book's
+ * paragraphs from _TABLE_PALI_TEXT_ ordered by paragraph number, and for each
+ * paragraph writes back: level and toc (CSV columns 3 and 5), chapter_len,
+ * next_chapter, prev_chapter, parent and chapter_strlen.
+ *
+ * Usage: php db_update_palitext_cli.php from to
+ *   from / to: first and last book number (1-217).
+ */
+require_once __DIR__."/../path.php";
+require_once __DIR__.'/../public/_pdo.php';
+
+
+echo "Insert Pali Text To DB".PHP_EOL;
+
+if ($argc != 3) {
+    echo "help".PHP_EOL;
+    echo $argv[0]." from to".PHP_EOL;
+    echo "from = 1-217".PHP_EOL;
+    echo "to = 1-217".PHP_EOL;
+    exit;
+}
+$_from = (int) $argv[1];
+$_to = (int) $argv[2];
+if ($_from < 1) {
+    // Book numbers are 1-based; anything lower would map to a negative index.
+    $_from = 1;
+}
+if ($_to > 217) {
+    $_to = 217;
+}
+
+
+$to = $_to;
+
+$filelist = array();
+$fileNums = 0;
+$log = "";
+
+
+// Resolve filelist.csv next to this script so the CLI works from any
+// working directory.
+$handle = fopen(__DIR__."/filelist.csv", 'r');
+if ($handle !== false) {
+    while (($row = fgetcsv($handle, 0, ',')) !== false) {
+        $filelist[$fileNums] = $row;
+        $fileNums++;
+    }
+    fclose($handle);
+} else {
+    echo "error - can not open ".__DIR__."/filelist.csv".PHP_EOL;
+}
+// NOTE(review): the sibling *_from_csv_cli scripts clamp to $fileNums, this
+// one to $fileNums - 1 (kept as in the original) - confirm which is intended.
+if ($to == 0 || $to >= $fileNums) {
+    $to = $fileNums - 1;
+}
+
+PDO_Connect(_FILE_DB_PALITEXT_,_DB_USERNAME_,_DB_PASSWORD_);
+
+for ($from = $_from - 1; $from < $to; $from++) {
+    echo "doing $from".PHP_EOL;
+
+    if (!isset($filelist[$from][1])) {
+        echo "error - no filelist entry for index $from".PHP_EOL;
+        $log = $log . "$from, , error, no filelist entry \r\n";
+        continue;
+    }
+
+    $book = $from + 1;
+    $FileName = $filelist[$from][1] . ".htm";
+    $dirLog = _DIR_LOG_ . "/";
+    $htmlPath = _DIR_PALI_HTML_ . "/" . $FileName;
+    $titlePath = _DIR_PALI_TITLE_ . "/" . $book . "_pali.csv";
+
+    echo "doing:" . $FileName . PHP_EOL;
+
+    $log = $log . date("Y-m-d h:i:sa") . ",$from,$FileName,open\r\n";
+
+    // Load the VRI HTML file; its line count is only used for the
+    // consistency check below.
+    $pali_text_array = array();
+    if (($fpPaliText = fopen($htmlPath, "r")) !== false) {
+        while (($data = fgets($fpPaliText)) !== false) {
+            $pali_text_array[] = $data;
+        }
+        fclose($fpPaliText);
+        echo "pali text load:" . $htmlPath . PHP_EOL;
+    } else {
+        echo "can not pali text file. filename=" . $htmlPath . PHP_EOL;
+    }
+
+    // Load the title CSV; row 0 is skipped as a header.
+    $arrInserString = array();
+    $inputRow = 0;
+    if (($fp = fopen($titlePath, "r")) !== false) {
+        while (($data = fgetcsv($fp, 0, ',')) !== false) {
+            if ($data === array(null)) {
+                // fgetcsv reports a blank line as array(null); skip it.
+                continue;
+            }
+            if ($inputRow > 0) {
+                $arrInserString[] = $data;
+            }
+            $inputRow++;
+        }
+        fclose($fp);
+        echo "单词表load:" . $titlePath . PHP_EOL;
+    } else {
+        // Without the CSV every paragraph would get a NULL level/toc.
+        echo "can not open csv file. filename=" . $titlePath . PHP_EOL;
+        $log = $log . "$from, $FileName, error, can not open $titlePath \r\n";
+        continue;
+    }
+
+    if ((count($arrInserString)) != count($pali_text_array) - 2) {
+        $log = $log . "$from, $FileName,error,文件行数不匹配 csv = " . (count($arrInserString) - 1) . " pali_text_array=" . (count($pali_text_array) - 2) . " \r\n";
+    }
+
+    // Load the book's paragraphs. $book is an integer computed above, so the
+    // interpolation cannot inject SQL.
+    $query = "SELECT * from "._TABLE_PALI_TEXT_." where book = '$book'  order by paragraph asc";
+    $title_data = PDO_FetchAll($query);
+    echo "Paragraph Count:" . count($title_data) . " arrInserString:".count($arrInserString). PHP_EOL;
+
+    $paragraph_count = count($title_data);
+
+    if (count($arrInserString) < $paragraph_count) {
+        // Each table row needs a CSV row at the same position.
+        echo "error - csv has fewer rows than the table, book $book skipped".PHP_EOL;
+        $log = $log . "$from, $FileName, error, csv rows=" . count($arrInserString) . " table rows=$paragraph_count \r\n";
+        continue;
+    }
+
+    $paragraph_info = array();
+    $paragraph_info[] = array($from, -1, $paragraph_count, -1, -1, -1);
+
+    // Take the heading level of every paragraph from the CSV.
+    for ($iPar = 0; $iPar < $paragraph_count; $iPar++) {
+        $title_data[$iPar]["level"] = $arrInserString[$iPar][3];
+    }
+
+    // All updates of a book run in one transaction.
+    $error = null;
+    $PDO->beginTransaction();
+    try {
+        $query = "UPDATE "._TABLE_PALI_TEXT_." SET level = ? , toc = ? , chapter_len = ? , next_chapter = ?, prev_chapter=? , parent= ?  ,  chapter_strlen = ?  WHERE book=? and paragraph=?";
+        $stmt = $PDO->prepare($query);
+        if ($stmt === false) {
+            $error = $PDO->errorInfo();
+        }
+
+        for ($iPar = 0; $error === null && $iPar < $paragraph_count; $iPar++) {
+            $paragraph = $title_data[$iPar]["paragraph"];
+
+            // Level 8 is treated as level 100 in the comparisons below.
+            if ((int) $title_data[$iPar]["level"] == 8) {
+                $title_data[$iPar]["level"] = 100;
+            }
+
+            $curr_level = (int) $title_data[$iPar]["level"];
+
+            // Chapter length: distance to the next paragraph whose level is
+            // the same or smaller, or to the end of the book.
+            $length = -1;
+            for ($iPar1 = $iPar + 1; $iPar1 < $paragraph_count; $iPar1++) {
+                $thislevel = (int) $title_data[$iPar1]["level"];
+                if ($thislevel <= $curr_level) {
+                    $length = (int) $title_data[$iPar1]["paragraph"] - $paragraph;
+                    break;
+                }
+            }
+            if ($length == -1) {
+                $length = $paragraph_count - $paragraph + 1;
+            }
+
+            // Previous paragraph with the same level.
+            $prev = -1;
+            for ($iPar1 = $iPar - 1; $iPar1 >= 0; $iPar1--) {
+                if ($title_data[$iPar1]["level"] == $curr_level) {
+                    $prev = $title_data[$iPar1]["paragraph"];
+                    break;
+                }
+            }
+
+            // Next paragraph with the same level.
+            $next = -1;
+            for ($iPar1 = $iPar + 1; $iPar1 < $paragraph_count; $iPar1++) {
+                if ($title_data[$iPar1]["level"] == $curr_level) {
+                    $next = $title_data[$iPar1]["paragraph"];
+                    break;
+                }
+            }
+
+            // Parent: nearest earlier paragraph with a smaller level.
+            $parent = -1;
+            for ($iPar1 = $iPar - 1; $iPar1 >= 0; $iPar1--) {
+                if ($title_data[$iPar1]["level"] < $curr_level) {
+                    $parent = $title_data[$iPar1]["paragraph"];
+                    break;
+                }
+            }
+
+            // Total character count of the chapter. The extra upper bound
+            // keeps the index inside $title_data when $length overshoots.
+            $iChapter_strlen = 0;
+            for ($i = $iPar; $i < $iPar + $length && $i < $paragraph_count; $i++) {
+                $iChapter_strlen += $title_data[$i]["lenght"];
+            }
+
+            $newData = array(
+                $arrInserString[$iPar][3],
+                $arrInserString[$iPar][5],
+                $length,
+                $next,
+                $prev,
+                $parent,
+                $iChapter_strlen,
+                $book,
+                $paragraph,
+            );
+            if ($stmt->execute($newData) === false) {
+                // Keep the failing statement's own error and stop.
+                $error = $stmt->errorInfo();
+                break;
+            }
+
+            if ($curr_level > 0 && $curr_level < 8) {
+                $paragraph_info[] = array($book, $paragraph, $length, $prev, $next, $parent);
+            }
+        }
+    } catch (PDOException $e) {
+        // The error mode chosen by PDO_Connect is not visible here, so
+        // exceptions are handled as well as false returns.
+        $error = array(null, null, $e->getMessage());
+    }
+
+    if ($error !== null) {
+        $PDO->rollBack();
+        echo "error - $error[2]". PHP_EOL;
+
+        $log = $log . "$from, $FileName, error, $error[2] \r\n";
+    } else {
+        $PDO->commit();
+        $count = count($title_data);
+        echo "updata $count paragraph info recorders.". PHP_EOL;
+        echo count($paragraph_info) . " Heading". PHP_EOL;
+    }
+    // End of paragraph information.
+    /*
+    $myLogFile = fopen(_DIR_LOG_ . "/db_update_palitext.log", "a");
+    fwrite($myLogFile, $log);
+    fclose($myLogFile);
+    */
+}
+echo "all done!".PHP_EOL;
+?>
+