Просмотр исходного кода

直接将巴利文插入数据库

visuddhinanda 4 лет назад
Родитель
Сommit
da3b9101ab
1 измененных файлов с 36 добавлено и 7 удалено
  1. 36 7
      app/fts/sql.php

+ 36 - 7
app/fts/sql.php

@@ -1,5 +1,5 @@
 <?php
-
+require_once __DIR__."/../path.php";
 /*
  * 该脚本用于生成 SQL 语句, 将三藏语料 CSV 数据 (如:abh01a.att.csv)
  * 转换为 SQL 语句插入到 PostgreSQL 内,数据表结构参见 fts.sql
@@ -74,18 +74,25 @@ function count_bld ($bld_array) {
 }
 
 
+$dns = _DB_ENGIN_.":host="._DB_HOST_.";port="._DB_PORT_.";dbname="._DB_NAME_.";user="._DB_USERNAME_.";password="._DB_PASSWORD_.";";
+$dbh_fts = new PDO($dns, _DB_USERNAME_, _DB_PASSWORD_, array(PDO::ATTR_PERSISTENT => true));
+$dbh_fts->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+
 // 查找 tmp/palicsv/ 目录下的语料数据
-$palicsv_path = '../../tmp/palicsv/';
+$palicsv_path = __DIR__.'/../../tmp/palicsv/';
 $scan = scandir($palicsv_path);
+$fileCounter = 0;
 foreach($scan as $foldername) {
   if (is_dir("$palicsv_path/$foldername")) {
+
     $csv_file = "$palicsv_path/$foldername/$foldername.csv";
 
     // DEBUG
     // if ($foldername != 'abh01m.mul') continue;
 
     if (is_file($csv_file)) {
-      echo '正在处理文件: ' . PHP_EOL . $csv_file . PHP_EOL;
+      $fileCounter++;      
+      echo "正在处理文件: $fileCounter" . PHP_EOL . $csv_file . PHP_EOL;
       // 存放当前正在处理的 CSV 文件生成的所有 SQL
       $sql_from_csv = '';
       // 初始化段落为 0 (没有这种段落)
@@ -94,6 +101,21 @@ foreach($scan as $foldername) {
       $bold_text = [];
       
       if (($handle = fopen($csv_file, "r")) !== FALSE) {
+        # 获取book id
+        $data = fgetcsv($handle, 1000, ",");
+        $data = fgetcsv($handle, 1000, ",");
+        $bookId = (int)mb_substr($data[2],1);
+        #删除旧数据
+        $query = "DELETE FROM "._TABLE_FTS_." WHERE book=?";
+        $stmt = $dbh_fts->prepare($query);
+        $stmt->execute(array($bookId));
+
+        // 开始一个事务,关闭自动提交
+        $dbh_fts->beginTransaction();
+        $query = "INSERT INTO "._TABLE_FTS_." (book , paragraph , wid,bold_single,bold_double,bold_multiple,content) VALUES ( ? , ? , ? , ? , ? , ? , ?  )";
+        $stmt = $dbh_fts->prepare($query);
+
+        rewind($handle);
         $row=0;
         while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
           #忽略第一行
@@ -142,14 +164,13 @@ foreach($scan as $foldername) {
                   $bold_multiple = "";
                 }
                 
-                $sql_from_csv .=
-                  "INSERT INTO fts VALUES ($paragraph, '$book', '$wid', '$bold_single', '$bold_double', '$bold_multiple', '$content');" . PHP_EOL;
-                // 转换后,重置黑体字数据
+                $stmt->execute(array($book, $paragraph, $wid,$bold_single,$bold_double,$bold_multiple,$content));
+                  // 转换后,重置黑体字数据
                 $bold_text = [];
               }
               // 如果是不同段落,则赋新的值
               $content = $current_word;
-              $paragraph =  $data[3];
+              $paragraph =  (int)$data[3];
               $book = (int)mb_substr($data[2],1);
               $wid =  $data[1];
 
@@ -160,6 +181,14 @@ foreach($scan as $foldername) {
 
         }
         fclose($handle);
+        // 提交更改
+        $dbh_fts->commit();
+        if (!$stmt || ($stmt && $stmt->errorCode() != 0)) {
+            $error = $dbh_fts->errorInfo();
+            echo "error - $error[2]".PHP_EOL;
+        } else {
+            echo "updata $row recorders.".PHP_EOL;
+        }	
       }
 
       file_put_contents("./sql/$foldername.sql", $sql_from_csv);