word_index_weight_refresh.php 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. <?php
  2. require_once '../path.php';
  3. require_once './word_index_weight_table.php';
  4. if(isset($_GET["from"])){
  5. $from = $_GET["from"];
  6. $to = $_GET["to"];
  7. }
  8. else{
  9. if ($argc != 3){
  10. echo "无效的参数 ";
  11. exit;
  12. }
  13. $from = (int)$argv[1];
  14. $to =(int)$argv[2];
  15. if($to>217){
  16. $to = 217;
  17. }
  18. }
  19. $dh_word = new PDO("sqlite:"._FILE_DB_WORD_INDEX_, "", "");
  20. $dh_word->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  21. $dh_pali = new PDO("sqlite:"._FILE_DB_PALI_INDEX_, "", "");
  22. $dh_pali->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  23. echo "from=$from to = $to \n";
  24. for ($i=$from; $i <=$to ; $i++) {
  25. $time_start = microtime(true);
  26. echo "正在处理 book= $i ";
  27. $query = "SELECT max(paragraph) from word where book={$i}";
  28. $stmt = $dh_pali->query($query);
  29. $row = $stmt->fetch(PDO::FETCH_NUM);
  30. if ($row) {
  31. $max_para = $row[0];
  32. echo "段落数量:$max_para \n";
  33. for ($j=0; $j <=$max_para ; $j++) {
  34. # code...
  35. $query = "SELECT id,book,wordindex,bold from word where book={$i} and paragraph={$j} order by id ASC";
  36. $stmt = $dh_pali->query($query);
  37. $fetch = $stmt->fetchAll(PDO::FETCH_ASSOC);
  38. $query = "SELECT wordindex,count(*) as co from word where book={$i} and paragraph={$j} group by wordindex";
  39. $stmt = $dh_pali->query($query);
  40. $fetch_voc = $stmt->fetchAll(PDO::FETCH_ASSOC);
  41. $vocabulary = array();
  42. foreach ($fetch_voc as $key => $value) {
  43. $vocabulary[$value["wordindex"]] = $value["co"];
  44. }
  45. for ($iWord=0; $iWord <count($fetch) ; $iWord++) {
  46. # 非黑体字
  47. if($fetch[$iWord]["bold"]==0){
  48. $count = $vocabulary[$fetch[$iWord]["wordindex"]];
  49. $paraWeight = pow(1.01,$count);//总分
  50. if($paraWeight>1.9){
  51. $paraWeight = 1.9;
  52. }
  53. $weight = $paraWeight/$count;
  54. }
  55. else{
  56. #黑体字
  57. #查找前后相连的黑体字
  58. $begin = $iWord;
  59. while ($fetch[$begin]["bold"] ==1) {
  60. $begin--;
  61. if($begin<0){
  62. break;
  63. }
  64. }
  65. $begin = $begin+1;
  66. $end = $iWord;
  67. while ($fetch[$end]["bold"] ==1) {
  68. $end++;
  69. if($end>count($fetch)-1){
  70. break;
  71. }
  72. }
  73. $end = $end-1;
  74. $bold_count = $end-$begin+1;
  75. if($bold_count==1){
  76. $query = "SELECT * from wordindex where id=".$fetch[$iWord]["wordindex"];
  77. $stmt_word = $dh_word->query($query);
  78. $wordinfo = $stmt_word->fetch(PDO::FETCH_ASSOC);
  79. $bookId = (int)$fetch[$iWord]["book"];
  80. if(mb_substr($wordinfo["word"],-2)=="ti"){
  81. $weight = 100+$book_weight[$bookId];
  82. }
  83. else{
  84. $weight = 100+$book_weight[$bookId];
  85. }
  86. //echo "单独黑体 $weight \n";
  87. }
  88. else{
  89. #连续黑体字
  90. //echo "连续黑体字";
  91. $len_sum = 0;
  92. $len_curr = 0;
  93. for ($iBold=$begin; $iBold <=$end ; $iBold++) {
  94. # code...
  95. $boldid = $fetch[$iBold]["wordindex"];
  96. $query = "SELECT len from wordindex where id=".$boldid;
  97. $stmt_bold = $dh_word->query($query);
  98. $wordbold = $stmt_bold->fetch(PDO::FETCH_ASSOC);
  99. $len_sum += $wordbold["len"];
  100. if($iBold==$i){
  101. $len_curr = $wordbold["len"];
  102. }
  103. }
  104. $weight = 10+$len_curr/$len_sum;
  105. }
  106. }
  107. //echo $weight."\n";
  108. $fetch[$iWord]["weight"] = (int)($weight*100);
  109. }
  110. # 将整段权重写入据库
  111. $dh_pali->beginTransaction();
  112. $query = "UPDATE word set weight = ? where id=? ";
  113. $stmt_weight = $dh_pali->prepare($query);
  114. foreach ($fetch as $key => $value) {
  115. $stmt_weight->execute(array($value["weight"],$value["id"]));
  116. }
  117. $dh_pali->commit();
  118. if (!$stmt_weight || ($stmt_weight && $stmt_weight->errorCode() != 0)) {
  119. $error = $dh_pali->errorInfo();
  120. echo "error - $error[2]";
  121. }
  122. else{
  123. //echo "修改数据库成功 book={$i} paragraph={$j} \n";
  124. }
  125. }
  126. } else {
  127. echo "无法获取段落最大值";
  128. }
  129. echo "处理时间 :".( microtime(true)-$time_start);
  130. }
  131. ?>