word_index_weight_refresh.php 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  <?php
  /*
   * Recompute the "weight" column of every row in the `word` table for a
   * range of books.
   *
   * Usage:
   *   CLI : php word_index_weight_refresh.php <from> <to>
   *   HTTP: word_index_weight_refresh.php?from=<from>&to=<to>
   *
   * For each paragraph of each book the words are read in id order and
   * weighted as follows (the stored value is (int)(weight * 100)):
   *   - non-bold word        : min(1.01 ^ n, 1.9) / n, where n is the number of
   *                            times the same wordindex occurs in the paragraph;
   *   - isolated bold word   : 100 + $book_weight[book];
   *   - word in a bold run   : 10 + (length of this word / total length of the
   *                            run), lengths coming from wordindex.len.
   *
   * $book_weight is not defined in this file; presumably it is provided by
   * word_index_weight_table.php - verify against that include.
   */
  require_once '../path.php';
  require_once './word_index_weight_table.php';

  // Upper bound for the book number (hard-coded limit kept from the original script).
  $max_book = 217;

  if (isset($_GET["from"])) {
      if (!isset($_GET["to"])) {
          echo "无效的参数 ";
          exit;
      }
      // Cast request input to int: these values end up in SQL queries.
      $from = (int)$_GET["from"];
      $to = (int)$_GET["to"];
  } else {
      if (!isset($argc) || $argc != 3) {
          echo "无效的参数 ";
          exit;
      }
      $from = (int)$argv[1];
      $to = (int)$argv[2];
  }
  // The clamp applies to both entry paths, not only to the CLI one.
  if ($to > $max_book) {
      $to = $max_book;
  }

  $dh_word = new PDO("sqlite:" . _FILE_DB_WORD_INDEX_, "", "");
  $dh_word->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  $dh_pali = new PDO("sqlite:" . _FILE_DB_PALI_INDEX_, "", "");
  $dh_pali->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);

  // Prepare every statement once; they are re-executed inside the loops.
  $stmt_max_para = $dh_pali->prepare("SELECT max(paragraph) from word where book = ?");
  $stmt_para_words = $dh_pali->prepare(
      "SELECT id,book,wordindex,bold from word where book = ? and paragraph = ? order by id ASC"
  );
  $stmt_weight = $dh_pali->prepare("UPDATE word set weight = ? where id=? ");
  $stmt_word_len = $dh_word->prepare("SELECT len from wordindex where id = ?");

  // With ERRMODE_WARNING a failed prepare() returns false instead of throwing.
  if (!$stmt_max_para || !$stmt_para_words || !$stmt_weight) {
      $error = $dh_pali->errorInfo();
      echo "error - $error[2]";
      exit(1);
  }
  if (!$stmt_word_len) {
      $error = $dh_word->errorInfo();
      echo "error - $error[2]";
      exit(1);
  }

  // wordindex id => len, so each word length is looked up at most once per run of the script.
  $len_cache = array();

  echo "from=$from to = $to \n";

  for ($i = $from; $i <= $to; $i++) {
      $time_start = microtime(true);
      echo "正在处理 book= $i ";

      $stmt_max_para->bindValue(1, $i, PDO::PARAM_INT);
      $row = $stmt_max_para->execute() ? $stmt_max_para->fetch(PDO::FETCH_NUM) : false;
      $stmt_max_para->closeCursor();

      if ($row) {
          $max_para = $row[0];
          echo "段落数量:$max_para \n";
          $last_para = (int)$max_para;

          for ($j = 0; $j <= $last_para; $j++) {
              $stmt_para_words->bindValue(1, $i, PDO::PARAM_INT);
              $stmt_para_words->bindValue(2, $j, PDO::PARAM_INT);
              if (!$stmt_para_words->execute()) {
                  $error = $stmt_para_words->errorInfo();
                  echo "error - $error[2]";
                  continue;
              }
              $fetch = $stmt_para_words->fetchAll(PDO::FETCH_ASSOC);
              $word_total = count($fetch);
              if ($word_total === 0) {
                  // Nothing to weight or write for this paragraph.
                  continue;
              }

              // Occurrences of each wordindex inside this paragraph, counted
              // from the rows already loaded instead of a second GROUP BY query.
              $vocabulary = array();
              foreach ($fetch as $word) {
                  $voc_key = (string)$word["wordindex"];
                  $vocabulary[$voc_key] = isset($vocabulary[$voc_key]) ? $vocabulary[$voc_key] + 1 : 1;
              }

              $iWord = 0;
              while ($iWord < $word_total) {
                  if ($fetch[$iWord]["bold"] == 0) {
                      // Non-bold word: the paragraph-wide score grows with the
                      // repeat count, is capped at 1.9, and is shared between
                      // all occurrences.
                      $count = $vocabulary[(string)$fetch[$iWord]["wordindex"]];
                      $paraWeight = pow(1.01, $count);
                      if ($paraWeight > 1.9) {
                          $paraWeight = 1.9;
                      }
                      $weight = $paraWeight / $count;
                      $fetch[$iWord]["weight"] = (int)($weight * 100);
                      $iWord++;
                      continue;
                  }

                  // Bold word: $iWord is the first word of a run because every
                  // run is consumed completely below. Extend $end to its last word.
                  $begin = $iWord;
                  $end = $iWord;
                  while ($end + 1 < $word_total && $fetch[$end + 1]["bold"] != 0) {
                      $end++;
                  }

                  if ($begin === $end) {
                      // Isolated bold word. The original looked up the word and
                      // tested for a trailing "ti", but both branches assigned
                      // the same weight, so the lookup is dropped.
                      $bookId = (int)$fetch[$iWord]["book"];
                      $book_bonus = isset($book_weight[$bookId]) ? $book_weight[$bookId] : 0;
                      $weight = 100 + $book_bonus;
                      $fetch[$iWord]["weight"] = (int)($weight * 100);
                  } else {
                      // Run of consecutive bold words: collect every length once.
                      $len_sum = 0;
                      $run_len = array();
                      for ($iBold = $begin; $iBold <= $end; $iBold++) {
                          $boldid = (int)$fetch[$iBold]["wordindex"];
                          if (!array_key_exists($boldid, $len_cache)) {
                              $found_len = 0;
                              $stmt_word_len->bindValue(1, $boldid, PDO::PARAM_INT);
                              if ($stmt_word_len->execute()) {
                                  $wordbold = $stmt_word_len->fetch(PDO::FETCH_ASSOC);
                                  if ($wordbold && is_numeric($wordbold["len"])) {
                                      $found_len = $wordbold["len"] + 0;
                                  }
                              }
                              $stmt_word_len->closeCursor();
                              $len_cache[$boldid] = $found_len;
                          }
                          $run_len[$iBold] = $len_cache[$boldid];
                          $len_sum += $run_len[$iBold];
                      }
                      for ($iBold = $begin; $iBold <= $end; $iBold++) {
                          // Share of the run's total length held by this word.
                          // (The original compared $iBold with the book number
                          // $i, so the current word's length was never picked up.)
                          $share = $len_sum > 0 ? $run_len[$iBold] / $len_sum : 0;
                          $weight = 10 + $share;
                          $fetch[$iBold]["weight"] = (int)($weight * 100);
                      }
                  }
                  $iWord = $end + 1;
              }

              // Write the whole paragraph in one transaction; undo it if any
              // single update fails instead of committing a partial paragraph.
              $dh_pali->beginTransaction();
              $write_ok = true;
              foreach ($fetch as $word) {
                  if (!$stmt_weight->execute(array($word["weight"], $word["id"]))) {
                      $write_ok = false;
                      break;
                  }
              }
              if ($write_ok) {
                  $dh_pali->commit();
              } else {
                  // Statement-level errors are reported on the statement handle.
                  $error = $stmt_weight->errorInfo();
                  $dh_pali->rollBack();
                  echo "error - $error[2]";
              }
          }
      } else {
          echo "无法获取段落最大值";
      }
      echo "处理时间 :" . (microtime(true) - $time_start);
  }