word_index_weight_refresh.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  <?php
  /*
   * Compute word weights.
   *
   * Recomputes the `weight` column of every row of the `word` table for the
   * books in the inclusive range [from, to].
   *
   * Invocation:
   *   CLI: php word_index_weight_refresh.php <from> <to>
   *   Web: ?from=<from>&to=<to>
   *
   * Weight rules, applied per paragraph (the stored value is (int)(weight * 100)):
   *   - non-bold word:        min(1.01 ^ count, 1.9) / count, where count is the number
   *                           of rows in the paragraph sharing the same wordindex;
   *   - isolated bold word:   100 + $book_weight[book];
   *   - word in a bold run:   10 + len(word) / sum(len of every word in the run).
   *
   * $book_weight is not defined in this file; it is presumably provided by
   * word_index_weight_table.php -- TODO confirm.
   */
  require_once '../path.php';
  require_once './word_index_weight_table.php';

  if (isset($_GET["from"])) {
      # Both bounds are required; without "to" the range is undefined.
      if (!isset($_GET["to"])) {
          echo "无效的参数 ";
          exit;
      }
      $from = (int) $_GET["from"];
      $to = (int) $_GET["to"];
  } else {
      if (!isset($argc) || $argc != 3) {
          echo "无效的参数 ";
          exit;
      }
      $from = (int) $argv[1];
      $to = (int) $argv[2];
      # CLI runs are capped at book 217 (presumably the highest book number -- TODO confirm).
      if ($to > 217) {
          $to = 217;
      }
  }

  $dh_word = new PDO(_FILE_DB_WORD_INDEX_, "", "");
  $dh_word->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
  $dh_pali = new PDO(_FILE_DB_PALI_INDEX_, "", "");
  $dh_pali->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);

  # Prepare every statement once instead of rebuilding the SQL on each iteration.
  $stmt_max_para = $dh_pali->prepare("SELECT max(paragraph) from word where book=?");
  $stmt_words = $dh_pali->prepare("SELECT id,book,wordindex,bold from word where book=? and paragraph=? order by id ASC");
  $stmt_weight = $dh_pali->prepare("UPDATE word set weight = ? where id=? ");
  $stmt_len = $dh_word->prepare("SELECT len from wordindex where id=?");
  if (!$stmt_max_para || !$stmt_words || !$stmt_weight || !$stmt_len) {
      # With ERRMODE_WARNING a failed prepare() returns false instead of throwing.
      echo "error - prepare failed \n";
      exit;
  }

  echo "from=$from to = $to \n";

  for ($i = $from; $i <= $to; $i++) {
      $time_start = microtime(true);
      echo "正在处理 book= $i ";

      $stmt_max_para->execute(array($i));
      $row = $stmt_max_para->fetch(PDO::FETCH_NUM);
      $stmt_max_para->closeCursor();

      # max() yields a single NULL row when the book has no words at all.
      if ($row && $row[0] !== null) {
          $max_para = (int) $row[0];
          echo "段落数量:$max_para \n";

          for ($j = 0; $j <= $max_para; $j++) {
              # Bind as integers so the comparison matches the numeric literals used before.
              $stmt_words->bindValue(1, $i, PDO::PARAM_INT);
              $stmt_words->bindValue(2, $j, PDO::PARAM_INT);
              $stmt_words->execute();
              $fetch = $stmt_words->fetchAll(PDO::FETCH_ASSOC);
              $word_count = count($fetch);
              if ($word_count === 0) {
                  # Nothing to weigh or write for an empty paragraph.
                  continue;
              }

              # Occurrences of each wordindex in this paragraph (bold rows included),
              # counted from the rows already loaded rather than a second GROUP BY query.
              $vocabulary = array();
              foreach ($fetch as $word) {
                  $voc_key = (string) $word["wordindex"];
                  $vocabulary[$voc_key] = isset($vocabulary[$voc_key]) ? $vocabulary[$voc_key] + 1 : 1;
              }

              # Word lengths looked up so far in this paragraph, keyed by wordindex id.
              $len_cache = array();

              for ($iWord = 0; $iWord < $word_count; $iWord++) {
                  # Loose comparisons on "bold" are deliberate: PDO may return the column as a string.
                  if ($fetch[$iWord]["bold"] == 0) {
                      # Non-bold word.
                      $count = $vocabulary[(string) $fetch[$iWord]["wordindex"]];
                      # Total score shared by all occurrences, capped at 1.9.
                      $paraWeight = min(pow(1.01, $count), 1.9);
                      $weight = $paraWeight / $count;
                  } else {
                      # Bold word: find the run of adjacent bold words around it.
                      $begin = $iWord;
                      while ($begin > 0 && $fetch[$begin - 1]["bold"] == 1) {
                          $begin--;
                      }
                      $end = $iWord;
                      while ($end < $word_count - 1 && $fetch[$end + 1]["bold"] == 1) {
                          $end++;
                      }

                      if ($begin == $end) {
                          # Isolated bold word.
                          # NOTE(review): an earlier revision fetched the word and branched on a
                          # trailing "ti", but both branches assigned this same value, so the
                          # lookup is omitted.
                          $bookId = (int) $fetch[$iWord]["book"];
                          $weight = 100 + (isset($book_weight[$bookId]) ? $book_weight[$bookId] : 0);
                      } else {
                          # Run of consecutive bold words: share by relative length.
                          $len_sum = 0;
                          $len_curr = 0;
                          for ($iBold = $begin; $iBold <= $end; $iBold++) {
                              $boldid = (int) $fetch[$iBold]["wordindex"];
                              if (!array_key_exists($boldid, $len_cache)) {
                                  $stmt_len->bindValue(1, $boldid, PDO::PARAM_INT);
                                  $stmt_len->execute();
                                  $wordbold = $stmt_len->fetch(PDO::FETCH_ASSOC);
                                  $stmt_len->closeCursor();
                                  # A missing wordindex row contributes length 0.
                                  $len_cache[$boldid] = $wordbold ? $wordbold["len"] : 0;
                              }
                              $len = $len_cache[$boldid];
                              $len_sum += $len;
                              # Compare against the current word position, not the book number.
                              if ($iBold == $iWord) {
                                  $len_curr = $len;
                              }
                          }
                          # Avoid dividing by zero when no length could be resolved.
                          $weight = $len_sum > 0 ? 10 + $len_curr / $len_sum : 10;
                      }
                  }
                  $fetch[$iWord]["weight"] = (int) ($weight * 100);
              }

              # Write the weights of the whole paragraph to the database atomically.
              $dh_pali->beginTransaction();
              $update_ok = true;
              foreach ($fetch as $word) {
                  if (!$stmt_weight->execute(array($word["weight"], $word["id"]))) {
                      $update_ok = false;
                      break;
                  }
              }
              if ($update_ok) {
                  $dh_pali->commit();
              } else {
                  # Report the failing statement's error and discard the partial paragraph.
                  $error = $stmt_weight->errorInfo();
                  $dh_pali->rollBack();
                  echo "error - $error[2]";
              }
          }
      } else {
          echo "无法获取段落最大值";
      }
      echo "处理时间 :" . (microtime(true) - $time_start);
  }