UpgradeProgressPara.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. <?php
  2. namespace App\Console\Commands;
  3. use App\Models\PaliSentence;
  4. use App\Models\Progress;
  5. use App\Models\Sentence;
  6. use App\Tools\Tools;
  7. use Illuminate\Console\Command;
  8. use Illuminate\Support\Facades\Cache;
  9. use Illuminate\Support\Facades\DB;
  /**
   * Artisan command that recomputes per-paragraph translation progress.
   *
   * For every (book_id, paragraph, channel_uid) group of non-empty sentences it
   * sums the length of the matching PaliSentence rows and upserts one Progress
   * row. The run is resumable: a checkpoint is written to the cache every
   * CHECKPOINT_EVERY groups and an interrupted run continues from it.
   */
  class UpgradeProgressPara extends Command
  {
      protected $signature = 'upgrade:progress.para {--book=} {--para=} {--channel=} {--fresh : 清除缓存断点,从头开始}';

      protected $description = '更新段落翻译进度(可重入:中断后重跑自动跳过已处理的段落)';

      /**
       * Base cache key of the resume cursor.
       *
       * The cursor is an array ['book' => book_id, 'para' => paragraph] describing
       * the group that was being processed at the last checkpoint; it is stored
       * with a 48 hour expiry.
       */
      private const CACHE_KEY = 'upgrade-progress-para:cursor';

      /** Number of processed groups between two checkpoints. */
      private const CHECKPOINT_EVERY = 500;

      /**
       * Run the command.
       *
       * @return int Exit code; always 0, including when Tools::isStop() aborts the run.
       */
      public function handle(): int
      {
          if (Tools::isStop()) {
              return 0;
          }

          $book = $this->option('book');
          $para = $this->option('para');
          $channelId = $this->option('channel');

          // Each filter combination owns its own cursor, so a filtered run can
          // neither resume from nor overwrite the checkpoint of another run.
          $cursorKey = $this->cursorKey($book, $para, $channelId);

          if ($this->option('fresh')) {
              Cache::forget($cursorKey);
              $this->info('Cleared cached cursor.');
          }

          $this->info('upgrade:progress.para start');
          $startTime = time();

          if ($channelId) {
              $this->line('channel='.$channelId);
          }

          // One row per (book_id, paragraph, channel_uid) group.
          $sentences = $this->buildQuery($book, $para, $channelId);

          // Restore the checkpoint of a previous, interrupted run of the same scope.
          $cursor = Cache::get($cursorKey);
          if (is_array($cursor) && isset($cursor['book'], $cursor['para'])) {
              $sentences = $this->applyResumeFilter($sentences, $cursor);
              $this->info("Resuming from book={$cursor['book']}, para={$cursor['para']}");
          }

          $total = DB::query()->fromSub($sentences, 't')->count();
          $this->info("sentences: {$total}");

          $curr = 0;
          foreach ($sentences->cursor() as $sentence) {
              $this->upgradeParagraph($sentence);
              $curr++;

              if ($curr % self::CHECKPOINT_EVERY !== 0) {
                  continue;
              }

              // Checkpoint: remember the paragraph currently being processed.
              Cache::put($cursorKey, [
                  'book' => $sentence->book_id,
                  'para' => $sentence->paragraph,
              ], now()->addHours(48));

              // $total was counted before streaming started; guard against zero
              // in case rows appeared in between.
              $percent = $total > 0 ? (int) ($curr * 100 / $total) : 100;
              $this->info("[{$percent}%] book={$sentence->book_id} para={$sentence->paragraph}");
              sleep(1);
          }

          // Everything was processed: the checkpoint is no longer needed.
          Cache::forget($cursorKey);

          $time = time() - $startTime;
          $this->info("upgrade:progress.para finished in {$time}s");

          return 0;
      }

      /**
       * Recompute and upsert the Progress row of one (book, paragraph, channel) group.
       *
       * @param  object  $group  Row produced by buildQuery(); book_id, paragraph and channel_uid are read.
       */
      private function upgradeParagraph($group): void
      {
          $baseQuery = Sentence::where('strlen', '>', 0)
              ->where('book_id', $group->book_id)
              ->where('paragraph', $group->paragraph)
              ->where('channel_uid', $group->channel_uid);

          // Start-word position of every non-empty sentence in this group.
          $wordStarts = (clone $baseQuery)->pluck('word_start');
          if ($wordStarts->isEmpty()) {
              return;
          }

          // Latest creation / modification time among the group's sentences.
          $finalAt = (clone $baseQuery)->max('created_at');
          $updateAt = (clone $baseQuery)->max('updated_at');

          // Sum PaliSentence.length over the Pali sentences whose word_begin
          // matches one of the sentences found above.
          $paraStrlen = PaliSentence::where('book', $group->book_id)
              ->where('paragraph', $group->paragraph)
              ->whereIn('word_begin', $wordStarts)
              ->sum('length');

          Progress::updateOrInsert([
              'book' => $group->book_id,
              'para' => $group->paragraph,
              'channel_id' => $group->channel_uid,
          ], [
              'lang' => 'en',
              'all_strlen' => $paraStrlen,
              'public_strlen' => $paraStrlen,
              'created_at' => $finalAt,
              'updated_at' => $updateAt,
          ]);
      }

      /**
       * Build the grouped query yielding one row per (book_id, paragraph, channel_uid).
       *
       * With at least one filter only the given filters are applied; without any
       * filter the query is limited to book_id < 1000 and non-null channel_uid.
       * Rows are ordered by book_id, then paragraph.
       */
      private function buildQuery(?string $book, ?string $para, ?string $channelId)
      {
          $table = Sentence::where('strlen', '>', 0);

          if ($book || $para || $channelId) {
              if ($book) {
                  $table = $table->where('book_id', $book);
              }
              if ($para) {
                  $table = $table->where('paragraph', $para);
              }
              if ($channelId) {
                  $table = $table->where('channel_uid', $channelId);
              }
          } else {
              $table = $table->where('book_id', '<', 1000)
                  ->whereNotNull('channel_uid');
          }

          return $table->groupBy('book_id', 'paragraph', 'channel_uid')
              ->select('book_id', 'paragraph', 'channel_uid')
              ->orderBy('book_id')
              ->orderBy('paragraph');
      }

      /**
       * Restrict the query to the checkpointed paragraph and everything after it.
       *
       * Keeps rows with (book_id > X) or (book_id = X and paragraph >= Y). The
       * comparison on paragraph is inclusive on purpose: rows are ordered by
       * (book_id, paragraph) only, so a checkpoint can fall between two channels
       * of the same paragraph. Re-processing that paragraph rewrites identical
       * Progress values, whereas excluding it would skip the channels that had
       * not been reached yet.
       *
       * @param  array  $cursor  ['book' => book_id, 'para' => paragraph]
       */
      private function applyResumeFilter($query, array $cursor)
      {
          return $query->where(function ($q) use ($cursor) {
              $q->where('book_id', '>', $cursor['book'])
                  ->orWhere(function ($q2) use ($cursor) {
                      $q2->where('book_id', $cursor['book'])
                          ->where('paragraph', '>=', $cursor['para']);
                  });
          });
      }

      /**
       * Cache key of the resume cursor for the given filter combination.
       *
       * The unfiltered run keeps the plain CACHE_KEY, so a checkpoint written
       * under that key before is still found; filtered runs get a key suffixed
       * with a hash of their filters.
       */
      private function cursorKey(?string $book, ?string $para, ?string $channelId): string
      {
          if (! $book && ! $para && ! $channelId) {
              return self::CACHE_KEY;
          }

          return self::CACHE_KEY.':'.md5(serialize([$book, $para, $channelId]));
      }
  }