Explorar o código

:recycle: UpgradeAITranslation: 缓存键加入 type/channel,游标仅含 book/para

- CACHE_KEY 改为按 type、channel 区分断点缓存
- 游标 $cursor 简化为 book|para
- 修复 getSentenceText -> getSentenceContent
- 修复 Log::debug 第二参数须为数组

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
visuddhinanda hai 1 día
pai
achega
74f2b7f4af
Modificáronse 1 ficheiros con 93 adicións e 36 borrados
  1. 93 36
      api-v13/app/Console/Commands/UpgradeAITranslation.php

+ 93 - 36
api-v13/app/Console/Commands/UpgradeAITranslation.php

@@ -2,26 +2,22 @@
 
 namespace App\Console\Commands;
 
-use Illuminate\Console\Command;
-use Illuminate\Support\Facades\Log;
-
-use App\Services\OpenAIService;
-use App\Services\AIModelService;
-use App\Services\SentenceService;
-use App\Services\SearchPaliDataService;
-use App\Services\AIAssistant\NissayaTranslateService;
-use App\Services\AuthService;
-
+use App\Helpers\LlmResponseParser;
+use App\Http\Api\ChannelApi;
 use App\Http\Resources\AiModelResource;
-
-use App\Models\PaliText;
 use App\Models\PaliSentence;
+use App\Models\PaliText;
 use App\Models\Sentence;
-
-use App\Helpers\LlmResponseParser;
-
-use App\Http\Api\ChannelApi;
+use App\Services\AIAssistant\NissayaTranslateService;
+use App\Services\AIModelService;
+use App\Services\AuthService;
+use App\Services\OpenAIService;
+use App\Services\SearchPaliDataService;
+use App\Services\SentenceService;
 use App\Tools\Tools;
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\Cache;
+use Illuminate\Support\Facades\Log;
 
 class UpgradeAITranslation extends Command
 {
@@ -29,9 +25,13 @@ class UpgradeAITranslation extends Command
      * The name and signature of the console command.
      * php artisan upgrade:ai.translation translation <channel-id> --book=141 --para=535
      * php artisan upgrade:ai.translation nissaya <channel-id> --book=207 --para=1247
+     *
      * @var string
      */
-    protected $signature = 'upgrade:ai.translation {type} {--book=} {--para=} {--resume} {--model=} ';
+    protected $signature = 'upgrade:ai.translation {type} {channel} {--book=} {--para=} {--resume} {--model=} {--fresh : 清除缓存断点,从头开始}';
+
+    // 缓存键前缀:以 type、channel 区分,记录已完成的 "book|para" 集合,中断后重跑自动跳过
+    private const CACHE_KEY_PREFIX = 'upgrade:ai.translation:done';
 
     /**
      * The console command description.
@@ -39,14 +39,23 @@ class UpgradeAITranslation extends Command
      * @var string
      */
     protected $description = 'Command description';
+
     protected $sentenceService;
+
     protected $modelService;
+
     protected $openAIService;
+
     protected $nissayaTranslateService;
+
     protected AiModelResource $model;
+
     protected $modelToken;
+
     protected $workChannel;
+
     protected $accessToken;
+
     /**
      * Create a new command instance.
      *
@@ -77,8 +86,31 @@ class UpgradeAITranslation extends Command
             $this->info("model:{$this->model['model']}");
             $this->modelToken = AuthService::getUserToken($this->model['uid']);
         }
-        $this->workChannel = ChannelApi::getById($this->ask('请输入结果channel'));
-        // TODO 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
+        $this->workChannel = ChannelApi::getById($this->argument('channel'));
+        // 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
+        if ($this->workChannel['type'] !== $this->argument('type')) {
+            $this->error('channel type not match request '.$this->argument('type').' input is '.$this->workChannel['type']);
+
+            return 1;
+        }
+
+        $type = $this->argument('type');
+        $channelId = $this->workChannel['id'] ?? '';
+
+        // 缓存键:按 type、channel 区分不同任务的断点
+        $cacheKey = self::CACHE_KEY_PREFIX.':'.$type.':'.$channelId;
+
+        if ($this->option('fresh')) {
+            Cache::forget($cacheKey);
+            $this->info('Cleared cached cursor.');
+        }
+
+        // 是否为完整遍历(未指定 book/para),仅此情形在结束后清空断点缓存
+        $isFullRun = ! $this->option('book') && ! $this->option('para');
+
+        // 从缓存恢复已完成的 (book, para) 集合,作为重入时的稳定游标
+        $done = Cache::get($cacheKey, []);
+
         $books = [];
         if ($this->option('book')) {
             $books = [$this->option('book')];
@@ -92,7 +124,14 @@ class UpgradeAITranslation extends Command
                 $paragraphs = [$this->option('para')];
             }
             foreach ($paragraphs as $key => $paragraph) {
-                $this->info($this->argument('type') . " {$book}-{$paragraph}");
+                // 稳定游标:缓存键已含 type、channel,此处仅以 book|para 标识处理单元
+                $cursor = $book.'|'.$paragraph;
+                if (isset($done[$cursor])) {
+                    $this->info("skip {$cursor}");
+
+                    continue;
+                }
+                $this->info($this->argument('type')." {$book}-{$paragraph}");
                 $data = [];
                 switch ($this->argument('type')) {
                     case 'translation':
@@ -105,12 +144,22 @@ class UpgradeAITranslation extends Command
                         $data = $this->aiWBW($book, $paragraph);
                         break;
                     default:
-                        # code...
+                        // code...
                         break;
                 }
                 $this->save($data);
+
+                // 该处理单元全部写库完成后再标记游标,确保中途中断不会误跳过
+                $done[$cursor] = true;
+                Cache::put($cacheKey, $done, now()->addHours(24));
             }
         }
+
+        // 完整遍历正常结束,清空断点缓存
+        if ($isFullRun) {
+            Cache::forget($cacheKey);
+        }
+
         return 0;
     }
 
@@ -121,21 +170,22 @@ class UpgradeAITranslation extends Command
             ->where('paragraph', $para)
             ->orderBy('word_begin')
             ->get();
-        if (!$sentences) {
+        if (! $sentences) {
             return null;
         }
         $json = [];
         foreach ($sentences as $key => $sentence) {
-            $content = $sentenceService->getSentenceText($book, $para, $sentence->word_begin, $sentence->word_end);
+            $content = $sentenceService->getSentenceContent($book, $para, $sentence->word_begin, $sentence->word_end);
             $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
             $json[] = ['id' => $id, 'content' => $content['markdown']];
         }
+
         return $json;
     }
 
     private function aiPaliTranslate($book, $para)
     {
-        $prompt = <<<md
+        $prompt = <<<'md'
         你是一个巴利语翻译助手。
         pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
         请翻译这个段落为简体中文。
@@ -159,10 +209,11 @@ class UpgradeAITranslation extends Command
     md;
 
         $pali = $this->getPaliContent($book, $para);
-        $originalText = "```json\n" . json_encode($pali, JSON_UNESCAPED_UNICODE) . "\n```";
+        $originalText = "```json\n".json_encode($pali, JSON_UNESCAPED_UNICODE)."\n```";
         Log::debug($originalText);
-        if (!$this->model) {
+        if (! $this->model) {
             Log::error('model is invalid');
+
             return [];
         }
         $startAt = time();
@@ -175,16 +226,18 @@ class UpgradeAITranslation extends Command
             ->send("# pali\n\n{$originalText}\n\n");
         $complete = time() - $startAt;
         $translationText = $response['choices'][0]['message']['content'] ?? '[]';
-        Log::debug("complete in {$complete}s", $translationText);
+        Log::debug("complete in {$complete}s", ['content' => $translationText]);
         $json = [];
         if (is_string($translationText)) {
             $json = LlmResponseParser::jsonl($translationText);
         }
+
         return $json;
     }
+
     private function aiWBW($book, $para)
     {
-        $sysPrompt = <<<md
+        $sysPrompt = <<<'md'
         你是一个佛教翻译专家,精通巴利文和缅文,精通巴利文逐词解析
         ## 翻译要求:
         - 请将用户提供的巴利句子单词表中的每个巴利文单词翻译为中文
@@ -216,7 +269,7 @@ class UpgradeAITranslation extends Command
             $tpl = [];
             foreach ($wbw as $key => $word) {
                 if (
-                    !empty($word->real->value) &&
+                    ! empty($word->real->value) &&
                     $word->type->value !== '.ctl.'
                 ) {
                     $tpl[] = [
@@ -238,7 +291,7 @@ class UpgradeAITranslation extends Command
                 ->send("```json\n{$tplText}\n```");
             $complete = time() - $startAt;
             $content = $response['choices'][0]['message']['content'] ?? '[]';
-            Log::debug("ai response in {$complete}s content=" . $content);
+            Log::debug("ai response in {$complete}s content=".$content);
 
             $json = LlmResponseParser::jsonl($content);
 
@@ -248,22 +301,24 @@ class UpgradeAITranslation extends Command
                 'content' => json_encode($json, JSON_UNESCAPED_UNICODE),
             ];
         }
+
         return $result;
     }
+
     private function aiNissayaTranslate($book, $para)
     {
-        $sysPrompt = <<<md
+        $sysPrompt = <<<'md'
         你是一个佛教翻译专家,精通巴利文和缅文
         ## 翻译要求:
         - 请将nissaya单词表中的巴利文和缅文分别翻译为中文
         - 输入格式为 巴利文:缅文
         - 一行是一条记录,翻译的时候,请不要拆分一行中的巴利文单词或缅文单词,一行中出现多个单词的,一起翻译
         - 输出csv格式内容,分隔符为"$",
-        - 字段如下:巴利文\$巴利文的中文译文\$缅文\$缅文的中文译文 #两个译文的语义相似度(%)
+        - 字段如下:巴利文$巴利文的中文译文$缅文$缅文的中文译文 #两个译文的语义相似度(%)
 
         **范例**:
 
-        pana\$然而\$ဝါဒန္တရကား\$教义之说 #60%
+        pana$然而$ဝါဒန္တရကား$教义之说 #60%
 
         直接输出csv, 无需其他内容
         用```包裹的行为注释内容,也需要翻译和解释。放在最后面。如果没有```,无需处理
@@ -306,22 +361,24 @@ class UpgradeAITranslation extends Command
             $aiNissaya = $this->nissayaTranslateService
                 ->setModel($this->model)
                 ->translate($sentence->content, false);
-            Log::debug("ai response ", ['content' => $aiNissaya['data']]);
+            Log::debug('ai response ', ['content' => $aiNissaya['data']]);
             $result[] = [
                 'id' => $id,
                 'content' => json_encode($aiNissaya['data'] ?? [], JSON_UNESCAPED_UNICODE),
-                'content_type' => 'json'
+                'content_type' => 'json',
             ];
         }
+
         return $result;
     }
 
     private function save($data)
     {
-        //写入句子库
+        // 写入句子库
         $sentData = [];
         $sentData = array_map(function ($n) {
             $sId = explode('-', $n['id']);
+
             return [
                 'book_id' => $sId[0],
                 'paragraph' => $sId[1],