Browse Source

Merge pull request #2399 from visuddhinanda/development

Development
visuddhinanda 1 day ago
parent
commit
e6757c532e

+ 101 - 37
api-v13/app/Console/Commands/UpgradeAITranslation.php

@@ -2,26 +2,22 @@
 
 namespace App\Console\Commands;
 
-use Illuminate\Console\Command;
-use Illuminate\Support\Facades\Log;
-
-use App\Services\OpenAIService;
-use App\Services\AIModelService;
-use App\Services\SentenceService;
-use App\Services\SearchPaliDataService;
-use App\Services\AIAssistant\NissayaTranslateService;
-use App\Services\AuthService;
-
+use App\Helpers\LlmResponseParser;
+use App\Http\Api\ChannelApi;
 use App\Http\Resources\AiModelResource;
-
-use App\Models\PaliText;
 use App\Models\PaliSentence;
+use App\Models\PaliText;
 use App\Models\Sentence;
-
-use App\Helpers\LlmResponseParser;
-
-use App\Http\Api\ChannelApi;
+use App\Services\AIAssistant\NissayaTranslateService;
+use App\Services\AIModelService;
+use App\Services\AuthService;
+use App\Services\OpenAIService;
+use App\Services\SearchPaliDataService;
+use App\Services\SentenceService;
 use App\Tools\Tools;
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\Cache;
+use Illuminate\Support\Facades\Log;
 
 class UpgradeAITranslation extends Command
 {
@@ -29,9 +25,13 @@ class UpgradeAITranslation extends Command
      * The name and signature of the console command.
      * php artisan upgrade:ai.translation translation --book=141 --para=535
      * php artisan upgrade:ai.translation nissaya --book=207 --para=1247
+     *
      * @var string
      */
-    protected $signature = 'upgrade:ai.translation {type} {--book=} {--para=} {--resume} {--model=} ';
+    protected $signature = 'upgrade:ai.translation {type} {channel} {--book=} {--para=} {--resume} {--model=} {--fresh : 清除缓存断点,从头开始}';
+
+    // 缓存键前缀:以 type、channel 区分,记录已完成的 "book|para" 集合,中断后重跑自动跳过
+    private const CACHE_KEY_PREFIX = 'upgrade:ai.translation:done';
 
     /**
      * The console command description.
@@ -39,14 +39,23 @@ class UpgradeAITranslation extends Command
      * @var string
      */
     protected $description = 'Command description';
+
     protected $sentenceService;
+
     protected $modelService;
+
     protected $openAIService;
+
     protected $nissayaTranslateService;
+
     protected AiModelResource $model;
+
     protected $modelToken;
+
     protected $workChannel;
+
     protected $accessToken;
+
     /**
      * Create a new command instance.
      *
@@ -77,13 +86,43 @@ class UpgradeAITranslation extends Command
             $this->info("model:{$this->model['model']}");
             $this->modelToken = AuthService::getUserToken($this->model['uid']);
         }
-        $this->workChannel = ChannelApi::getById($this->ask('请输入结果channel'));
-        // TODO 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
+        $this->workChannel = ChannelApi::getById($this->argument('channel'));
+        // 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
+        if ($this->workChannel['type'] !== $this->argument('type')) {
+            $this->error('channel type not match request '.$this->argument('type').' input is '.$this->workChannel['type']);
+
+            return 1;
+        }
+
+        $type = $this->argument('type');
+        $channelId = $this->workChannel['id'] ?? '';
+
+        // 缓存键:按 type、channel 区分不同任务的断点
+        $cacheKey = self::CACHE_KEY_PREFIX.':'.$type.':'.$channelId;
+
+        if ($this->option('fresh')) {
+            Cache::forget($cacheKey);
+            $this->info('Cleared cached cursor.');
+        }
+
+        // 是否为完整遍历(未指定 book/para),仅此情形在结束后清空断点缓存
+        $isFullRun = ! $this->option('book') && ! $this->option('para');
+
+        // 从缓存恢复已完成的 (book, para) 集合,作为重入时的稳定游标
+        $done = Cache::get($cacheKey, []);
+
         $books = [];
         if ($this->option('book')) {
             $books = [$this->option('book')];
         } else {
-            $books = range(1, 217);
+            // 未指定 book 时,若已有断点缓存,从上次处理到的 book 继续,无需从 1 开始
+            $startBook = 1;
+            if (! empty($done)) {
+                $doneBooks = array_map(fn ($cursor) => (int) explode('|', $cursor)[0], array_keys($done));
+                $startBook = max($doneBooks);
+                $this->info("resume from book {$startBook}");
+            }
+            $books = range($startBook, 217);
         }
         foreach ($books as $key => $book) {
             $maxParagraph = PaliText::where('book', $book)->max('paragraph');
@@ -92,7 +131,14 @@ class UpgradeAITranslation extends Command
                 $paragraphs = [$this->option('para')];
             }
             foreach ($paragraphs as $key => $paragraph) {
-                $this->info($this->argument('type') . " {$book}-{$paragraph}");
+                // 稳定游标:缓存键已含 type、channel,此处仅以 book|para 标识处理单元
+                $cursor = $book.'|'.$paragraph;
+                if (isset($done[$cursor])) {
+                    $this->info("skip {$cursor}");
+
+                    continue;
+                }
+
                 $data = [];
                 switch ($this->argument('type')) {
                     case 'translation':
@@ -105,12 +151,22 @@ class UpgradeAITranslation extends Command
                         $data = $this->aiWBW($book, $paragraph);
                         break;
                     default:
-                        # code...
+                        // code...
                         break;
                 }
                 $this->save($data);
+                $this->info($this->argument('type')." {$book}-{$paragraph} ".count($data).' sentences');
+                // 该处理单元全部写库完成后再标记游标,确保中途中断不会误跳过
+                $done[$cursor] = true;
+                Cache::put($cacheKey, $done, now()->addHours(24));
             }
         }
+
+        // 完整遍历正常结束,清空断点缓存
+        if ($isFullRun) {
+            Cache::forget($cacheKey);
+        }
+
         return 0;
     }
 
@@ -121,21 +177,22 @@ class UpgradeAITranslation extends Command
             ->where('paragraph', $para)
             ->orderBy('word_begin')
             ->get();
-        if (!$sentences) {
+        if (! $sentences) {
             return null;
         }
         $json = [];
         foreach ($sentences as $key => $sentence) {
-            $content = $sentenceService->getSentenceText($book, $para, $sentence->word_begin, $sentence->word_end);
+            $content = $sentenceService->getSentenceContent($book, $para, $sentence->word_begin, $sentence->word_end);
             $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
             $json[] = ['id' => $id, 'content' => $content['markdown']];
         }
+
         return $json;
     }
 
     private function aiPaliTranslate($book, $para)
     {
-        $prompt = <<<md
+        $prompt = <<<'md'
         你是一个巴利语翻译助手。
         pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
         请翻译这个段落为简体中文。
@@ -159,10 +216,11 @@ class UpgradeAITranslation extends Command
     md;
 
         $pali = $this->getPaliContent($book, $para);
-        $originalText = "```json\n" . json_encode($pali, JSON_UNESCAPED_UNICODE) . "\n```";
+        $originalText = "```json\n".json_encode($pali, JSON_UNESCAPED_UNICODE)."\n```";
         Log::debug($originalText);
-        if (!$this->model) {
+        if (! $this->model) {
             Log::error('model is invalid');
+
             return [];
         }
         $startAt = time();
@@ -175,16 +233,18 @@ class UpgradeAITranslation extends Command
             ->send("# pali\n\n{$originalText}\n\n");
         $complete = time() - $startAt;
         $translationText = $response['choices'][0]['message']['content'] ?? '[]';
-        Log::debug("complete in {$complete}s", $translationText);
+        Log::debug("complete in {$complete}s", ['content' => $translationText]);
         $json = [];
         if (is_string($translationText)) {
             $json = LlmResponseParser::jsonl($translationText);
         }
+
         return $json;
     }
+
     private function aiWBW($book, $para)
     {
-        $sysPrompt = <<<md
+        $sysPrompt = <<<'md'
         你是一个佛教翻译专家,精通巴利文和缅文,精通巴利文逐词解析
         ## 翻译要求:
         - 请将用户提供的巴利句子单词表中的每个巴利文单词翻译为中文
@@ -216,7 +276,7 @@ class UpgradeAITranslation extends Command
             $tpl = [];
             foreach ($wbw as $key => $word) {
                 if (
-                    !empty($word->real->value) &&
+                    ! empty($word->real->value) &&
                     $word->type->value !== '.ctl.'
                 ) {
                     $tpl[] = [
@@ -238,7 +298,7 @@ class UpgradeAITranslation extends Command
                 ->send("```json\n{$tplText}\n```");
             $complete = time() - $startAt;
             $content = $response['choices'][0]['message']['content'] ?? '[]';
-            Log::debug("ai response in {$complete}s content=" . $content);
+            Log::debug("ai response in {$complete}s content=".$content);
 
             $json = LlmResponseParser::jsonl($content);
 
@@ -248,22 +308,24 @@ class UpgradeAITranslation extends Command
                 'content' => json_encode($json, JSON_UNESCAPED_UNICODE),
             ];
         }
+
         return $result;
     }
+
     private function aiNissayaTranslate($book, $para)
     {
-        $sysPrompt = <<<md
+        $sysPrompt = <<<'md'
         你是一个佛教翻译专家,精通巴利文和缅文
         ## 翻译要求:
         - 请将nissaya单词表中的巴利文和缅文分别翻译为中文
         - 输入格式为 巴利文:缅文
         - 一行是一条记录,翻译的时候,请不要拆分一行中的巴利文单词或缅文单词,一行中出现多个单词的,一起翻译
         - 输出csv格式内容,分隔符为"$",
-        - 字段如下:巴利文\$巴利文的中文译文\$缅文\$缅文的中文译文 #两个译文的语义相似度(%)
+        - 字段如下:巴利文$巴利文的中文译文$缅文$缅文的中文译文 #两个译文的语义相似度(%)
 
         **范例**:
 
-        pana\$然而\$ဝါဒန္တရကား\$教义之说 #60%
+        pana$然而$ဝါဒန္တရကား$教义之说 #60%
 
         直接输出csv, 无需其他内容
         用```包裹的行为注释内容,也需要翻译和解释。放在最后面。如果没有```,无需处理
@@ -306,22 +368,24 @@ class UpgradeAITranslation extends Command
             $aiNissaya = $this->nissayaTranslateService
                 ->setModel($this->model)
                 ->translate($sentence->content, false);
-            Log::debug("ai response ", ['content' => $aiNissaya['data']]);
+            Log::debug('ai response ', ['content' => $aiNissaya['data']]);
             $result[] = [
                 'id' => $id,
                 'content' => json_encode($aiNissaya['data'] ?? [], JSON_UNESCAPED_UNICODE),
-                'content_type' => 'json'
+                'content_type' => 'json',
             ];
         }
+
         return $result;
     }
 
     private function save($data)
     {
-        //写入句子库
+        // 写入句子库
         $sentData = [];
         $sentData = array_map(function ($n) {
             $sId = explode('-', $n['id']);
+
             return [
                 'book_id' => $sId[0],
                 'paragraph' => $sId[1],

+ 34 - 1
api-v13/app/Console/Commands/UpgradeSystemCommentary.php

@@ -27,7 +27,7 @@ class UpgradeSystemCommentary extends Command
      *
      * @var string
      */
-    protected $signature = 'upgrade:sys.commentary {--book=} {--para=} {--list} {--model=} {--fresh : 清除缓存断点,从头开始}';
+    protected $signature = 'upgrade:sys.commentary {--book=} {--para=} {--list} {--model=} {--skip= : 跳过指定的 book_name,逗号分隔,支持前缀通配,如 abhi*,sn2} {--fresh : 清除缓存断点,从头开始}';
 
     protected $prompt = <<<'md'
     你是一个注释对照阅读助手。
@@ -132,6 +132,12 @@ md;
         // 从缓存恢复已完成的 (book_name, cs_para) 集合,作为重入时的稳定游标
         $done = Cache::get(self::CACHE_KEY, []);
 
+        // 需要跳过的 book_name 规则,逗号分隔,以 * 结尾为前缀匹配,否则全等匹配
+        $skipPatterns = [];
+        if ($this->option('skip')) {
+            $skipPatterns = array_values(array_filter(array_map('trim', explode(',', $this->option('skip')))));
+        }
+
         $channel = ChannelApi::getChannelByName('_System_commentary_');
 
         $books = [];
@@ -148,6 +154,12 @@ md;
                 ->get()->toArray();
         }
         foreach ($books as $key => $currBook) {
+            // 命中跳过规则时直接处理下一本:即便上次游标停在此书,也跳到下一个有效 book_name
+            if ($this->shouldSkipBook($currBook['book_name'], $skipPatterns)) {
+                $this->info('skip book '.$currBook['book_name']);
+
+                continue;
+            }
             $paragraphs = [];
             if ($this->option('para')) {
                 $paragraphs[] = ['cs_para' => $this->option('para')];
@@ -278,6 +290,27 @@ md;
         return 0;
     }
 
+    /**
+     * Decide whether a book_name matches any of the given skip patterns.
+     *
+     * A pattern ending in "*" is treated as a prefix rule: all trailing
+     * asterisks are stripped and the remainder must be a non-empty prefix of
+     * $bookName, so a pattern consisting only of asterisks never matches.
+     * Any other pattern must be strictly equal to $bookName. Both checks are
+     * case-sensitive.
+     *
+     * @param  string  $bookName  The book_name to test.
+     * @param  array<int, string>  $patterns  Skip patterns; a trailing asterisk means prefix match, otherwise exact match.
+     * @return bool  True as soon as one pattern matches; false when none match, including when $patterns is empty.
+     */
+    private function shouldSkipBook(string $bookName, array $patterns): bool
+    {
+        foreach ($patterns as $pattern) {
+            if (str_ends_with($pattern, '*')) {
+                $prefix = rtrim($pattern, '*');
+                if ($prefix !== '' && str_starts_with($bookName, $prefix)) {
+                    return true;
+                }
+            } elseif ($bookName === $pattern) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
     private function hasData($typeData, $typeName)
     {
         if (

+ 16 - 0
api-v13/documents/ai-test.md

@@ -0,0 +1,16 @@
+# 服务器端测试和运行的项目
+
+| 序列号 | 项目 | 代码完成 | 功能测试 | 可重入 | 运行中 | 结束 |
+|--------|------|----------|----------|--------|--------|------|
+| 1 | 三藏全文搜索 | ✅ | ✅ | ✅ | ✅ | ✅ |
+| 2 | 百科全文搜索 | ✅ | | | | |
+| 3 | 注疏穿插 | ✅ | ✅ | ✅ | ✅ | |
+| 4 | 汉译 nissaya | ✅ | ✅ | ✅ | | |
+| 5 | ai 译文(deepseek) | ✅ | | | | |
+| 6 | ai 译文(claude) | | | | | |
+| 7 | 第三方译文导入 | ✅ | | | | |
+| 8 | 五大册-AI 汉译 | ✅ | | | | |
+| 9 | AI 百科 | ✅ | | | | |
+| 10 | AI wbw | | | | | |
+
+> “功能测试”一列指该项目已在 wikipali server 上通过功能测试。