Răsfoiți Sursa

feat: PaliTranslateService 多步骤巴译中工作流

把 UpgradeAITranslation 里的 aiPaliTranslate 提取为 PaliTranslateService,
仿 NissayaTranslateService 放在 app/Services/AIAssistant/,支持
translate / review / revise 三步骤,可单独或串联运行(--steps)。

- review 仅打分+问题清单不改译文;revise 据 review 修订并回写库
- translate/revise prompt 增加专有名词规则:优先标准译名,否则意译,
  困难时音译,全文译名一致
- review prompt 增加专有名词维度(标准译名/音意译/混淆/前后一致)
- OpenAIService::setThinking 接受 ?bool,传 null 时不改动

已实跑验证(book=131 para=27, deepseek-v3):三步串联正常,
review 结果正确传入 revise,写库内容为最终 revise 译文。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
visuddhinanda 4 zile în urmă
părinte
comite
84d89ed328

+ 29 - 133
api-v13/app/Console/Commands/UpgradeAITranslation.php

@@ -5,16 +5,14 @@ namespace App\Console\Commands;
 use App\Helpers\LlmResponseParser;
 use App\Http\Api\ChannelApi;
 use App\Http\Resources\AiModelResource;
-use App\Models\PaliSentence;
 use App\Models\PaliText;
 use App\Models\Sentence;
 use App\Services\AIAssistant\NissayaTranslateService;
+use App\Services\AIAssistant\PaliTranslateService;
 use App\Services\AIModelService;
 use App\Services\AuthService;
 use App\Services\OpenAIService;
-use App\Services\SearchPaliDataService;
 use App\Services\SentenceService;
-use App\Tools\Tools;
 use Illuminate\Console\Command;
 use Illuminate\Support\Facades\Cache;
 use Illuminate\Support\Facades\Log;
@@ -23,7 +21,7 @@ class UpgradeAITranslation extends Command
 {
     /**
      * The name and signature of the console command.
-     * php artisan upgrade:ai.translation translation --book=141 --para=535
+     * php artisan upgrade:ai.translation translation --book=131 --para=27
      * php artisan upgrade:ai.translation nissaya --book=207 --para=1247
      *
      * @var string
@@ -36,6 +34,7 @@ class UpgradeAITranslation extends Command
     {--resume}
     {--model=}
     {--thinking= : 开启和关闭deepseek thinking true | false}
+    {--steps=translate : translation 工作流步骤,逗号分隔,可选 translate,review,revise}
     {--fresh : 清除缓存断点,从头开始}';
 
     // 缓存键前缀:以 type、channel 区分,记录已完成的 "book|para" 集合,中断后重跑自动跳过
@@ -67,7 +66,8 @@ class UpgradeAITranslation extends Command
         protected AIModelService $modelService,
         protected SentenceService $sentenceService,
         protected OpenAIService $openAIService,
-        protected NissayaTranslateService $nissayaTranslateService
+        protected NissayaTranslateService $nissayaTranslateService,
+        protected PaliTranslateService $paliTranslateService
     ) {
         parent::__construct();
     }
@@ -82,33 +82,43 @@ class UpgradeAITranslation extends Command
         /**
          * model
          */
-        if (!$this->option('model')) {
+        if (! $this->option('model')) {
             $this->error('model is request');
+
             return 1;
         }
         $this->model = $this->modelService->getModelById($this->option('model'));
         $this->info("model:{$this->model['model']}");
         $this->modelToken = AuthService::getUserToken($this->model['uid']);
 
-        //channel
+        // channel
         $this->workChannel = ChannelApi::getById($this->argument('channel'));
         // 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
         if ($this->workChannel['type'] !== $this->argument('type')) {
-            $this->error('channel type not match request ' . $this->argument('type') . ' input is ' . $this->workChannel['type']);
+            $this->error('channel type not match request '.$this->argument('type').' input is '.$this->workChannel['type']);
 
             return 1;
         }
 
         if ($this->option('thinking')) {
             $this->thinking = $this->option('thinking') === 'true';
-            $this->line('thinking is ' . $this->option('thinking'));
+            $this->line('thinking is '.$this->option('thinking'));
+        }
+
+        // translation 工作流步骤校验
+        $steps = array_values(array_filter(array_map('trim', explode(',', (string) $this->option('steps')))));
+        $invalid = array_diff($steps, PaliTranslateService::STEPS);
+        if (! empty($invalid)) {
+            $this->error('invalid steps: '.implode(',', $invalid).'. allowed: '.implode(',', PaliTranslateService::STEPS));
+
+            return 1;
         }
 
         $type = $this->argument('type');
         $channelId = $this->workChannel['id'] ?? '';
 
         // 缓存键:按 type、channel 区分不同任务的断点
-        $cacheKey = self::CACHE_KEY_PREFIX . ':' . $type . ':' . $channelId;
+        $cacheKey = self::CACHE_KEY_PREFIX.':'.$type.':'.$channelId;
 
         if ($this->option('fresh')) {
             Cache::forget($cacheKey);
@@ -128,7 +138,7 @@ class UpgradeAITranslation extends Command
             // 未指定 book 时,若已有断点缓存,从上次处理到的 book 继续,无需从 1 开始
             $startBook = 1;
             if (! empty($done)) {
-                $doneBooks = array_map(fn($cursor) => (int) explode('|', $cursor)[0], array_keys($done));
+                $doneBooks = array_map(fn ($cursor) => (int) explode('|', $cursor)[0], array_keys($done));
                 $startBook = max($doneBooks);
                 $this->info("resume from book {$startBook}");
             }
@@ -142,7 +152,7 @@ class UpgradeAITranslation extends Command
             }
             foreach ($paragraphs as $key => $paragraph) {
                 // 稳定游标:缓存键已含 type、channel,此处仅以 book|para 标识处理单元
-                $cursor = $book . '|' . $paragraph;
+                $cursor = $book.'|'.$paragraph;
                 if (isset($done[$cursor])) {
                     $this->info("skip {$cursor}");
 
@@ -152,7 +162,11 @@ class UpgradeAITranslation extends Command
                 $data = [];
                 switch ($this->argument('type')) {
                     case 'translation':
-                        $data = $this->aiPaliTranslate($book, $paragraph);
+                        $data = $this->paliTranslateService
+                            ->setModel($this->model)
+                            ->setChannel($this->workChannel)
+                            ->setThinking($this->thinking ?? null)
+                            ->run($steps, (int) $book, (int) $paragraph);
                         break;
                     case 'nissaya':
                         $data = $this->aiNissayaTranslate($book, $paragraph);
@@ -166,7 +180,7 @@ class UpgradeAITranslation extends Command
                 }
                 $this->save($data);
                 $time = time() - $start;
-                $this->info($this->argument('type') . " {$book}-{$paragraph} " . count($data) . ' sentences time=' . $time);
+                $this->info($this->argument('type')." {$book}-{$paragraph} ".count($data).' sentences time='.$time);
                 // 该处理单元全部写库完成后再标记游标,确保中途中断不会误跳过
                 $done[$cursor] = true;
                 Cache::put($cacheKey, $done, now()->addHours(24));
@@ -181,82 +195,6 @@ class UpgradeAITranslation extends Command
         return 0;
     }
 
-    private function getPaliContent($book, $para)
-    {
-        $sentenceService = app(SearchPaliDataService::class);
-        $sentences = PaliSentence::where('book', $book)
-            ->where('paragraph', $para)
-            ->orderBy('word_begin')
-            ->get();
-        if (! $sentences) {
-            return null;
-        }
-        $json = [];
-        foreach ($sentences as $key => $sentence) {
-            $content = $sentenceService->getSentenceContent($book, $para, $sentence->word_begin, $sentence->word_end);
-            $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
-            $json[] = ['id' => $id, 'content' => $content['markdown']];
-        }
-
-        return $json;
-    }
-
-    private function aiPaliTranslate($book, $para)
-    {
-        $prompt = <<<'md'
-        你是一个巴利语翻译助手。
-        pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
-        请翻译这个段落为简体中文。
-
-        翻译要求
-        1. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白。
-        2. 译文严谨,完全贴合巴利原文,不要加入自己的理解
-        3. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应该替换为相应的汉字全角符号
-
-        输出格式jsonl
-        输出id 和 content 两个字段,
-        id 使用巴利原文句子的id ,
-        content 为中文译文
-
-        直接输出jsonl数据,无需解释
-
-
-    **输出范例**
-    {"id":"1-2-3-4","content":"译文"}
-    {"id":"2-3-4-5","content":"译文"}
-    md;
-
-        $pali = $this->getPaliContent($book, $para);
-        $originalText = "```json\n" . json_encode($pali, JSON_UNESCAPED_UNICODE) . "\n```";
-        Log::debug($originalText);
-        if (! $this->model) {
-            Log::error('model is invalid');
-
-            return [];
-        }
-        $startAt = time();
-        $llm = $this->openAIService->setApiUrl($this->model['url'])
-            ->setModel($this->model['model'])
-            ->setApiKey($this->model['key'])
-            ->setSystemPrompt($prompt)
-            ->setTemperature(0.0)
-            ->setStream(false);
-        if (isset($this->thinking)) {
-            $llm = $llm->setThinking($this->thinking);
-        }
-
-        $response =    $llm->send("# pali\n\n{$originalText}\n\n");
-        $complete = time() - $startAt;
-        $translationText = $response['choices'][0]['message']['content'] ?? '[]';
-        Log::debug("complete in {$complete}s", ['content' => $translationText]);
-        $json = [];
-        if (is_string($translationText)) {
-            $json = LlmResponseParser::jsonl($translationText);
-        }
-
-        return $json;
-    }
-
     private function aiWBW($book, $para)
     {
         $sysPrompt = <<<'md'
@@ -316,7 +254,7 @@ class UpgradeAITranslation extends Command
             $response = $llm->send("```json\n{$tplText}\n```");
             $complete = time() - $startAt;
             $content = $response['choices'][0]['message']['content'] ?? '[]';
-            Log::debug("ai response in {$complete}s content=" . $content);
+            Log::debug("ai response in {$complete}s content=".$content);
 
             $json = LlmResponseParser::jsonl($content);
 
@@ -332,23 +270,6 @@ class UpgradeAITranslation extends Command
 
     private function aiNissayaTranslate($book, $para)
     {
-        $sysPrompt = <<<'md'
-        你是一个佛教翻译专家,精通巴利文和缅文
-        ## 翻译要求:
-        - 请将nissaya单词表中的巴利文和缅文分别翻译为中文
-        - 输入格式为 巴利文:缅文
-        - 一行是一条记录,翻译的时候,请不要拆分一行中的巴利文单词或缅文单词,一行中出现多个单词的,一起翻译
-        - 输出csv格式内容,分隔符为"$",
-        - 字段如下:巴利文$巴利文的中文译文$缅文$缅文的中文译文 #两个译文的语义相似度(%)
-
-        **范例**:
-
-        pana$然而$ဝါဒန္တရကား$教义之说 #60%
-
-        直接输出csv, 无需其他内容
-        用```包裹的行为注释内容,也需要翻译和解释。放在最后面。如果没有```,无需处理
-        md;
-
         $sentences = Sentence::nissaya()
             ->language('my') // 过滤缅文
             ->where('book_id', $book)
@@ -358,31 +279,6 @@ class UpgradeAITranslation extends Command
         $result = [];
         foreach ($sentences as $key => $sentence) {
             $id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";
-            /*
-            $nissaya = [];
-            $rows = explode("\n", $sentence->content);
-            foreach ($rows as $key => $row) {
-                if (strpos('=', $row) >= 0) {
-                    $factors = explode("=", $row);
-                    $nissaya[] = Tools::MyToRm($factors[0]) . ':' . end($factors);
-                } else {
-                    $nissaya[] = $row;
-                }
-            }
-            $nissayaText = json_encode(implode("\n", $nissaya), JSON_UNESCAPED_UNICODE);
-            Log::debug($nissayaText);
-            $startAt = time();
-            $response = $this->openAIService->setApiUrl($this->model['url'])
-                ->setModel($this->model['model'])
-                ->setApiKey($this->model['key'])
-                ->setSystemPrompt($sysPrompt)
-                ->setTemperature(0.7)
-                ->setStream(false)
-                ->send("# nissaya\n\n{$nissayaText}\n\n");
-            $complete = time() - $startAt;
-            $content = $response['choices'][0]['message']['content'] ?? '';
-            Log::debug("ai response in {$complete}s content=" . $content);
-            */
             $aiNissaya = $this->nissayaTranslateService
                 ->setModel($this->model)
                 ->translate($sentence->content, false);

+ 42 - 46
api-v13/app/Services/AIAssistant/ArticleTranslateService.php

@@ -2,35 +2,29 @@
 
 namespace App\Services\AIAssistant;
 
+use App\Http\Api\ChannelApi;
+use App\Models\CustomBook;
 use App\Services\ArticleService;
+use App\Services\AuthService;
 use App\Services\PaliContentService;
 use App\Services\SentenceService;
-use App\Services\AuthService;
-
-use App\Models\CustomBook;
-
-
 use Illuminate\Support\Facades\Log;
-use App\Http\Api\ChannelApi;
-
 
 class ArticleTranslateService
 {
-    protected ArticleService $articleService;
-    protected PaliContentService $paliContentService;
-    protected TranslateService $translateService;
-    protected SentenceService $sentenceService;
-
-
     protected string $modelId;
+
     protected string $modelToken;
+
     protected array $translation = [];
+
     protected string $outputChannelId;
+
     protected string $currArticleId;
 
     protected bool $thinking;
 
-    protected string $systemPrompt = <<<PROMPT
+    protected string $systemPrompt = <<<'PROMPT'
     请根据提供的原文,翻译为简体中文。
 
     原文为逐句数据,翻译时请依照句子的上下文翻译。
@@ -61,49 +55,42 @@ class ArticleTranslateService
     PROMPT;
 
     public function __construct(
-        ArticleService $article,
-        PaliContentService $paliContent,
-        TranslateService $translateService,
-        SentenceService $sentenceService
-    ) {
-        $this->articleService = $article;
-        $this->paliContentService = $paliContent;
-        $this->translateService = $translateService;
-        $this->sentenceService = $sentenceService;
-    }
+        protected ArticleService $articleService,
+        protected PaliContentService $paliContentService,
+        protected TranslateService $translateService,
+        protected SentenceService $sentenceService
+    ) {}
 
     /**
      * 设置模型配置
-     *
-     * @param string $model
-     * @return self
      */
     public function setModel(string $model): self
     {
         $this->modelId = $model;
         $this->modelToken = app(AuthService::class)->getUserToken($model);
+
         return $this;
     }
+
     /**
      * 设置模型配置
-     *
-     * @param bool $thinking
-     * @return self
      */
     public function setThinking(bool $thinking): self
     {
         $this->thinking = $thinking;
+
         return $this;
     }
+
     /**
      * 设置模型配置
      *
-     * @param string $model
-     * @return self
+     * @param  string  $id  ID of the output channel
      */
     public function setChannel(string $id): self
     {
         $this->outputChannelId = $id;
+
         return $this;
     }
 
@@ -111,6 +98,7 @@ class ArticleTranslateService
     {
         return $this->currArticleId;
     }
+
     public function translateAnthology(string $anthologyId, ?callable $onEach = null): int
     {
         $articleIds = $this->articleService->articlesInAnthology($anthologyId);
@@ -124,17 +112,19 @@ class ArticleTranslateService
 
         return count($articleIds);
     }
+
     public function translateArticle(string $articleId)
     {
         $this->currArticleId = $articleId;
-        //获取文章中的句子id
+        // 获取文章中的句子id
         $sentenceIds = $this->articleService->sentenceIds($articleId);
-        if (!$sentenceIds || count($sentenceIds) === 0) {
+        if (! $sentenceIds || count($sentenceIds) === 0) {
             $this->translation = [];
+
             return $this;
         }
-        $bookId = (int)explode('-', $sentenceIds[0])[0];
-        //提取原文
+        $bookId = (int) explode('-', $sentenceIds[0])[0];
+        // 提取原文
         $originalChannelId = CustomBook::where('book_id', $bookId)->value('channel_id');
 
         $original = $this->paliContentService->sentences($sentenceIds, [$originalChannelId], 'read');
@@ -144,27 +134,29 @@ class ArticleTranslateService
                 $org = $sent['origin'][0];
                 $orgData[] = [
                     'id' => "{$org['book']}-{$org['para']}-{$org['wordStart']}-{$org['wordEnd']}",
-                    'content' => !empty($org['content']) ? $org['content'] : $org['html'],
+                    'content' => ! empty($org['content']) ? $org['content'] : $org['html'],
                 ];
             }
         }
-        //翻译
+        // 翻译
         $result = $this->translateService->setModel($this->modelId)
             ->setSystemPrompt($this->systemPrompt)
-            ->setTranslatePrompt("# 原文\n\n" .
-                "```json\n" .
-                json_encode($orgData, JSON_UNESCAPED_UNICODE) .
+            ->setTranslatePrompt("# 原文\n\n".
+                "```json\n".
+                json_encode($orgData, JSON_UNESCAPED_UNICODE).
                 "\n```")
             ->translate();
         Log::debug('ai translation', ['data' => $result->toArray()['data']]);
         $this->translation = $result->toArray()['data'];
+
         return $this;
     }
-    //写入结果channel
+
+    // 写入结果channel
     public function save()
     {
         if (
-            !is_array($this->translation) ||
+            ! is_array($this->translation) ||
             count($this->translation) === 0
         ) {
             return 0;
@@ -173,6 +165,7 @@ class ArticleTranslateService
         $sentData = [];
         $sentData = array_map(function ($n) use ($channelInfo) {
             $sId = explode('-', $n['id']);
+
             return [
                 'book_id' => $sId[0],
                 'paragraph' => $sId[1],
@@ -186,16 +179,17 @@ class ArticleTranslateService
                 'editor_uid' => $this->modelId,
             ];
         }, $this->translation);
-        foreach ($sentData as  $value) {
+        foreach ($sentData as $value) {
             $this->sentenceService->save($value);
         }
+
         return count($sentData);
     }
 
     public function saveRpc(string $endpoint, string $accessToken)
     {
         if (
-            !is_array($this->translation) ||
+            ! is_array($this->translation) ||
             count($this->translation) === 0
         ) {
             return 0;
@@ -204,6 +198,7 @@ class ArticleTranslateService
         $sentData = [];
         $sentData = array_map(function ($n) use ($channelInfo, $accessToken) {
             $sId = explode('-', $n['id']);
+
             return [
                 'book_id' => $sId[0],
                 'paragraph' => $sId[1],
@@ -215,9 +210,10 @@ class ArticleTranslateService
                 'access_token' => $accessToken,
             ];
         }, $this->translation);
-        foreach ($sentData as  $value) {
+        foreach ($sentData as $value) {
             $this->sentenceService->saveRpc($endpoint, $value, $this->modelToken);
         }
+
         return count($sentData);
     }
 

+ 34 - 33
api-v13/app/Services/AIAssistant/NissayaTranslateService.php

@@ -2,25 +2,24 @@
 
 namespace App\Services\AIAssistant;
 
+use App\Http\Resources\AiModelResource;
 use App\Services\NissayaParser;
 use App\Services\OpenAIService;
 use App\Services\RomanizeService;
 use Illuminate\Support\Facades\Log;
-use App\Http\Resources\AiModelResource;
-
 
 class NissayaTranslateService
 {
-    protected OpenAIService $openAIService;
-    protected NissayaParser $nissayaParser;
-    protected RomanizeService $romanizeService;
     protected AiModelResource $model;
+
     protected bool $romanize;
 
+    // null means "thinking not configured". The default keeps the typed property
+    // initialized, because translate() / translateInBatches() always pass it to
+    // OpenAIService::setThinking(), which ignores null.
+    protected ?bool $thinking = null;
+
     /**
      * 翻译提示词模板
      */
-    protected string $translatePrompt = <<<PROMPT
+    protected string $translatePrompt = <<<'PROMPT'
 你是一个专业的缅甸语翻译专家。你的任务是将缅文逐词解析(Nissaya)翻译成中文。
 
 输入格式:
@@ -51,58 +50,64 @@ class NissayaTranslateService
 PROMPT;
 
     public function __construct(
-        OpenAIService $openAIService,
-        NissayaParser $nissayaParser,
-        RomanizeService $romanizeService
+        protected OpenAIService $openAIService,
+        protected NissayaParser $nissayaParser,
+        protected RomanizeService $romanizeService
     ) {
-        $this->openAIService = $openAIService;
-        $this->nissayaParser = $nissayaParser;
-        $this->romanizeService = $romanizeService;
         $this->romanize = true;
     }
 
     /**
      * 设置模型配置
-     *
-     * @param \App\Http\Resources\AiModelResource $model
-     * @return self
      */
     public function setModel(AiModelResource $model): self
     {
         $this->model = $model;
+
+        return $this;
+    }
+
+    /**
+     * Set the thinking switch; passing null leaves the current value untouched.
+     */
+    public function setThinking(?bool $thinking): self
+    {
+        if ($thinking === null) {
+            return $this;
+        }
+        $this->thinking = $thinking;
+
         return $this;
     }
 
     /**
      * 设置翻译提示词
-     *
-     * @param string $prompt
-     * @return self
      */
     public function setTranslatePrompt(string $prompt): self
     {
         $this->translatePrompt = $prompt;
+
         return $this;
     }
 
     /**
      * 设置翻译提示词
      *
-     * @param string $prompt
-     * @return self
+     * @param  bool  $romanize
      */
     public function setRomanize(bool $romanize): self
     {
         $this->romanize = $romanize;
+
         return $this;
     }
 
     /**
      * 翻译缅文版逐词解析
      *
-     * @param string $text 格式: 巴利文=缅文
-     * @param bool $stream 是否流式输出
-     * @return array
+     * @param  string  $text  格式: 巴利文=缅文
+     * @param  bool  $stream  是否流式输出
+     *
      * @throws \Exception
      */
     public function translate(string $text, bool $stream = false): array
@@ -143,6 +148,7 @@ PROMPT;
                 ->setSystemPrompt($this->translatePrompt)
                 ->setTemperature(0.3)
                 ->setStream($stream)
+                ->setThinking($this->thinking)
                 ->send($jsonlInput);
 
             $complete = time() - $startAt;
@@ -192,14 +198,12 @@ PROMPT;
                 $data[$key]['original'] = $this->romanizeService->myanmarToRoman($value['original']);
             }
         }
+
         return $data;
     }
 
     /**
      * 将数组转换为JSONL格式
-     *
-     * @param array $data
-     * @return string
      */
     protected function arrayToJsonl(array $data): string
     {
@@ -207,14 +211,12 @@ PROMPT;
         foreach ($data as $item) {
             $lines[] = json_encode($item, JSON_UNESCAPED_UNICODE);
         }
+
         return implode("\n", $lines);
     }
 
     /**
      * 将JSONL格式转换为数组
-     *
-     * @param string $jsonl
-     * @return array
      */
     protected function jsonlToArray(string $jsonl): array
     {
@@ -248,9 +250,7 @@ PROMPT;
     /**
      * 批量翻译(将大文本分批处理)
      *
-     * @param string $text
-     * @param int $batchSize 每批处理的条目数
-     * @return array
+     * @param  int  $batchSize  每批处理的条目数
      */
     public function translateInBatches(string $text, int $batchSize = 50): array
     {
@@ -266,7 +266,7 @@ PROMPT;
             ];
 
             foreach ($batches as $index => $batch) {
-                Log::debug("NissayaTranslate: 处理批次 " . ($index + 1) . "/" . count($batches));
+                Log::debug('NissayaTranslate: 处理批次 '.($index + 1).'/'.count($batches));
 
                 $jsonlInput = $this->arrayToJsonl($batch);
                 $response = $this->openAIService
@@ -276,6 +276,7 @@ PROMPT;
                     ->setSystemPrompt($this->translatePrompt)
                     ->setTemperature(0.7)
                     ->setStream(false)
+                    ->setThinking($this->thinking)
                     ->send($jsonlInput);
 
                 $content = $response['choices'][0]['message']['content'] ?? '';

+ 388 - 0
api-v13/app/Services/AIAssistant/PaliTranslateService.php

@@ -0,0 +1,388 @@
+<?php
+
+namespace App\Services\AIAssistant;
+
+use App\Helpers\LlmResponseParser;
+use App\Http\Resources\AiModelResource;
+use App\Models\PaliSentence;
+use App\Models\Sentence;
+use App\Services\OpenAIService;
+use App\Services\SearchPaliDataService;
+use Illuminate\Support\Facades\Log;
+
+/**
+ * 巴利原文 -> 简体中文 的多步骤翻译工作流。
+ *
+ * 支持三个步骤,可单独运行或按顺序串联:
+ * - translate:根据巴利原文产出译文
+ * - review:对已有译文打分并给出问题清单(不修改译文)
+ * - revise:根据 review 的问题清单产出改进后的译文
+ *
+ * 单独运行 review / revise 时,已有译文从输出 channel 读取。
+ */
+class PaliTranslateService
+{
+    /**
+     * 可用的工作流步骤
+     */
+    public const STEPS = ['translate', 'review', 'revise'];
+
+    protected AiModelResource $model;
+
+    protected ?bool $thinking = null;
+
+    protected bool $stream = false;
+
+    /**
+     * 输出 channel(用于单独运行 review / revise 时读取已有译文)
+     *
+     * @var array<string, mixed>
+     */
+    protected array $workChannel = [];
+
+    /**
+     * translate 步骤的提示词
+     */
+    protected string $translatePrompt = <<<'md'
+        你是一个巴利语翻译助手。
+        pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
+        请翻译这个段落为简体中文。
+
+        翻译要求
+        1. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白。
+        2. 译文严谨,完全贴合巴利原文,不要加入自己的理解
+        3. 经名、人名、地名等专有名词:有约定俗成的标准译名时优先使用标准译名;没有标准译名的,尽量按词义意译;意译确有困难的再使用音译。同一专有名词在全文中译名须前后一致
+        4. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应该替换为相应的汉字全角符号
+
+        输出格式jsonl
+        输出id 和 content 两个字段,
+        id 使用巴利原文句子的id ,
+        content 为中文译文
+
+        直接输出jsonl数据,无需解释
+
+
+        **输出范例**
+        {"id":"1-2-3-4","content":"译文"}
+        {"id":"2-3-4-5","content":"译文"}
+        md;
+
+    /**
+     * review 步骤的提示词:对已有译文打分并指出问题,不修改译文。
+     */
+    protected string $reviewPrompt = <<<'md'
+        你是一个资深的巴利语翻译审校专家。
+        用户会提供巴利原文(pali)以及一份待审校的简体中文译文(translation),两者均为 json,通过 id 一一对应。
+
+        请逐句审校译文,但**不要修改译文**,只输出审校意见。
+        审校维度:
+        1. 准确性:译文是否完全贴合巴利原文,有无漏译、增译、误译
+        2. 专有名词:人名、地名、经名等专有名词的译名是否正确、是否使用约定俗成的标准译名,有无与读音相近的其他专名混淆(如把 Aṭṭhakanāgara“八城”误作 Āṭānāṭiya“阿吒曩胝”),同一专名在段落内译名是否前后一致
+        3. 语言:是否为规范的现代汉语书面语,有无古汉语或半文半白
+        4. 格式:黑体、全角标点是否符合要求
+
+        输出格式jsonl,每条记录对应一个句子,包含三个字段:
+        id:与原文相同的句子id
+        score:译文质量评分,整数 0-100
+        issues:问题清单,简明中文描述;若没有问题则输出空字符串
+
+        直接输出jsonl数据,无需解释
+
+        **输出范例**
+        {"id":"1-2-3-4","score":85,"issues":"漏译了 bhagavā;标点未使用全角"}
+        {"id":"2-3-4-5","score":100,"issues":""}
+        md;
+
+    /**
+     * revise 步骤的提示词:根据审校意见产出改进后的译文。
+     */
+    protected string $revisePrompt = <<<'md'
+        你是一个巴利语翻译助手。
+        用户会提供巴利原文(pali)、当前译文(translation)以及审校意见(review),均为 json,通过 id 一一对应。
+
+        请根据审校意见(review)修订当前译文(translation),产出改进后的译文。
+        修订要求:
+        1. 针对 review 中 issues 指出的问题进行修正
+        2. issues 为空、且 score 较高的句子可保持原译文
+        3. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白
+        4. 译文严谨,完全贴合巴利原文,不要加入自己的理解
+        5. 经名、人名、地名等专有名词:有约定俗成的标准译名时优先使用标准译名;没有标准译名的,尽量按词义意译;意译确有困难的再使用音译。同一专有名词在全文中译名须前后一致
+        6. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应替换为相应的汉字全角符号
+
+        输出格式jsonl
+        输出id 和 content 两个字段,
+        id 使用巴利原文句子的id ,
+        content 为修订后的中文译文
+
+        直接输出jsonl数据,无需解释
+
+        **输出范例**
+        {"id":"1-2-3-4","content":"译文"}
+        {"id":"2-3-4-5","content":"译文"}
+        md;
+
+    /**
+     * Dependencies are injected and kept as promoted protected properties.
+     *
+     * @param  OpenAIService  $openAIService  client used by send() to call the LLM
+     * @param  SearchPaliDataService  $searchPaliDataService  used by getPaliContent() to fetch sentence content
+     */
+    public function __construct(
+        protected OpenAIService $openAIService,
+        protected SearchPaliDataService $searchPaliDataService,
+    ) {}
+
+    /**
+     * Set the AI model resource; send() reads its 'url', 'model' and 'key' entries.
+     */
+    public function setModel(AiModelResource $model): self
+    {
+        $this->model = $model;
+
+        return $this;
+    }
+
+    /**
+     * Set the deepseek thinking switch. A null argument keeps the current value.
+     */
+    public function setThinking(?bool $thinking): self
+    {
+        if ($thinking !== null) {
+            $this->thinking = $thinking;
+        }
+
+        return $this;
+    }
+
+    /**
+     * Set the stream flag that send() forwards to OpenAIService::setStream().
+     */
+    public function setStream(bool $stream): self
+    {
+        $this->stream = $stream;
+
+        return $this;
+    }
+
+    /**
+     * Set the output channel. Its 'id' entry is used to read the existing
+     * translation when review / revise run on their own.
+     *
+     * @param  array<string, mixed>  $channel
+     */
+    public function setChannel(array $channel): self
+    {
+        $this->workChannel = $channel;
+
+        return $this;
+    }
+
+    /**
+     * Override the system prompt used by the translate step.
+     */
+    public function setTranslatePrompt(string $prompt): self
+    {
+        $this->translatePrompt = $prompt;
+
+        return $this;
+    }
+
+    /**
+     * Override the system prompt used by the review step.
+     */
+    public function setReviewPrompt(string $prompt): self
+    {
+        $this->reviewPrompt = $prompt;
+
+        return $this;
+    }
+
+    /**
+     * Override the system prompt used by the revise step.
+     */
+    public function setRevisePrompt(string $prompt): self
+    {
+        $this->revisePrompt = $prompt;
+
+        return $this;
+    }
+
+    /**
+     * Run the requested workflow steps for one paragraph and return the final translation.
+     *
+     * Steps execute in the order given. Unless the first step is `translate`, the
+     * translation already stored in the output channel is loaded as the starting input.
+     * Returns an empty list when no model is set, when the paragraph has no Pali
+     * sentences, or when neither translate nor revise was requested.
+     *
+     * @param  string[]  $steps  ordered subset of self::STEPS (translate / review / revise)
+     * @return array<int, array{id: string, content: string}>
+     */
+    public function run(array $steps, int $book, int $para): array
+    {
+        if (! isset($this->model)) {
+            Log::error('PaliTranslate: model is invalid');
+
+            return [];
+        }
+
+        $pali = $this->getPaliContent($book, $para);
+        if ($pali === []) {
+            // Nothing to work on: do not spend LLM calls on an empty paragraph.
+            Log::warning('PaliTranslate: no pali sentences found', ['book' => $book, 'para' => $para]);
+
+            return [];
+        }
+
+        // Decide by the FIRST step: a translate that comes later must not blank
+        // the input of a review / revise that runs before it.
+        $steps = array_values($steps);
+        $translation = ($steps[0] ?? null) === 'translate'
+            ? []
+            : $this->existingTranslation($book, $para);
+
+        $review = [];
+
+        foreach ($steps as $step) {
+            switch ($step) {
+                case 'translate':
+                    $translation = $this->translate($pali);
+                    break;
+                case 'review':
+                    $review = $this->review($pali, $translation);
+                    Log::debug('PaliTranslate: review 完成', ['review' => $review]);
+                    break;
+                case 'revise':
+                    if ($review === []) {
+                        // Still runs (best effort), but the prompt will carry an empty review list.
+                        Log::warning('PaliTranslate: revise is running without review feedback');
+                    }
+                    $translation = $this->revise($pali, $translation, $review);
+                    break;
+            }
+        }
+
+        // Only steps that produce a translation (translate / revise) return data to be saved.
+        // With review alone the report has already been logged and the stored translation is left as is.
+        $producesTranslation = (bool) array_intersect($steps, ['translate', 'revise']);
+
+        return $producesTranslation ? $translation : [];
+    }
+
+    /**
+     * Load the Pali sentences of one paragraph, ordered by word_begin.
+     *
+     * Each entry has an id of the form "book-para-word_begin-word_end" and the
+     * 'markdown' value returned by SearchPaliDataService::getSentenceContent().
+     *
+     * @return array<int, array{id: string, content: string}>
+     */
+    public function getPaliContent(int $book, int $para): array
+    {
+        $rows = PaliSentence::where('book', $book)
+            ->where('paragraph', $para)
+            ->orderBy('word_begin')
+            ->get();
+
+        $result = [];
+        foreach ($rows as $row) {
+            $begin = $row->word_begin;
+            $end = $row->word_end;
+            $sentenceContent = $this->searchPaliDataService->getSentenceContent($book, $para, $begin, $end);
+            $result[] = [
+                'id' => implode('-', [$book, $para, $begin, $end]),
+                'content' => $sentenceContent['markdown'],
+            ];
+        }
+
+        return $result;
+    }
+
+    /**
+     * translate step: send the Pali sentences to the LLM with the translate prompt
+     * and parse the JSONL reply.
+     *
+     * @param  array<int, array{id: string, content: string}>  $pali
+     * @return array<int, array{id: string, content: string}>
+     */
+    public function translate(array $pali): array
+    {
+        $paliBlock = $this->jsonBlock($pali);
+        Log::debug('PaliTranslate: translate', ['pali' => $paliBlock]);
+
+        return LlmResponseParser::jsonl(
+            $this->send($this->translatePrompt, "# pali\n\n{$paliBlock}\n\n")
+        );
+    }
+
+    /**
+     * review step: ask the LLM to score an existing translation and list its issues.
+     * The translation itself is not modified here.
+     *
+     * @param  array<int, array{id: string, content: string}>  $pali
+     * @param  array<int, array{id: string, content: string}>  $translation
+     * @return array<int, array{id: string, score: int, issues: string}>
+     */
+    public function review(array $pali, array $translation): array
+    {
+        $sections = [
+            "# pali\n\n".$this->jsonBlock($pali),
+            "# translation\n\n".$this->jsonBlock($translation),
+        ];
+        // Sections are separated by a blank line and the message ends with one.
+        $userText = implode("\n\n", $sections)."\n\n";
+        Log::debug('PaliTranslate: review', ['input' => $userText]);
+
+        $reply = $this->send($this->reviewPrompt, $userText);
+        Log::debug('PaliTranslate: review', ['output' => $reply]);
+
+        return LlmResponseParser::jsonl($reply);
+    }
+
+    /**
+     * revise step: ask the LLM for an improved translation based on the review feedback.
+     *
+     * @param  array<int, array{id: string, content: string}>  $pali
+     * @param  array<int, array{id: string, content: string}>  $translation
+     * @param  array<int, array{id: string, score: int, issues: string}>  $review
+     * @return array<int, array{id: string, content: string}>
+     */
+    public function revise(array $pali, array $translation, array $review): array
+    {
+        $sections = [
+            "# pali\n\n".$this->jsonBlock($pali),
+            "# translation\n\n".$this->jsonBlock($translation),
+            "# review\n\n".$this->jsonBlock($review),
+        ];
+        // Sections are separated by a blank line and the message ends with one.
+        $userText = implode("\n\n", $sections)."\n\n";
+        Log::debug('PaliTranslate: revise', ['input' => $userText]);
+
+        $reply = $this->send($this->revisePrompt, $userText);
+        Log::debug('PaliTranslate: revise', ['output' => $reply]);
+
+        return LlmResponseParser::jsonl($reply);
+    }
+
+    /**
+     * Read the translation already stored in the output channel for one paragraph,
+     * ordered by word_start. Returns an empty list (and logs a warning) when no
+     * channel id has been set.
+     *
+     * @return array<int, array{id: string, content: string}>
+     */
+    protected function existingTranslation(int $book, int $para): array
+    {
+        if (empty($this->workChannel['id'])) {
+            Log::warning('PaliTranslate: 未设置输出 channel,无法读取已有译文');
+
+            return [];
+        }
+
+        $rows = Sentence::where('channel_uid', $this->workChannel['id'])
+            ->where('book_id', $book)
+            ->where('paragraph', $para)
+            ->orderBy('word_start')
+            ->get();
+
+        $translation = [];
+        foreach ($rows as $row) {
+            $translation[] = [
+                'id' => implode('-', [$row->book_id, $row->paragraph, $row->word_start, $row->word_end]),
+                'content' => $row->content,
+            ];
+        }
+
+        return $translation;
+    }
+
+    /**
+     * Call the LLM with the given system prompt and user message and return the
+     * reply text. Falls back to '[]' when the response has no string content.
+     */
+    protected function send(string $systemPrompt, string $userText): string
+    {
+        $startedAt = time();
+        $client = $this->openAIService
+            ->setApiUrl($this->model['url'])
+            ->setModel($this->model['model'])
+            ->setApiKey($this->model['key'])
+            ->setSystemPrompt($systemPrompt)
+            ->setTemperature(0.0)
+            ->setThinking($this->thinking)
+            ->setStream($this->stream);
+        $response = $client->send($userText);
+        $elapsed = time() - $startedAt;
+
+        $content = $response['choices'][0]['message']['content'] ?? '[]';
+        Log::debug("PaliTranslate: complete in {$elapsed}s", ['content' => $content]);
+
+        if (! is_string($content)) {
+            return '[]';
+        }
+
+        return $content;
+    }
+
+    /**
+     * Encode the data as JSON (unicode left unescaped) and wrap it in a
+     * ```json ... ``` fenced code block.
+     *
+     * @param  array<int, mixed>  $data
+     */
+    protected function jsonBlock(array $data): string
+    {
+        $encoded = json_encode($data, JSON_UNESCAPED_UNICODE);
+
+        return sprintf("```json\n%s\n```", $encoded);
+    }
+}

+ 36 - 14
api-v13/app/Services/OpenAIService.php

@@ -2,76 +2,92 @@
 
 namespace App\Services;
 
+use Illuminate\Http\Client\ConnectionException;
 use Illuminate\Support\Facades\Http;
 use Illuminate\Support\Facades\Log;
-use Illuminate\Http\Client\ConnectionException;
-use Illuminate\Http\Client\RequestException;
 
 class OpenAIService
 {
     protected int $retries = 3;
+
     protected int $delayMs = 2000;
+
     protected string $model = 'gpt-4-1106-preview';
+
     protected string $apiUrl = 'https://api.openai.com/v1/chat/completions';
+
     protected string $apiKey;
+
     protected string $systemPrompt = '你是一个有帮助的助手。';
+
     protected float $temperature = 0.7;
+
     protected bool $stream = false;
+
     protected int $timeout = 600;
+
     protected int $maxTokens = 0;
+
     protected bool $thinking;
 
     /**
      * Create a new instance with the given retry settings already applied.
      */
     public static function withRetry(int $retries = 3, int $delayMs = 2000): static
     {
-        return (new static())->setRetry($retries, $delayMs);
+        return (new static)->setRetry($retries, $delayMs);
     }
 
     /**
      * Store the retry count and the base delay (in milliseconds) used for the
      * exponential backoff between attempts. Returns $this for chaining.
      */
     public function setRetry(int $retries, int $delayMs): static
     {
         $this->retries = $retries;
         $this->delayMs = $delayMs;
+
         return $this;
     }
 
     /**
      * Store the model identifier in $this->model. Returns $this for chaining.
      */
     public function setModel(string $model): static
     {
         $this->model = $model;
+
         return $this;
     }
 
     /**
      * Set the thinking flag. Passing null leaves the current value unchanged.
      *
      * NOTE(review): $thinking is declared without a default value, so when only
      * null is ever passed the property stays uninitialized — confirm that code
      * reading it guards against that.
-     *
-     * @param bool $thinking
-     * @return self
      */
-    public function setThinking(bool $thinking): self
+    public function setThinking(?bool $thinking): self
     {
+        if ($thinking === null) {
+            return $this;
+        }
         $this->thinking = $thinking;
+
         return $this;
     }
 
     /**
      * Store the endpoint URL in $this->apiUrl. Returns $this for chaining.
      */
     public function setApiUrl(string $url): static
     {
         $this->apiUrl = $url;
+
         return $this;
     }
 
     /**
      * Store the API key, which is used as the Bearer token in the Authorization
      * header. Returns $this for chaining.
      */
     public function setApiKey(string $key): static
     {
         $this->apiKey = $key;
+
         return $this;
     }
 
     /**
      * Replace the default system prompt. Returns $this for chaining.
      */
     public function setSystemPrompt(string $prompt): static
     {
         $this->systemPrompt = $prompt;
+
         return $this;
     }
 
     /**
      * Store the temperature value in $this->temperature (default 0.7).
      * Returns $this for chaining.
      */
     public function setTemperature(float $temperature): static
     {
         $this->temperature = $temperature;
+
         return $this;
     }
 
@@ -90,6 +106,7 @@ class OpenAIService
     /**
      * Store the token limit in $this->maxTokens. Returns $this for chaining.
      */
     public function setMaxToken(int $maxTokens): static
     {
         $this->maxTokens = $maxTokens;
+
         return $this;
     }
 
@@ -113,6 +130,7 @@ class OpenAIService
                 Log::warning("请求被限流(429),等待 {$retryAfter} 秒后重试...(第 {$attempt} 次)");
                 sleep($retryAfter);
                 $lastException = $e;
+
                 continue;
             } catch (ServerErrorException $e) {
                 // 5xx 服务器错误,使用指数退避重试
@@ -121,6 +139,7 @@ class OpenAIService
                     usleep($this->delayMs * 1000 * pow(2, $attempt - 1));
                 }
                 $lastException = $e;
+
                 continue;
             } catch (ConnectionException $e) {
                 // 网络连接错误,使用指数退避重试
@@ -129,6 +148,7 @@ class OpenAIService
                     usleep($this->delayMs * 1000 * pow(2, $attempt - 1));
                 }
                 $lastException = $e;
+
                 continue;
             } catch (NetworkException $e) {
                 // 其他网络错误,使用指数退避重试
@@ -137,6 +157,7 @@ class OpenAIService
                     usleep($this->delayMs * 1000 * pow(2, $attempt - 1));
                 }
                 $lastException = $e;
+
                 continue;
             } catch (ClientErrorException $e) {
                 // 4xx 客户端错误(除429外)不重试,直接抛出
@@ -144,7 +165,7 @@ class OpenAIService
                 throw $e;
             } catch (\Exception $e) {
                 // 其他未知异常,不重试,直接抛出
-                Log::error("GPT 请求异常:" . $e->getMessage());
+                Log::error('GPT 请求异常:'.$e->getMessage());
                 throw $e;
             }
         }
@@ -152,7 +173,7 @@ class OpenAIService
         // 所有重试都失败了
         Log::error("请求多次失败,已重试 {$this->retries} 次");
         throw new \RuntimeException(
-            '请求多次失败或超时,请稍后再试。原因: ' . ($lastException ? $lastException->getMessage() : '未知'),
+            '请求多次失败或超时,请稍后再试。原因: '.($lastException ? $lastException->getMessage() : '未知'),
             504,
             $lastException
         );
@@ -191,7 +212,7 @@ class OpenAIService
 
         // 处理 429 速率限制
         if ($status === 429) {
-            $retryAfter = (int)($response->header('Retry-After') ?? 20);
+            $retryAfter = (int) ($response->header('Retry-After') ?? 20);
             throw new RateLimitException(
                 $body['error']['message'] ?? '请求被限流',
                 $status,
@@ -255,8 +276,8 @@ class OpenAIService
             CURLOPT_POST => true,
             CURLOPT_HTTPHEADER => [
                 "Authorization: Bearer {$this->apiKey}",
-                "Content-Type: application/json",
-                "Accept: text/event-stream",
+                'Content-Type: application/json',
+                'Accept: text/event-stream',
             ],
             CURLOPT_POSTFIELDS => json_encode($payload),
             CURLOPT_RETURNTRANSFER => false,
@@ -269,7 +290,7 @@ class OpenAIService
                 foreach ($lines as $line) {
                     $line = trim($line);
 
-                    if (!str_starts_with($line, 'data: ')) {
+                    if (! str_starts_with($line, 'data: ')) {
                         continue;
                     }
 
@@ -280,13 +301,14 @@ class OpenAIService
                     }
 
                     $obj = json_decode($json, true);
-                    if (!is_array($obj)) {
+                    if (! is_array($obj)) {
                         continue;
                     }
 
                     // 检查是否有错误
                     if (isset($obj['error'])) {
                         $errorMessage = $obj['error']['message'] ?? 'Stream error';
+
                         return 0; // 停止接收
                     }
 

+ 2 - 2
api-v13/documents/ai-test.md

@@ -5,11 +5,11 @@
 | 1      | 三藏全文搜索      | ✅       | ✅       | ✅     | ✅     | ✅   |
 | 2      | 百科全文搜索      | ✅       |          |        |        |      |
 | 3      | 注疏穿插          | ✅       | ✅       | ✅     | ✅     |      |
-| 4      | 汉译 nissaya      | ✅       | ✅       | ✅     |        |      |
+| 4      | 汉译 nissaya      | ✅       | ✅       | ✅     |        |      |
 | 5      | ai 译文(deepseek) | ✅       |          |        |        |      |
 | 6      | ai 译文(claude)   |          |          |        |        |      |
 | 7      | 第三方译文导入    | ✅       |          |        |        |      |
-| 8      | 五大册-AI 汉译    | ✅       |          |        |        |      |
+| 8      | 五大册-AI 汉译    | ✅       |          |        |        |      |
 | 9      | AI 百科           | ✅       |          |        |        |      |
 | 10     | AI wbw            |          |          |        |        |      |