|
|
@@ -0,0 +1,388 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+namespace App\Services\AIAssistant;
|
|
|
+
|
|
|
+use App\Helpers\LlmResponseParser;
|
|
|
+use App\Http\Resources\AiModelResource;
|
|
|
+use App\Models\PaliSentence;
|
|
|
+use App\Models\Sentence;
|
|
|
+use App\Services\OpenAIService;
|
|
|
+use App\Services\SearchPaliDataService;
|
|
|
+use Illuminate\Support\Facades\Log;
|
|
|
+
|
|
|
/**
 * Multi-step workflow that translates Pali source text into Simplified Chinese.
 *
 * Three steps are supported; they can be run on their own or chained in order:
 *  - translate: produce a translation from the Pali source
 *  - review:    score an existing translation and list its issues (the translation is not modified)
 *  - revise:    produce an improved translation from the review's issue list
 *
 * When review / revise run without a preceding translate step, the existing
 * translation is read from the output channel.
 */
class PaliTranslateService
{
    /**
     * Available workflow steps.
     */
    public const STEPS = ['translate', 'review', 'revise'];

    protected AiModelResource $model;

    protected ?bool $thinking = null;

    protected bool $stream = false;

    /**
     * Output channel (used to read the existing translation when review / revise
     * run without a preceding translate step). Only its 'id' key is read here.
     *
     * @var array<string, mixed>
     */
    protected array $workChannel = [];

    /**
     * System prompt for the translate step.
     */
    protected string $translatePrompt = <<<'md'
    你是一个巴利语翻译助手。
    pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
    请翻译这个段落为简体中文。

    翻译要求
    1. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白。
    2. 译文严谨,完全贴合巴利原文,不要加入自己的理解
    3. 经名、人名、地名等专有名词:有约定俗成的标准译名时优先使用标准译名;没有标准译名的,尽量按词义意译;意译确有困难的再使用音译。同一专有名词在全文中译名须前后一致
    4. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应该替换为相应的汉字全角符号

    输出格式jsonl
    输出id 和 content 两个字段,
    id 使用巴利原文句子的id ,
    content 为中文译文

    直接输出jsonl数据,无需解释


    **输出范例**
    {"id":"1-2-3-4","content":"译文"}
    {"id":"2-3-4-5","content":"译文"}
    md;

    /**
     * System prompt for the review step: score the existing translation and
     * point out issues without modifying the translation.
     */
    protected string $reviewPrompt = <<<'md'
    你是一个资深的巴利语翻译审校专家。
    用户会提供巴利原文(pali)以及一份待审校的简体中文译文(translation),两者均为 json,通过 id 一一对应。

    请逐句审校译文,但**不要修改译文**,只输出审校意见。
    审校维度:
    1. 准确性:译文是否完全贴合巴利原文,有无漏译、增译、误译
    2. 专有名词:人名、地名、经名等专有名词的译名是否正确、是否使用约定俗成的标准译名,有无与读音相近的其他专名混淆(如把 Aṭṭhakanāgara“八城”误作 Āṭānāṭiya“阿吒曩胝”),同一专名在段落内译名是否前后一致
    3. 语言:是否为规范的现代汉语书面语,有无古汉语或半文半白
    4. 格式:黑体、全角标点是否符合要求

    输出格式jsonl,每条记录对应一个句子,包含三个字段:
    id:与原文相同的句子id
    score:译文质量评分,整数 0-100
    issues:问题清单,简明中文描述;若没有问题则输出空字符串

    直接输出jsonl数据,无需解释

    **输出范例**
    {"id":"1-2-3-4","score":85,"issues":"漏译了 bhagavā;标点未使用全角"}
    {"id":"2-3-4-5","score":100,"issues":""}
    md;

    /**
     * System prompt for the revise step: produce an improved translation from
     * the review comments.
     */
    protected string $revisePrompt = <<<'md'
    你是一个巴利语翻译助手。
    用户会提供巴利原文(pali)、当前译文(translation)以及审校意见(review),均为 json,通过 id 一一对应。

    请根据审校意见(review)修订当前译文(translation),产出改进后的译文。
    修订要求:
    1. 针对 review 中 issues 指出的问题进行修正
    2. issues 为空、且 score 较高的句子可保持原译文
    3. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白
    4. 译文严谨,完全贴合巴利原文,不要加入自己的理解
    5. 经名、人名、地名等专有名词:有约定俗成的标准译名时优先使用标准译名;没有标准译名的,尽量按词义意译;意译确有困难的再使用音译。同一专有名词在全文中译名须前后一致
    6. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应替换为相应的汉字全角符号

    输出格式jsonl
    输出id 和 content 两个字段,
    id 使用巴利原文句子的id ,
    content 为修订后的中文译文

    直接输出jsonl数据,无需解释

    **输出范例**
    {"id":"1-2-3-4","content":"译文"}
    {"id":"2-3-4-5","content":"译文"}
    md;

    public function __construct(
        protected OpenAIService $openAIService,
        protected SearchPaliDataService $searchPaliDataService,
    ) {}

    /**
     * Set the model configuration used for every LLM call.
     * send() reads its 'url', 'model' and 'key' entries.
     */
    public function setModel(AiModelResource $model): self
    {
        $this->model = $model;

        return $this;
    }

    /**
     * Set the "thinking" switch forwarded to the LLM client.
     * Passing null keeps the current value unchanged.
     */
    public function setThinking(?bool $thinking): self
    {
        if ($thinking !== null) {
            $this->thinking = $thinking;
        }

        return $this;
    }

    /**
     * Set whether the LLM client should use streaming output.
     */
    public function setStream(bool $stream): self
    {
        $this->stream = $stream;

        return $this;
    }

    /**
     * Set the output channel (used to read the existing translation when
     * review / revise run without a preceding translate step).
     *
     * @param  array<string, mixed>  $channel  only the 'id' key is read by this class
     */
    public function setChannel(array $channel): self
    {
        $this->workChannel = $channel;

        return $this;
    }

    /**
     * Override the system prompt of the translate step.
     */
    public function setTranslatePrompt(string $prompt): self
    {
        $this->translatePrompt = $prompt;

        return $this;
    }

    /**
     * Override the system prompt of the review step.
     */
    public function setReviewPrompt(string $prompt): self
    {
        $this->reviewPrompt = $prompt;

        return $this;
    }

    /**
     * Override the system prompt of the revise step.
     */
    public function setRevisePrompt(string $prompt): self
    {
        $this->revisePrompt = $prompt;

        return $this;
    }

    /**
     * Run the multi-step workflow and return the final translation
     * (a list of ['id' => ..., 'content' => ...]).
     *
     * Returns an empty array when no model has been set, when the paragraph
     * has no Pali sentences, or when none of the requested steps produces a
     * translation (i.e. review only; the review report is only written to the log).
     *
     * @param  string[]  $steps  ordered subset of translate / review / revise
     * @return array<int, array{id: string, content: string}>
     */
    public function run(array $steps, int $book, int $para): array
    {
        if (! isset($this->model)) {
            Log::error('PaliTranslate: model is invalid');

            return [];
        }

        $pali = $this->getPaliContent($book, $para);
        if ($pali === []) {
            // Nothing to translate or review: avoid an LLM call on empty input.
            Log::warning('PaliTranslate: no pali sentences found', ['book' => $book, 'para' => $para]);

            return [];
        }

        // null = "no translation obtained yet". The existing translation is loaded
        // from the output channel lazily, the first time a review / revise step
        // needs one and no translate step has run before it.
        $translation = null;
        $review = [];

        foreach ($steps as $step) {
            if (! in_array($step, self::STEPS, true)) {
                Log::warning('PaliTranslate: unknown step ignored', ['step' => $step]);

                continue;
            }

            if ($step === 'translate') {
                $translation = $this->translate($pali);

                continue;
            }

            // review / revise both operate on an existing translation.
            $translation ??= $this->existingTranslation($book, $para);
            if ($translation === []) {
                Log::warning('PaliTranslate: no translation available, step skipped', [
                    'step' => $step,
                    'book' => $book,
                    'para' => $para,
                ]);

                continue;
            }

            if ($step === 'review') {
                $review = $this->review($pali, $translation);
                Log::debug('PaliTranslate: review 完成', ['review' => $review]);
            } else {
                $translation = $this->revise($pali, $translation, $review);
            }
        }

        // Only steps that produce a translation (translate / revise) return data that
        // can be written back; for review alone the report is already in the log and
        // the original translation does not need to be saved again.
        $producesTranslation = array_intersect($steps, ['translate', 'revise']) !== [];

        return $producesTranslation ? ($translation ?? []) : [];
    }

    /**
     * Fetch the Pali source of a paragraph, one entry per sentence, as
     * ['id' => ..., 'content' => ...]. Ids have the form
     * "{book}-{para}-{word_begin}-{word_end}".
     *
     * @return array<int, array{id: string, content: string}>
     */
    public function getPaliContent(int $book, int $para): array
    {
        $sentences = PaliSentence::where('book', $book)
            ->where('paragraph', $para)
            ->orderBy('word_begin')
            ->get();

        $json = [];
        foreach ($sentences as $sentence) {
            // NOTE(review): assumes getSentenceContent() returns an array with a
            // 'markdown' key — confirm against SearchPaliDataService.
            $content = $this->searchPaliDataService->getSentenceContent(
                $book,
                $para,
                $sentence->word_begin,
                $sentence->word_end,
            );
            $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
            $json[] = ['id' => $id, 'content' => $content['markdown']];
        }

        return $json;
    }

    /**
     * translate step: produce a translation from the Pali source.
     *
     * @param  array<int, array{id: string, content: string}>  $pali
     * @return array<int, array{id: string, content: string}>
     */
    public function translate(array $pali): array
    {
        $originalText = $this->jsonBlock($pali);
        Log::debug('PaliTranslate: translate', ['pali' => $originalText]);

        $content = $this->send($this->translatePrompt, "# pali\n\n{$originalText}\n\n");

        return LlmResponseParser::jsonl($content);
    }

    /**
     * review step: score an existing translation and list its issues
     * (the translation itself is not modified).
     *
     * @param  array<int, array{id: string, content: string}>  $pali
     * @param  array<int, array{id: string, content: string}>  $translation
     * @return array<int, array{id: string, score: int, issues: string}>
     */
    public function review(array $pali, array $translation): array
    {
        $userText = "# pali\n\n".$this->jsonBlock($pali)."\n\n"
            ."# translation\n\n".$this->jsonBlock($translation)."\n\n";
        Log::debug('PaliTranslate: review', ['input' => $userText]);

        $content = $this->send($this->reviewPrompt, $userText);
        Log::debug('PaliTranslate: review', ['output' => $content]);

        return LlmResponseParser::jsonl($content);
    }

    /**
     * revise step: produce an improved translation from the review comments.
     *
     * @param  array<int, array{id: string, content: string}>  $pali
     * @param  array<int, array{id: string, content: string}>  $translation
     * @param  array<int, array{id: string, score: int, issues: string}>  $review
     * @return array<int, array{id: string, content: string}>
     */
    public function revise(array $pali, array $translation, array $review): array
    {
        $userText = "# pali\n\n".$this->jsonBlock($pali)."\n\n"
            ."# translation\n\n".$this->jsonBlock($translation)."\n\n"
            ."# review\n\n".$this->jsonBlock($review)."\n\n";
        Log::debug('PaliTranslate: revise', ['input' => $userText]);

        $content = $this->send($this->revisePrompt, $userText);
        Log::debug('PaliTranslate: revise', ['output' => $content]);

        return LlmResponseParser::jsonl($content);
    }

    /**
     * Read the existing translation from the output channel, one entry per
     * sentence, as ['id' => ..., 'content' => ...]. Returns an empty array
     * (and logs a warning) when no channel id has been set.
     *
     * @return array<int, array{id: string, content: string}>
     */
    protected function existingTranslation(int $book, int $para): array
    {
        $channelId = $this->workChannel['id'] ?? null;
        if (! $channelId) {
            Log::warning('PaliTranslate: 未设置输出 channel,无法读取已有译文');

            return [];
        }

        $sentences = Sentence::where('channel_uid', $channelId)
            ->where('book_id', $book)
            ->where('paragraph', $para)
            ->orderBy('word_start')
            ->get();

        $result = [];
        foreach ($sentences as $sentence) {
            $id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";
            $result[] = ['id' => $id, 'content' => $sentence->content];
        }

        return $result;
    }

    /**
     * Call the LLM with the given system prompt and user text and return the
     * response text. Falls back to the string '[]' when the response carries
     * no text content at choices[0].message.content.
     */
    protected function send(string $systemPrompt, string $userText): string
    {
        $startAt = time();
        $response = $this->openAIService
            ->setApiUrl($this->model['url'])
            ->setModel($this->model['model'])
            ->setApiKey($this->model['key'])
            ->setSystemPrompt($systemPrompt)
            ->setTemperature(0.0)
            ->setThinking($this->thinking)
            ->setStream($this->stream)
            ->send($userText);
        $complete = time() - $startAt;

        $content = $response['choices'][0]['message']['content'] ?? null;
        if (! is_string($content)) {
            // Make the fallback visible instead of silently returning an empty list.
            Log::warning('PaliTranslate: LLM response has no text content');
            $content = '[]';
        }
        Log::debug("PaliTranslate: complete in {$complete}s", ['content' => $content]);

        return $content;
    }

    /**
     * Wrap an array as a ```json ... ``` fenced code block.
     *
     * Invalid UTF-8 sequences are replaced with U+FFFD rather than making the
     * encoding fail; if encoding still fails, the error is logged and an empty
     * JSON list is emitted so the prompt never contains a blank block.
     *
     * @param  array<int, mixed>  $data
     */
    protected function jsonBlock(array $data): string
    {
        $encoded = json_encode($data, JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE);
        if ($encoded === false) {
            Log::error('PaliTranslate: json_encode failed', ['error' => json_last_error_msg()]);
            $encoded = '[]';
        }

        return "```json\n".$encoded."\n```";
    }
}
|