|
@@ -5,16 +5,14 @@ namespace App\Console\Commands;
|
|
|
use App\Helpers\LlmResponseParser;
|
|
use App\Helpers\LlmResponseParser;
|
|
|
use App\Http\Api\ChannelApi;
|
|
use App\Http\Api\ChannelApi;
|
|
|
use App\Http\Resources\AiModelResource;
|
|
use App\Http\Resources\AiModelResource;
|
|
|
-use App\Models\PaliSentence;
|
|
|
|
|
use App\Models\PaliText;
|
|
use App\Models\PaliText;
|
|
|
use App\Models\Sentence;
|
|
use App\Models\Sentence;
|
|
|
use App\Services\AIAssistant\NissayaTranslateService;
|
|
use App\Services\AIAssistant\NissayaTranslateService;
|
|
|
|
|
+use App\Services\AIAssistant\PaliTranslateService;
|
|
|
use App\Services\AIModelService;
|
|
use App\Services\AIModelService;
|
|
|
use App\Services\AuthService;
|
|
use App\Services\AuthService;
|
|
|
use App\Services\OpenAIService;
|
|
use App\Services\OpenAIService;
|
|
|
-use App\Services\SearchPaliDataService;
|
|
|
|
|
use App\Services\SentenceService;
|
|
use App\Services\SentenceService;
|
|
|
-use App\Tools\Tools;
|
|
|
|
|
use Illuminate\Console\Command;
|
|
use Illuminate\Console\Command;
|
|
|
use Illuminate\Support\Facades\Cache;
|
|
use Illuminate\Support\Facades\Cache;
|
|
|
use Illuminate\Support\Facades\Log;
|
|
use Illuminate\Support\Facades\Log;
|
|
@@ -23,7 +21,7 @@ class UpgradeAITranslation extends Command
|
|
|
{
|
|
{
|
|
|
/**
|
|
/**
|
|
|
* The name and signature of the console command.
|
|
* The name and signature of the console command.
|
|
|
- * php artisan upgrade:ai.translation translation --book=141 --para=535
|
|
|
|
|
|
|
+ * php artisan upgrade:ai.translation translation --book=131 --para=27
|
|
|
* php artisan upgrade:ai.translation nissaya --book=207 --para=1247
|
|
* php artisan upgrade:ai.translation nissaya --book=207 --para=1247
|
|
|
*
|
|
*
|
|
|
* @var string
|
|
* @var string
|
|
@@ -36,6 +34,7 @@ class UpgradeAITranslation extends Command
|
|
|
{--resume}
|
|
{--resume}
|
|
|
{--model=}
|
|
{--model=}
|
|
|
{--thinking= : 开启和关闭deepseek thinking true | false}
|
|
{--thinking= : 开启和关闭deepseek thinking true | false}
|
|
|
|
|
+ {--steps=translate : translation 工作流步骤,逗号分隔,可选 translate,review,revise}
|
|
|
{--fresh : 清除缓存断点,从头开始}';
|
|
{--fresh : 清除缓存断点,从头开始}';
|
|
|
|
|
|
|
|
// 缓存键前缀:以 type、channel 区分,记录已完成的 "book|para" 集合,中断后重跑自动跳过
|
|
// 缓存键前缀:以 type、channel 区分,记录已完成的 "book|para" 集合,中断后重跑自动跳过
|
|
@@ -67,7 +66,8 @@ class UpgradeAITranslation extends Command
|
|
|
protected AIModelService $modelService,
|
|
protected AIModelService $modelService,
|
|
|
protected SentenceService $sentenceService,
|
|
protected SentenceService $sentenceService,
|
|
|
protected OpenAIService $openAIService,
|
|
protected OpenAIService $openAIService,
|
|
|
- protected NissayaTranslateService $nissayaTranslateService
|
|
|
|
|
|
|
+ protected NissayaTranslateService $nissayaTranslateService,
|
|
|
|
|
+ protected PaliTranslateService $paliTranslateService
|
|
|
) {
|
|
) {
|
|
|
parent::__construct();
|
|
parent::__construct();
|
|
|
}
|
|
}
|
|
@@ -82,33 +82,43 @@ class UpgradeAITranslation extends Command
|
|
|
/**
|
|
/**
|
|
|
* model
|
|
* model
|
|
|
*/
|
|
*/
|
|
|
- if (!$this->option('model')) {
|
|
|
|
|
|
|
+ if (! $this->option('model')) {
|
|
|
$this->error('model is request');
|
|
$this->error('model is request');
|
|
|
|
|
+
|
|
|
return 1;
|
|
return 1;
|
|
|
}
|
|
}
|
|
|
$this->model = $this->modelService->getModelById($this->option('model'));
|
|
$this->model = $this->modelService->getModelById($this->option('model'));
|
|
|
$this->info("model:{$this->model['model']}");
|
|
$this->info("model:{$this->model['model']}");
|
|
|
$this->modelToken = AuthService::getUserToken($this->model['uid']);
|
|
$this->modelToken = AuthService::getUserToken($this->model['uid']);
|
|
|
|
|
|
|
|
- //channel
|
|
|
|
|
|
|
+ // channel
|
|
|
$this->workChannel = ChannelApi::getById($this->argument('channel'));
|
|
$this->workChannel = ChannelApi::getById($this->argument('channel'));
|
|
|
// 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
|
|
// 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
|
|
|
if ($this->workChannel['type'] !== $this->argument('type')) {
|
|
if ($this->workChannel['type'] !== $this->argument('type')) {
|
|
|
- $this->error('channel type not match request ' . $this->argument('type') . ' input is ' . $this->workChannel['type']);
|
|
|
|
|
|
|
+ $this->error('channel type not match request '.$this->argument('type').' input is '.$this->workChannel['type']);
|
|
|
|
|
|
|
|
return 1;
|
|
return 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if ($this->option('thinking')) {
|
|
if ($this->option('thinking')) {
|
|
|
$this->thinking = $this->option('thinking') === 'true';
|
|
$this->thinking = $this->option('thinking') === 'true';
|
|
|
- $this->line('thinking is ' . $this->option('thinking'));
|
|
|
|
|
|
|
+ $this->line('thinking is '.$this->option('thinking'));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // translation 工作流步骤校验
|
|
|
|
|
+ $steps = array_values(array_filter(array_map('trim', explode(',', (string) $this->option('steps')))));
|
|
|
|
|
+ $invalid = array_diff($steps, PaliTranslateService::STEPS);
|
|
|
|
|
+ if (! empty($invalid)) {
|
|
|
|
|
+ $this->error('invalid steps: '.implode(',', $invalid).'. allowed: '.implode(',', PaliTranslateService::STEPS));
|
|
|
|
|
+
|
|
|
|
|
+ return 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
$type = $this->argument('type');
|
|
$type = $this->argument('type');
|
|
|
$channelId = $this->workChannel['id'] ?? '';
|
|
$channelId = $this->workChannel['id'] ?? '';
|
|
|
|
|
|
|
|
// 缓存键:按 type、channel 区分不同任务的断点
|
|
// 缓存键:按 type、channel 区分不同任务的断点
|
|
|
- $cacheKey = self::CACHE_KEY_PREFIX . ':' . $type . ':' . $channelId;
|
|
|
|
|
|
|
+ $cacheKey = self::CACHE_KEY_PREFIX.':'.$type.':'.$channelId;
|
|
|
|
|
|
|
|
if ($this->option('fresh')) {
|
|
if ($this->option('fresh')) {
|
|
|
Cache::forget($cacheKey);
|
|
Cache::forget($cacheKey);
|
|
@@ -128,7 +138,7 @@ class UpgradeAITranslation extends Command
|
|
|
// 未指定 book 时,若已有断点缓存,从上次处理到的 book 继续,无需从 1 开始
|
|
// 未指定 book 时,若已有断点缓存,从上次处理到的 book 继续,无需从 1 开始
|
|
|
$startBook = 1;
|
|
$startBook = 1;
|
|
|
if (! empty($done)) {
|
|
if (! empty($done)) {
|
|
|
- $doneBooks = array_map(fn($cursor) => (int) explode('|', $cursor)[0], array_keys($done));
|
|
|
|
|
|
|
+ $doneBooks = array_map(fn ($cursor) => (int) explode('|', $cursor)[0], array_keys($done));
|
|
|
$startBook = max($doneBooks);
|
|
$startBook = max($doneBooks);
|
|
|
$this->info("resume from book {$startBook}");
|
|
$this->info("resume from book {$startBook}");
|
|
|
}
|
|
}
|
|
@@ -142,7 +152,7 @@ class UpgradeAITranslation extends Command
|
|
|
}
|
|
}
|
|
|
foreach ($paragraphs as $key => $paragraph) {
|
|
foreach ($paragraphs as $key => $paragraph) {
|
|
|
// 稳定游标:缓存键已含 type、channel,此处仅以 book|para 标识处理单元
|
|
// 稳定游标:缓存键已含 type、channel,此处仅以 book|para 标识处理单元
|
|
|
- $cursor = $book . '|' . $paragraph;
|
|
|
|
|
|
|
+ $cursor = $book.'|'.$paragraph;
|
|
|
if (isset($done[$cursor])) {
|
|
if (isset($done[$cursor])) {
|
|
|
$this->info("skip {$cursor}");
|
|
$this->info("skip {$cursor}");
|
|
|
|
|
|
|
@@ -152,7 +162,11 @@ class UpgradeAITranslation extends Command
|
|
|
$data = [];
|
|
$data = [];
|
|
|
switch ($this->argument('type')) {
|
|
switch ($this->argument('type')) {
|
|
|
case 'translation':
|
|
case 'translation':
|
|
|
- $data = $this->aiPaliTranslate($book, $paragraph);
|
|
|
|
|
|
|
+ $data = $this->paliTranslateService
|
|
|
|
|
+ ->setModel($this->model)
|
|
|
|
|
+ ->setChannel($this->workChannel)
|
|
|
|
|
+ ->setThinking($this->thinking ?? null)
|
|
|
|
|
+ ->run($steps, (int) $book, (int) $paragraph);
|
|
|
break;
|
|
break;
|
|
|
case 'nissaya':
|
|
case 'nissaya':
|
|
|
$data = $this->aiNissayaTranslate($book, $paragraph);
|
|
$data = $this->aiNissayaTranslate($book, $paragraph);
|
|
@@ -166,7 +180,7 @@ class UpgradeAITranslation extends Command
|
|
|
}
|
|
}
|
|
|
$this->save($data);
|
|
$this->save($data);
|
|
|
$time = time() - $start;
|
|
$time = time() - $start;
|
|
|
- $this->info($this->argument('type') . " {$book}-{$paragraph} " . count($data) . ' sentences time=' . $time);
|
|
|
|
|
|
|
+ $this->info($this->argument('type')." {$book}-{$paragraph} ".count($data).' sentences time='.$time);
|
|
|
// 该处理单元全部写库完成后再标记游标,确保中途中断不会误跳过
|
|
// 该处理单元全部写库完成后再标记游标,确保中途中断不会误跳过
|
|
|
$done[$cursor] = true;
|
|
$done[$cursor] = true;
|
|
|
Cache::put($cacheKey, $done, now()->addHours(24));
|
|
Cache::put($cacheKey, $done, now()->addHours(24));
|
|
@@ -181,82 +195,6 @@ class UpgradeAITranslation extends Command
|
|
|
return 0;
|
|
return 0;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- private function getPaliContent($book, $para)
|
|
|
|
|
- {
|
|
|
|
|
- $sentenceService = app(SearchPaliDataService::class);
|
|
|
|
|
- $sentences = PaliSentence::where('book', $book)
|
|
|
|
|
- ->where('paragraph', $para)
|
|
|
|
|
- ->orderBy('word_begin')
|
|
|
|
|
- ->get();
|
|
|
|
|
- if (! $sentences) {
|
|
|
|
|
- return null;
|
|
|
|
|
- }
|
|
|
|
|
- $json = [];
|
|
|
|
|
- foreach ($sentences as $key => $sentence) {
|
|
|
|
|
- $content = $sentenceService->getSentenceContent($book, $para, $sentence->word_begin, $sentence->word_end);
|
|
|
|
|
- $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
|
|
|
|
|
- $json[] = ['id' => $id, 'content' => $content['markdown']];
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return $json;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- private function aiPaliTranslate($book, $para)
|
|
|
|
|
- {
|
|
|
|
|
- $prompt = <<<'md'
|
|
|
|
|
- 你是一个巴利语翻译助手。
|
|
|
|
|
- pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
|
|
|
|
|
- 请翻译这个段落为简体中文。
|
|
|
|
|
-
|
|
|
|
|
- 翻译要求
|
|
|
|
|
- 1. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白。
|
|
|
|
|
- 2. 译文严谨,完全贴合巴利原文,不要加入自己的理解
|
|
|
|
|
- 3. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应该替换为相应的汉字全角符号
|
|
|
|
|
-
|
|
|
|
|
- 输出格式jsonl
|
|
|
|
|
- 输出id 和 content 两个字段,
|
|
|
|
|
- id 使用巴利原文句子的id ,
|
|
|
|
|
- content 为中文译文
|
|
|
|
|
-
|
|
|
|
|
- 直接输出jsonl数据,无需解释
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- **输出范例**
|
|
|
|
|
- {"id":"1-2-3-4","content":"译文"}
|
|
|
|
|
- {"id":"2-3-4-5","content":"译文"}
|
|
|
|
|
- md;
|
|
|
|
|
-
|
|
|
|
|
- $pali = $this->getPaliContent($book, $para);
|
|
|
|
|
- $originalText = "```json\n" . json_encode($pali, JSON_UNESCAPED_UNICODE) . "\n```";
|
|
|
|
|
- Log::debug($originalText);
|
|
|
|
|
- if (! $this->model) {
|
|
|
|
|
- Log::error('model is invalid');
|
|
|
|
|
-
|
|
|
|
|
- return [];
|
|
|
|
|
- }
|
|
|
|
|
- $startAt = time();
|
|
|
|
|
- $llm = $this->openAIService->setApiUrl($this->model['url'])
|
|
|
|
|
- ->setModel($this->model['model'])
|
|
|
|
|
- ->setApiKey($this->model['key'])
|
|
|
|
|
- ->setSystemPrompt($prompt)
|
|
|
|
|
- ->setTemperature(0.0)
|
|
|
|
|
- ->setStream(false);
|
|
|
|
|
- if (isset($this->thinking)) {
|
|
|
|
|
- $llm = $llm->setThinking($this->thinking);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- $response = $llm->send("# pali\n\n{$originalText}\n\n");
|
|
|
|
|
- $complete = time() - $startAt;
|
|
|
|
|
- $translationText = $response['choices'][0]['message']['content'] ?? '[]';
|
|
|
|
|
- Log::debug("complete in {$complete}s", ['content' => $translationText]);
|
|
|
|
|
- $json = [];
|
|
|
|
|
- if (is_string($translationText)) {
|
|
|
|
|
- $json = LlmResponseParser::jsonl($translationText);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return $json;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
private function aiWBW($book, $para)
|
|
private function aiWBW($book, $para)
|
|
|
{
|
|
{
|
|
|
$sysPrompt = <<<'md'
|
|
$sysPrompt = <<<'md'
|
|
@@ -316,7 +254,7 @@ class UpgradeAITranslation extends Command
|
|
|
$response = $llm->send("```json\n{$tplText}\n```");
|
|
$response = $llm->send("```json\n{$tplText}\n```");
|
|
|
$complete = time() - $startAt;
|
|
$complete = time() - $startAt;
|
|
|
$content = $response['choices'][0]['message']['content'] ?? '[]';
|
|
$content = $response['choices'][0]['message']['content'] ?? '[]';
|
|
|
- Log::debug("ai response in {$complete}s content=" . $content);
|
|
|
|
|
|
|
+ Log::debug("ai response in {$complete}s content=".$content);
|
|
|
|
|
|
|
|
$json = LlmResponseParser::jsonl($content);
|
|
$json = LlmResponseParser::jsonl($content);
|
|
|
|
|
|
|
@@ -332,23 +270,6 @@ class UpgradeAITranslation extends Command
|
|
|
|
|
|
|
|
private function aiNissayaTranslate($book, $para)
|
|
private function aiNissayaTranslate($book, $para)
|
|
|
{
|
|
{
|
|
|
- $sysPrompt = <<<'md'
|
|
|
|
|
- 你是一个佛教翻译专家,精通巴利文和缅文
|
|
|
|
|
- ## 翻译要求:
|
|
|
|
|
- - 请将nissaya单词表中的巴利文和缅文分别翻译为中文
|
|
|
|
|
- - 输入格式为 巴利文:缅文
|
|
|
|
|
- - 一行是一条记录,翻译的时候,请不要拆分一行中的巴利文单词或缅文单词,一行中出现多个单词的,一起翻译
|
|
|
|
|
- - 输出csv格式内容,分隔符为"$",
|
|
|
|
|
- - 字段如下:巴利文$巴利文的中文译文$缅文$缅文的中文译文 #两个译文的语义相似度(%)
|
|
|
|
|
-
|
|
|
|
|
- **范例**:
|
|
|
|
|
-
|
|
|
|
|
- pana$然而$ဝါဒန္တရကား$教义之说 #60%
|
|
|
|
|
-
|
|
|
|
|
- 直接输出csv, 无需其他内容
|
|
|
|
|
- 用```包裹的行为注释内容,也需要翻译和解释。放在最后面。如果没有```,无需处理
|
|
|
|
|
- md;
|
|
|
|
|
-
|
|
|
|
|
$sentences = Sentence::nissaya()
|
|
$sentences = Sentence::nissaya()
|
|
|
->language('my') // 过滤缅文
|
|
->language('my') // 过滤缅文
|
|
|
->where('book_id', $book)
|
|
->where('book_id', $book)
|
|
@@ -358,31 +279,6 @@ class UpgradeAITranslation extends Command
|
|
|
$result = [];
|
|
$result = [];
|
|
|
foreach ($sentences as $key => $sentence) {
|
|
foreach ($sentences as $key => $sentence) {
|
|
|
$id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";
|
|
$id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";
|
|
|
- /*
|
|
|
|
|
- $nissaya = [];
|
|
|
|
|
- $rows = explode("\n", $sentence->content);
|
|
|
|
|
- foreach ($rows as $key => $row) {
|
|
|
|
|
- if (strpos('=', $row) >= 0) {
|
|
|
|
|
- $factors = explode("=", $row);
|
|
|
|
|
- $nissaya[] = Tools::MyToRm($factors[0]) . ':' . end($factors);
|
|
|
|
|
- } else {
|
|
|
|
|
- $nissaya[] = $row;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- $nissayaText = json_encode(implode("\n", $nissaya), JSON_UNESCAPED_UNICODE);
|
|
|
|
|
- Log::debug($nissayaText);
|
|
|
|
|
- $startAt = time();
|
|
|
|
|
- $response = $this->openAIService->setApiUrl($this->model['url'])
|
|
|
|
|
- ->setModel($this->model['model'])
|
|
|
|
|
- ->setApiKey($this->model['key'])
|
|
|
|
|
- ->setSystemPrompt($sysPrompt)
|
|
|
|
|
- ->setTemperature(0.7)
|
|
|
|
|
- ->setStream(false)
|
|
|
|
|
- ->send("# nissaya\n\n{$nissayaText}\n\n");
|
|
|
|
|
- $complete = time() - $startAt;
|
|
|
|
|
- $content = $response['choices'][0]['message']['content'] ?? '';
|
|
|
|
|
- Log::debug("ai response in {$complete}s content=" . $content);
|
|
|
|
|
- */
|
|
|
|
|
$aiNissaya = $this->nissayaTranslateService
|
|
$aiNissaya = $this->nissayaTranslateService
|
|
|
->setModel($this->model)
|
|
->setModel($this->model)
|
|
|
->translate($sentence->content, false);
|
|
->translate($sentence->content, false);
|