UpgradeAITranslation.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Cache;
  5. use Illuminate\Support\Facades\Log;
  6. use App\Helpers\LlmResponseParser;
  7. use App\Http\Api\ChannelApi;
  8. use App\Http\Resources\AiModelResource;
  9. use App\Models\PaliText;
  10. use App\Models\Sentence;
  11. use App\Services\AIAssistant\NissayaTranslateService;
  12. use App\Services\AIAssistant\PaliTranslateService;
  13. use App\Services\AIModelService;
  14. use App\Services\AuthService;
  15. use App\Services\OpenAIService;
  16. use App\Services\SentenceService;
  17. use function PHPUnit\Framework\isEmpty;
  18. class UpgradeAITranslation extends Command
  19. {
  20. /**
  21. * The name and signature of the console command.
  22. * php artisan upgrade:ai.translation translation --book=131 --para=27
  23. * php artisan upgrade:ai.translation nissaya --book=207 --para=1247
  24. *
  25. * nissaya 参考资料用法示例(--nissaya 指定哪些步骤注入 nissaya 逐词缅文释义):
  26. * - 默认(不传) translate/review/evaluate 全部注入 nissaya
  27. * php artisan upgrade:ai.translation translation {channel} --book=131 --para=27 --steps=translate,review,revise,evaluate
  28. * - 仅 review 注入
  29. * php artisan upgrade:ai.translation translation {channel} --book=131 --para=27 --steps=translate,review,evaluate --nissaya=review
  30. * - review + evaluate 注入,translate 不注入
  31. * php artisan upgrade:ai.translation translation {channel} --book=131 --para=27 --steps=translate,review,evaluate --nissaya=review,evaluate
  32. * - 全部不注入
  33. * php artisan upgrade:ai.translation translation {channel} --book=131 --para=27 --steps=translate,review,evaluate --nissaya=
  34. *
  35. * @var string
  36. */
  37. protected $signature = 'upgrade:ai.translation
  38. {type}
  39. {channel}
  40. {--book=}
  41. {--para=}
  42. {--resume}
  43. {--model=}
  44. {--thinking= : 开启和关闭deepseek thinking true | false}
  45. {--steps=translate : translation 工作流步骤,逗号分隔,可选 translate,review,revise,evaluate(evaluate 为质量评估,须放最后)}
  46. {--nissaya=translate,review,evaluate : 启用 nissaya 参考资料的步骤,逗号分隔,可选 translate,review,evaluate;传空字符串则全部不注入}
  47. {--fresh : 清除缓存断点,从头开始}';
  48. // 缓存键前缀:以 type、channel 区分,记录已完成的 "book|para" 集合,中断后重跑自动跳过
  49. private const CACHE_KEY_PREFIX = 'upgrade:ai.translation:done';
  50. /**
  51. * The console command description.
  52. *
  53. * @var string
  54. */
  55. protected $description = 'Command description';
  56. protected AiModelResource $model;
  57. protected string $modelToken;
  58. protected array $workChannel;
  59. protected string $accessToken;
  60. protected bool $thinking;
  61. /**
  62. * Create a new command instance.
  63. *
  64. * @return void
  65. */
  66. public function __construct(
  67. protected AIModelService $modelService,
  68. protected SentenceService $sentenceService,
  69. protected OpenAIService $openAIService,
  70. protected NissayaTranslateService $nissayaTranslateService,
  71. protected PaliTranslateService $paliTranslateService
  72. ) {
  73. parent::__construct();
  74. }
  75. /**
  76. * Execute the console command.
  77. *
  78. * @return int
  79. */
  80. public function handle()
  81. {
  82. /**
  83. * model
  84. */
  85. if (! $this->option('model')) {
  86. $this->error('model is request');
  87. return 1;
  88. }
  89. $this->model = $this->modelService->getModelById($this->option('model'));
  90. if (empty($this->model)) {
  91. $this->error('invalid model id ');
  92. return 1;
  93. }
  94. $this->info("model:{$this->model['model']}");
  95. $this->modelToken = AuthService::getUserToken($this->model['uid']);
  96. // channel
  97. $this->workChannel = ChannelApi::getById($this->argument('channel'));
  98. // 需要判断输入channel 与翻译类型是否一致 nissaya -> nissaya channel
  99. if ($this->workChannel['type'] !== $this->argument('type')) {
  100. $this->error('channel type not match request ' . $this->argument('type') . ' input is ' . $this->workChannel['type']);
  101. return 1;
  102. }
  103. if ($this->option('thinking')) {
  104. $this->thinking = $this->option('thinking') === 'true';
  105. $this->line('thinking is ' . $this->option('thinking'));
  106. }
  107. // translation 工作流步骤校验
  108. $steps = array_values(array_filter(array_map('trim', explode(',', (string) $this->option('steps')))));
  109. $invalid = array_diff($steps, PaliTranslateService::STEPS);
  110. if (! empty($invalid)) {
  111. $this->error('invalid steps: ' . implode(',', $invalid) . '. allowed: ' . implode(',', PaliTranslateService::STEPS));
  112. return 1;
  113. }
  114. // nissaya 参考资料注入步骤校验(哪些步骤启用 nissaya)
  115. $nissayaSteps = array_values(array_filter(array_map('trim', explode(',', (string) $this->option('nissaya')))));
  116. $invalidNissaya = array_diff($nissayaSteps, PaliTranslateService::NISSAYA_STEPS);
  117. if (! empty($invalidNissaya)) {
  118. $this->error('invalid nissaya steps: ' . implode(',', $invalidNissaya) . '. allowed: ' . implode(',', PaliTranslateService::NISSAYA_STEPS));
  119. return 1;
  120. }
  121. $type = $this->argument('type');
  122. $channelId = $this->workChannel['id'] ?? '';
  123. // 缓存键:按 type、channel 区分不同任务的断点
  124. $cacheKey = self::CACHE_KEY_PREFIX . ':' . $type . ':' . $channelId;
  125. if ($this->option('fresh')) {
  126. Cache::forget($cacheKey);
  127. $this->info('Cleared cached cursor.');
  128. }
  129. // 是否为完整遍历(未指定 book/para),仅此情形在结束后清空断点缓存
  130. $isFullRun = ! $this->option('book') && ! $this->option('para');
  131. // 从缓存恢复已完成的 (book, para) 集合,作为重入时的稳定游标
  132. $done = Cache::get($cacheKey, []);
  133. $books = [];
  134. if ($this->option('book')) {
  135. $books = [$this->option('book')];
  136. } else {
  137. // 未指定 book 时,若已有断点缓存,从上次处理到的 book 继续,无需从 1 开始
  138. $startBook = 1;
  139. if (! empty($done)) {
  140. $doneBooks = array_map(fn($cursor) => (int) explode('|', $cursor)[0], array_keys($done));
  141. $startBook = max($doneBooks);
  142. $this->info("resume from book {$startBook}");
  143. }
  144. $books = range($startBook, 217);
  145. }
  146. foreach ($books as $book) {
  147. $maxParagraph = PaliText::where('book', $book)->max('paragraph');
  148. $paragraphs = range(1, $maxParagraph);
  149. if ($this->option('para')) {
  150. $paragraphs = [$this->option('para')];
  151. }
  152. foreach ($paragraphs as $paragraph) {
  153. // 稳定游标:缓存键已含 type、channel,此处仅以 book|para 标识处理单元
  154. $cursor = $book . '|' . $paragraph;
  155. if (isset($done[$cursor])) {
  156. $this->info("skip {$cursor}");
  157. continue;
  158. }
  159. $start = time();
  160. $data = [];
  161. switch ($this->argument('type')) {
  162. case 'translation':
  163. $data = $this->paliTranslateService
  164. ->setModel($this->model)
  165. ->setChannel($this->workChannel)
  166. ->setThinking($this->thinking ?? null)
  167. ->setNissayaSteps($nissayaSteps)
  168. ->run($steps, (int) $book, (int) $paragraph);
  169. break;
  170. case 'nissaya':
  171. $data = $this->aiNissayaTranslate($book, $paragraph);
  172. break;
  173. case 'wbw':
  174. $data = $this->aiWBW($book, $paragraph);
  175. break;
  176. default:
  177. // code...
  178. break;
  179. }
  180. $this->save($data);
  181. $time = time() - $start;
  182. $this->info($this->argument('type') . " {$book}-{$paragraph} " . count($data) . ' sentences time=' . $time);
  183. // 该处理单元全部写库完成后再标记游标,确保中途中断不会误跳过
  184. $done[$cursor] = true;
  185. Cache::put($cacheKey, $done, now()->addHours(24));
  186. }
  187. $param = [
  188. '--book' => $book,
  189. '--channel' => $this->workChannel,
  190. ];
  191. if ($this->option('para')) {
  192. $param['--para'] = $this->option('para');
  193. }
  194. $this->call('upgrade:progress.para', $param);
  195. $this->call('upgrade:progress.chapter', $param);
  196. $param = [
  197. 'book' => $book,
  198. '--channel' => $this->workChannel,
  199. '--summary' => 'off',
  200. '--granularity' => 'chapter'
  201. ];
  202. if ($this->option('para')) {
  203. $param['--para'] = $this->option('para');
  204. }
  205. $this->call('opensearch:index-tipitaka', $param);
  206. }
  207. // 完整遍历正常结束,清空断点缓存
  208. if ($isFullRun) {
  209. Cache::forget($cacheKey);
  210. }
  211. return 0;
  212. }
  213. private function aiWBW($book, $para)
  214. {
  215. $sysPrompt = <<<'md'
  216. 你是一个佛教翻译专家,精通巴利文和缅文,精通巴利文逐词解析
  217. ## 翻译要求:
  218. - 请将用户提供的巴利句子单词表中的每个巴利文单词翻译为中文
  219. - 这些单词是一个完整的句子,请根据单词的上下文翻译
  220. - original 里面的数据是巴利文单词
  221. - 输入格式为 json 数组
  222. - 输出jsonl格式
  223. 在原来的数据中添加下列输出字段
  224. 1. meaning:单词的中文意思,如果有两个可能的意思,两个意思之间用/符号分隔
  225. 5. confidence:你认为你给出的这个单词的信息的信心指数(准确程度) 数值1-100 如果觉得非常有把握100, 如果觉得把握不大,适当降低信心指数
  226. 6. note:如果你认为信心指数很低,这个是疑难单词,请在note字段写明原因,如果不是疑难单词,请不要填写note
  227. **范例**:
  228. {"id":1,"original":"bhikkhusanghassa","meaning":"比库僧团[的]","confidence":100}
  229. 直接输出jsonl, 无需其他内容
  230. md;
  231. $channelId = ChannelApi::getSysChannel('_System_Wbw_VRI_');
  232. $sentences = Sentence::where('channel_uid', $channelId)
  233. ->where('book_id', $book)
  234. ->where('paragraph', $para)
  235. ->get();
  236. $result = [];
  237. foreach ($sentences as $key => $sentence) {
  238. $wbw = json_decode($sentence->content);
  239. $tpl = [];
  240. foreach ($wbw as $key => $word) {
  241. if (
  242. ! empty($word->real->value) &&
  243. $word->type->value !== '.ctl.'
  244. ) {
  245. $tpl[] = [
  246. 'id' => $word->sn[0],
  247. 'original' => $word->real->value,
  248. ];
  249. }
  250. }
  251. $tplText = json_encode($tpl, JSON_UNESCAPED_UNICODE);
  252. Log::debug($tplText);
  253. $startAt = time();
  254. $llm = $this->openAIService->setApiUrl($this->model['url'])
  255. ->setModel($this->model['model'])
  256. ->setApiKey($this->model['key'])
  257. ->setSystemPrompt($sysPrompt)
  258. ->setTemperature(0.7)
  259. ->setStream(false);
  260. if (isset($this->thinking)) {
  261. $llm = $llm->setThinking($this->thinking);
  262. }
  263. $response = $llm->send("```json\n{$tplText}\n```");
  264. $complete = time() - $startAt;
  265. $content = $response['choices'][0]['message']['content'] ?? '[]';
  266. Log::debug("ai response in {$complete}s content=" . $content);
  267. $json = LlmResponseParser::jsonl($content);
  268. $id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";
  269. $result[] = [
  270. 'id' => $id,
  271. 'content' => json_encode($json, JSON_UNESCAPED_UNICODE),
  272. ];
  273. }
  274. return $result;
  275. }
  276. private function aiNissayaTranslate($book, $para)
  277. {
  278. $sentences = Sentence::nissaya()
  279. ->language('my') // 过滤缅文
  280. ->where('book_id', $book)
  281. ->where('paragraph', $para)
  282. ->orderBy('word_start')
  283. ->get();
  284. $result = [];
  285. foreach ($sentences as $key => $sentence) {
  286. if (! empty($sentence->content)) {
  287. $id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";
  288. $aiNissaya = $this->nissayaTranslateService
  289. ->setModel($this->model)
  290. ->translate($sentence->content, false);
  291. Log::debug('ai response ', ['content' => $aiNissaya['data']]);
  292. $result[] = [
  293. 'id' => $id,
  294. 'content' => json_encode($aiNissaya['data'] ?? [], JSON_UNESCAPED_UNICODE),
  295. 'content_type' => 'json',
  296. ];
  297. }
  298. }
  299. return $result;
  300. }
  301. private function save($data)
  302. {
  303. // 写入句子库
  304. $sentData = [];
  305. $sentData = array_map(function ($n) {
  306. $sId = explode('-', $n['id']);
  307. return [
  308. 'book_id' => $sId[0],
  309. 'paragraph' => $sId[1],
  310. 'word_start' => $sId[2],
  311. 'word_end' => $sId[3],
  312. 'channel_uid' => $this->workChannel['id'],
  313. 'content' => $n['content'],
  314. 'content_type' => $n['content_type'] ?? 'markdown',
  315. 'lang' => $this->workChannel['lang'],
  316. 'status' => $this->workChannel['status'],
  317. 'editor_uid' => $this->model['uid'],
  318. ];
  319. }, $data);
  320. foreach ($sentData as $key => $value) {
  321. $this->sentenceService->saveWithHistory($value);
  322. }
  323. }
  324. }