UpgradeAITranslation.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Log;
  5. use App\Services\OpenAIService;
  6. use App\Services\AIModelService;
  7. use App\Services\SentenceService;
  8. use App\Services\SearchPaliDataService;
  9. use App\Services\AIAssistant\NissayaTranslateService;
  10. use App\Services\AuthService;
  11. use App\Http\Resources\AiModelResource;
  12. use App\Models\PaliText;
  13. use App\Models\PaliSentence;
  14. use App\Models\Sentence;
  15. use App\Helpers\LlmResponseParser;
  16. use App\Http\Api\ChannelApi;
  17. use App\Tools\Tools;
  /**
   * Artisan command that generates AI output (translation, nissaya translation
   * or word-by-word glosses) for Pali paragraphs and stores the result as
   * sentences in a result channel chosen interactively.
   *
   * The books / paragraphs to process are taken from --book and --para; when
   * --book is omitted books 1..217 are processed, and when --para is omitted
   * every paragraph from 1 to the highest paragraph of the book is processed.
   *
   * NOTE(review): the --resume option is declared in the signature but is not
   * read anywhere in this class.
   */
  class UpgradeAITranslation extends Command
  {
      /**
       * Values accepted for the {type} argument.
       */
      private const TYPES = ['translation', 'nissaya', 'wbw'];

      /**
       * The name and signature of the console command.
       * php artisan upgrade:ai.translation translation --book=141 --para=535
       * php artisan upgrade:ai.translation nissaya --book=207 --para=1247
       * @var string
       */
      protected $signature = 'upgrade:ai.translation {type} {--book=} {--para=} {--resume} {--model=} ';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Command description';

      protected $sentenceService;
      protected $modelService;
      protected $openAIService;
      protected $nissayaTranslateService;

      /**
       * AI model selected with --model. Nullable with a null default so that
       * reading it before handle() has resolved a model does not raise
       * "typed property must not be accessed before initialization".
       */
      protected ?AiModelResource $model = null;

      protected $modelToken;
      protected $workChannel;
      protected $accessToken;

      /**
       * Create a new command instance.
       *
       * @param AIModelService $model
       * @param SentenceService $sent
       * @param OpenAIService $openAI
       * @param NissayaTranslateService $nissayaTranslate
       * @return void
       */
      public function __construct(
          AIModelService $model,
          SentenceService $sent,
          OpenAIService $openAI,
          NissayaTranslateService $nissayaTranslate
      ) {
          $this->modelService = $model;
          $this->sentenceService = $sent;
          $this->openAIService = $openAI;
          $this->nissayaTranslateService = $nissayaTranslate;
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * @return int 0 on success, 1 when the arguments, the model or the
       *             result channel are invalid
       */
      public function handle()
      {
          $type = $this->argument('type');
          // Reject an unknown type once, instead of looping over every
          // paragraph and silently producing nothing.
          if (!in_array($type, self::TYPES, true)) {
              $this->error("unknown type '{$type}', expected one of: " . implode(', ', self::TYPES));
              return 1;
          }

          // Every translator reads the model's url/model/key and save() reads
          // its uid, so the command cannot do anything useful without one.
          if (!$this->option('model')) {
              $this->error('the --model option is required');
              return 1;
          }
          $model = $this->modelService->getModelById($this->option('model'));
          if (!($model instanceof AiModelResource)) {
              $this->error('model not found: ' . $this->option('model'));
              return 1;
          }
          $this->model = $model;
          $this->info("model:{$this->model['model']}");
          $this->modelToken = AuthService::getUserToken($this->model['uid']);

          $this->workChannel = ChannelApi::getById($this->ask('请输入结果channel'));
          if (!$this->workChannel) {
              $this->error('result channel not found');
              return 1;
          }
          // TODO: verify that the entered channel matches the translation type
          // (nissaya -> nissaya channel)

          $books = $this->option('book') ? [$this->option('book')] : range(1, 217);

          foreach ($books as $book) {
              if ($this->option('para')) {
                  $paragraphs = [$this->option('para')];
              } else {
                  $maxParagraph = (int) PaliText::where('book', $book)->max('paragraph');
                  // max() yields null for a book without rows; range(1, null)
                  // would then produce [1, 0] and process bogus paragraphs.
                  if ($maxParagraph < 1) {
                      $this->warn("book {$book} has no paragraphs, skipped");
                      continue;
                  }
                  $paragraphs = range(1, $maxParagraph);
              }

              foreach ($paragraphs as $paragraph) {
                  $this->info($type . " {$book}-{$paragraph}");
                  $this->save($this->translate($type, $book, $paragraph));
              }
          }
          return 0;
      }

      /**
       * Run the translator that belongs to $type for one paragraph.
       *
       * @param string $type one of self::TYPES
       * @param int|string $book
       * @param int|string $paragraph
       * @return array rows with 'id' and 'content' (optionally 'content_type')
       */
      private function translate($type, $book, $paragraph)
      {
          switch ($type) {
              case 'translation':
                  return $this->aiPaliTranslate($book, $paragraph);
              case 'nissaya':
                  return $this->aiNissayaTranslate($book, $paragraph);
              case 'wbw':
                  return $this->aiWBW($book, $paragraph);
              default:
                  return [];
          }
      }

      /**
       * Collect the Pali sentences of one paragraph, ordered by word_begin.
       *
       * @param int|string $book
       * @param int|string $para
       * @return array list of ['id' => 'book-para-begin-end', 'content' => markdown];
       *               empty when the paragraph has no sentences
       */
      private function getPaliContent($book, $para)
      {
          $paliData = app(SearchPaliDataService::class);
          $sentences = PaliSentence::where('book', $book)
              ->where('paragraph', $para)
              ->orderBy('word_begin')
              ->get();

          $json = [];
          foreach ($sentences as $sentence) {
              $content = $paliData->getSentenceText($book, $para, $sentence->word_begin, $sentence->word_end);
              $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
              $json[] = ['id' => $id, 'content' => $content['markdown']];
          }
          return $json;
      }

      /**
       * Ask the model to translate one Pali paragraph into Simplified Chinese.
       *
       * The paragraph is sent as a JSON array of sentences and the reply is
       * parsed as JSONL by LlmResponseParser::jsonl().
       *
       * @param int|string $book
       * @param int|string $para
       * @return array parsed reply rows; empty when there is no model, no Pali
       *               text, or the reply content is not a string
       */
      private function aiPaliTranslate($book, $para)
      {
          $prompt = <<<md
          你是一个巴利语翻译助手。
          pali 是巴利原文的一个段落,json格式, 每条记录是一个句子。包括id 和 content 两个字段
          请翻译这个段落为简体中文。
          翻译要求
          1. 语言风格为现代汉语书面语,不要使用古汉语或者半文半白。
          2. 译文严谨,完全贴合巴利原文,不要加入自己的理解
          3. 巴利原文中的黑体字在译文中也使用黑体。其他标点符号跟随巴利原文,但应该替换为相应的汉字全角符号
          输出格式jsonl
          输出id 和 content 两个字段,
          id 使用巴利原文句子的id ,
          content 为中文译文
          直接输出jsonl数据,无需解释
          **输出范例**
          {"id":"1-2-3-4","content":"译文"}
          {"id":"2-3-4-5","content":"译文"}
          md;

          if (!$this->model) {
              Log::error('model is invalid');
              return [];
          }

          $pali = $this->getPaliContent($book, $para);
          if (empty($pali)) {
              // Nothing to translate; do not spend an API call on an empty list.
              Log::debug("no pali sentences for {$book}-{$para}");
              return [];
          }
          $originalText = "```json\n" . json_encode($pali, JSON_UNESCAPED_UNICODE) . "\n```";
          Log::debug($originalText);

          $startAt = time();
          $response = $this->openAIService->setApiUrl($this->model['url'])
              ->setModel($this->model['model'])
              ->setApiKey($this->model['key'])
              ->setSystemPrompt($prompt)
              ->setTemperature(0.0)
              ->setStream(false)
              ->send("# pali\n\n{$originalText}\n\n");
          $complete = time() - $startAt;

          $translationText = $response['choices'][0]['message']['content'] ?? '[]';
          // The logger's second argument is a context array, not a string.
          Log::debug("complete in {$complete}s", ['content' => $translationText]);

          if (!is_string($translationText)) {
              return [];
          }
          return LlmResponseParser::jsonl($translationText);
      }

      /**
       * Ask the model for word-by-word Chinese glosses of one paragraph.
       *
       * Source sentences are read from the '_System_Wbw_VRI_' system channel.
       * For each sentence the words that have a non-empty real->value and are
       * not of type '.ctl.' are sent to the model; the parsed JSONL reply is
       * stored JSON-encoded as the content of the result row.
       *
       * @param int|string $book
       * @param int|string $para
       * @return array list of ['id' => 'book-para-start-end', 'content' => json string]
       */
      private function aiWBW($book, $para)
      {
          $sysPrompt = <<<md
          你是一个佛教翻译专家,精通巴利文和缅文,精通巴利文逐词解析
          ## 翻译要求:
          - 请将用户提供的巴利句子单词表中的每个巴利文单词翻译为中文
          - 这些单词是一个完整的句子,请根据单词的上下文翻译
          - original 里面的数据是巴利文单词
          - 输入格式为 json 数组
          - 输出jsonl格式
          在原来的数据中添加下列输出字段
          1. meaning:单词的中文意思,如果有两个可能的意思,两个意思之间用/符号分隔
          5. confidence:你认为你给出的这个单词的信息的信心指数(准确程度) 数值1-100 如果觉得非常有把握100, 如果觉得把握不大,适当降低信心指数
          6. note:如果你认为信心指数很低,这个是疑难单词,请在note字段写明原因,如果不是疑难单词,请不要填写note
          **范例**:
          {"id":1,"original":"bhikkhusanghassa","meaning":"比库僧团[的]","confidence":100}
          直接输出jsonl, 无需其他内容
          md;

          if (!$this->model) {
              Log::error('model is invalid');
              return [];
          }

          $channelId = ChannelApi::getSysChannel('_System_Wbw_VRI_');
          $sentences = Sentence::where('channel_uid', $channelId)
              ->where('book_id', $book)
              ->where('paragraph', $para)
              ->get();

          $result = [];
          foreach ($sentences as $sentence) {
              $id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";

              $wbw = json_decode($sentence->content);
              if (!is_array($wbw) && !is_object($wbw)) {
                  Log::warning("wbw content of {$id} is not valid json, skipped");
                  continue;
              }

              $tpl = [];
              foreach ($wbw as $word) {
                  $real = $word->real->value ?? null;
                  $wordType = $word->type->value ?? null;
                  if (!empty($real) && $wordType !== '.ctl.') {
                      $tpl[] = [
                          'id' => $word->sn[0] ?? null,
                          'original' => $real,
                      ];
                  }
              }
              if (empty($tpl)) {
                  // No translatable word in this sentence; skip the API call.
                  continue;
              }

              $tplText = json_encode($tpl, JSON_UNESCAPED_UNICODE);
              Log::debug($tplText);

              $startAt = time();
              $response = $this->openAIService->setApiUrl($this->model['url'])
                  ->setModel($this->model['model'])
                  ->setApiKey($this->model['key'])
                  ->setSystemPrompt($sysPrompt)
                  ->setTemperature(0.7)
                  ->setStream(false)
                  ->send("```json\n{$tplText}\n```");
              $complete = time() - $startAt;

              $content = $response['choices'][0]['message']['content'] ?? '[]';
              Log::debug("ai response in {$complete}s content=" . $content);

              // NOTE(review): no 'content_type' is set here, so save() stores
              // this JSON string as 'markdown' - confirm that is intended.
              $result[] = [
                  'id' => $id,
                  'content' => json_encode(LlmResponseParser::jsonl($content), JSON_UNESCAPED_UNICODE),
              ];
          }
          return $result;
      }

      /**
       * Translate the Burmese nissaya sentences of one paragraph.
       *
       * Each sentence's content is handed to NissayaTranslateService::translate()
       * and the 'data' element of its result is stored JSON-encoded with
       * content_type 'json'.
       *
       * @param int|string $book
       * @param int|string $para
       * @return array list of ['id', 'content', 'content_type']
       */
      private function aiNissayaTranslate($book, $para)
      {
          if (!$this->model) {
              Log::error('model is invalid');
              return [];
          }

          $sentences = Sentence::nissaya()
              ->language('my') // Burmese only
              ->where('book_id', $book)
              ->where('paragraph', $para)
              ->orderBy('strlen')
              ->get();

          $result = [];
          foreach ($sentences as $sentence) {
              $id = "{$sentence->book_id}-{$sentence->paragraph}-{$sentence->word_start}-{$sentence->word_end}";

              $aiNissaya = $this->nissayaTranslateService
                  ->setModel($this->model)
                  ->translate($sentence->content, false);
              // Read 'data' once, tolerating a reply without it.
              $data = $aiNissaya['data'] ?? [];
              Log::debug("ai response ", ['content' => $data]);

              $result[] = [
                  'id' => $id,
                  'content' => json_encode($data, JSON_UNESCAPED_UNICODE),
                  'content_type' => 'json'
              ];
          }
          return $result;
      }

      /**
       * Write result rows into the sentence store through SentenceService::save().
       *
       * Row ids have the form 'book-paragraph-wordStart-wordEnd'. Rows may come
       * straight from model output, so rows without a well-formed id or without
       * content are logged and skipped instead of failing the whole run.
       *
       * @param mixed $data list of rows with 'id', 'content' and optional 'content_type'
       * @return void
       */
      private function save($data)
      {
          if (empty($data) || !is_array($data)) {
              return;
          }

          foreach ($data as $row) {
              if (!is_array($row) || !isset($row['id'], $row['content']) || !is_string($row['id'])) {
                  Log::warning('ai result row skipped: id or content missing', ['row' => $row]);
                  continue;
              }

              $sId = explode('-', $row['id']);
              if (count($sId) !== 4 || count(array_filter($sId, 'is_numeric')) !== 4) {
                  Log::warning('ai result row skipped: malformed id', ['id' => $row['id']]);
                  continue;
              }

              $this->sentenceService->save([
                  'book_id' => $sId[0],
                  'paragraph' => $sId[1],
                  'word_start' => $sId[2],
                  'word_end' => $sId[3],
                  'channel_uid' => $this->workChannel['id'],
                  'content' => $row['content'],
                  'content_type' => $row['content_type'] ?? 'markdown',
                  'lang' => $this->workChannel['lang'],
                  'status' => $this->workChannel['status'],
                  'editor_uid' => $this->model['uid'],
              ]);
          }
      }
  }