ArticleTranslateService.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. <?php
  2. namespace App\Services\AIAssistant;
  3. use App\Http\Api\ChannelApi;
  4. use App\Models\CustomBook;
  5. use App\Services\ArticleService;
  6. use App\Services\AuthService;
  7. use App\Services\PaliContentService;
  8. use App\Services\SentenceService;
  9. use Illuminate\Support\Facades\Log;
  10. class ArticleTranslateService
  11. {
  12. protected string $modelId;
  13. protected string $modelToken;
  14. protected array $translation = [];
  15. protected string $outputChannelId;
  16. protected string $currArticleId;
  17. protected bool $thinking;
  18. protected string $systemPrompt = <<<'PROMPT'
  19. 请根据提供的原文,翻译为简体中文。
  20. 原文为逐句数据,翻译时请依照句子的上下文翻译。
  21. id:句子编号
  22. content:内容
  23. # 翻译要求:
  24. 1. 缅文巴利要给出罗马巴利转写
  25. 2. 使用现代汉语
  26. 3. 逐句翻译
  27. # 输出格式要求:
  28. - jsonl 格式
  29. - 每条记录是一个句子
  30. - 每个句子只输出两个字段
  31. 1. id(句子编号)
  32. 2. content(译文)
  33. - 无需输出原文
  34. - 只输出jsonl格式的译文 无需出处额外的解释
  35. # 输出范例
  36. ```jsonl
  37. {"id":"1-2-3-4","content":"译文"}
  38. {"id":"2-3-4-5","content":"译文"}
  39. ```
  40. PROMPT;
  41. public function __construct(
  42. protected ArticleService $articleService,
  43. protected PaliContentService $paliContentService,
  44. protected TranslateService $translateService,
  45. protected SentenceService $sentenceService
  46. ) {}
  47. /**
  48. * 设置模型配置
  49. */
  50. public function setModel(string $model): self
  51. {
  52. $this->modelId = $model;
  53. $this->modelToken = app(AuthService::class)->getUserToken($model);
  54. return $this;
  55. }
  56. /**
  57. * 设置模型配置
  58. */
  59. public function setThinking(bool $thinking): self
  60. {
  61. $this->thinking = $thinking;
  62. return $this;
  63. }
  64. /**
  65. * 设置模型配置
  66. *
  67. * @param string $model
  68. */
  69. public function setChannel(string $id): self
  70. {
  71. $this->outputChannelId = $id;
  72. return $this;
  73. }
  74. public function getCurrArticleId()
  75. {
  76. return $this->currArticleId;
  77. }
  78. public function translateAnthology(string $anthologyId, ?callable $onEach = null): int
  79. {
  80. $articleIds = $this->articleService->articlesInAnthology($anthologyId);
  81. foreach ($articleIds as $article) {
  82. $this->translateArticle($article);
  83. if ($onEach) {
  84. $onEach($this);
  85. }
  86. }
  87. return count($articleIds);
  88. }
  89. public function translateArticle(string $articleId)
  90. {
  91. $this->currArticleId = $articleId;
  92. // 获取文章中的句子id
  93. $sentenceIds = $this->articleService->sentenceIds($articleId);
  94. if (! $sentenceIds || count($sentenceIds) === 0) {
  95. $this->translation = [];
  96. return $this;
  97. }
  98. $bookId = (int) explode('-', $sentenceIds[0])[0];
  99. // 提取原文
  100. $originalChannelId = CustomBook::where('book_id', $bookId)->value('channel_id');
  101. $original = $this->paliContentService->sentences($sentenceIds, [$originalChannelId], 'read');
  102. $orgData = [];
  103. foreach ($original as $key => $paragraph) {
  104. foreach ($paragraph['children'] as $key => $sent) {
  105. $org = $sent['origin'][0];
  106. $orgData[] = [
  107. 'id' => "{$org['book']}-{$org['para']}-{$org['wordStart']}-{$org['wordEnd']}",
  108. 'content' => ! empty($org['content']) ? $org['content'] : $org['html'],
  109. ];
  110. }
  111. }
  112. // 翻译
  113. $result = $this->translateService->setModel($this->modelId)
  114. ->setSystemPrompt($this->systemPrompt)
  115. ->setTranslatePrompt("# 原文\n\n".
  116. "```json\n".
  117. json_encode($orgData, JSON_UNESCAPED_UNICODE).
  118. "\n```")
  119. ->translate();
  120. Log::debug('ai translation', ['data' => $result->toArray()['data']]);
  121. $this->translation = $result->toArray()['data'];
  122. return $this;
  123. }
  124. // 写入结果channel
  125. public function save()
  126. {
  127. if (
  128. ! is_array($this->translation) ||
  129. count($this->translation) === 0
  130. ) {
  131. return 0;
  132. }
  133. $channelInfo = ChannelApi::getById($this->outputChannelId);
  134. $sentData = [];
  135. $sentData = array_map(function ($n) use ($channelInfo) {
  136. $sId = explode('-', $n['id']);
  137. return [
  138. 'book_id' => $sId[0],
  139. 'paragraph' => $sId[1],
  140. 'word_start' => $sId[2],
  141. 'word_end' => $sId[3],
  142. 'channel_uid' => $channelInfo['id'],
  143. 'content' => $n['content'],
  144. 'content_type' => $n['content_type'] ?? 'markdown',
  145. 'lang' => $channelInfo['lang'],
  146. 'status' => $channelInfo['status'],
  147. 'editor_uid' => $this->modelId,
  148. ];
  149. }, $this->translation);
  150. foreach ($sentData as $value) {
  151. $this->sentenceService->save($value);
  152. }
  153. return count($sentData);
  154. }
  155. public function saveRpc(string $endpoint, string $accessToken)
  156. {
  157. if (
  158. ! is_array($this->translation) ||
  159. count($this->translation) === 0
  160. ) {
  161. return 0;
  162. }
  163. $channelInfo = ChannelApi::getById($this->outputChannelId);
  164. $sentData = [];
  165. $sentData = array_map(function ($n) use ($channelInfo, $accessToken) {
  166. $sId = explode('-', $n['id']);
  167. return [
  168. 'book_id' => $sId[0],
  169. 'paragraph' => $sId[1],
  170. 'word_start' => $sId[2],
  171. 'word_end' => $sId[3],
  172. 'channel_uid' => $channelInfo['id'],
  173. 'content' => $n['content'],
  174. 'content_type' => $n['content_type'] ?? 'markdown',
  175. 'access_token' => $accessToken,
  176. ];
  177. }, $this->translation);
  178. foreach ($sentData as $value) {
  179. $this->sentenceService->saveRpc($endpoint, $value, $this->modelToken);
  180. }
  181. return count($sentData);
  182. }
  183. public function get()
  184. {
  185. return $this->translation;
  186. }
  187. }