ArticleTranslateService.php 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. <?php
  2. namespace App\Services\AIAssistant;
  3. use App\Services\ArticleService;
  4. use App\Services\PaliContentService;
  5. use App\Services\SentenceService;
  6. use App\Services\AuthService;
  7. use App\Models\CustomBook;
  8. use Illuminate\Support\Facades\Log;
  9. use App\Http\Api\ChannelApi;
  10. class ArticleTranslateService
  11. {
  12. protected ArticleService $articleService;
  13. protected PaliContentService $paliContentService;
  14. protected TranslateService $translateService;
  15. protected SentenceService $sentenceService;
  16. protected string $modelId;
  17. protected string $modelToken;
  18. protected array $translation = [];
  19. protected string $outputChannelId;
  20. protected string $currArticleId;
  21. protected bool $thinking;
  22. protected string $systemPrompt = <<<PROMPT
  23. 请根据提供的原文,翻译为简体中文。
  24. 原文为逐句数据,翻译时请依照句子的上下文翻译。
  25. id:句子编号
  26. content:内容
  27. # 翻译要求:
  28. 1. 缅文巴利要给出罗马巴利转写
  29. 2. 使用现代汉语
  30. 3. 逐句翻译
  31. # 输出格式要求:
  32. - jsonl 格式
  33. - 每条记录是一个句子
  34. - 每个句子只输出两个字段
  35. 1. id(句子编号)
  36. 2. content(译文)
  37. - 无需输出原文
  38. - 只输出jsonl格式的译文 无需出处额外的解释
  39. # 输出范例
  40. ```jsonl
  41. {"id":"1-2-3-4","content":"译文"}
  42. {"id":"2-3-4-5","content":"译文"}
  43. ```
  44. PROMPT;
  45. public function __construct(
  46. ArticleService $article,
  47. PaliContentService $paliContent,
  48. TranslateService $translateService,
  49. SentenceService $sentenceService
  50. ) {
  51. $this->articleService = $article;
  52. $this->paliContentService = $paliContent;
  53. $this->translateService = $translateService;
  54. $this->sentenceService = $sentenceService;
  55. }
  56. /**
  57. * 设置模型配置
  58. *
  59. * @param string $model
  60. * @return self
  61. */
  62. public function setModel(string $model): self
  63. {
  64. $this->modelId = $model;
  65. $this->modelToken = app(AuthService::class)->getUserToken($model);
  66. return $this;
  67. }
  68. /**
  69. * 设置模型配置
  70. *
  71. * @param bool $thinking
  72. * @return self
  73. */
  74. public function setThinking(bool $thinking): self
  75. {
  76. $this->thinking = $thinking;
  77. return $this;
  78. }
  79. /**
  80. * 设置模型配置
  81. *
  82. * @param string $model
  83. * @return self
  84. */
  85. public function setChannel(string $id): self
  86. {
  87. $this->outputChannelId = $id;
  88. return $this;
  89. }
  90. public function getCurrArticleId()
  91. {
  92. return $this->currArticleId;
  93. }
  94. public function translateAnthology(string $anthologyId, ?callable $onEach = null): int
  95. {
  96. $articleIds = $this->articleService->articlesInAnthology($anthologyId);
  97. foreach ($articleIds as $article) {
  98. $this->translateArticle($article);
  99. if ($onEach) {
  100. $onEach($this);
  101. }
  102. }
  103. return count($articleIds);
  104. }
  105. public function translateArticle(string $articleId)
  106. {
  107. $this->currArticleId = $articleId;
  108. //获取文章中的句子id
  109. $sentenceIds = $this->articleService->sentenceIds($articleId);
  110. if (!$sentenceIds || count($sentenceIds) === 0) {
  111. $this->translation = [];
  112. return $this;
  113. }
  114. $bookId = (int)explode('-', $sentenceIds[0])[0];
  115. //提取原文
  116. $originalChannelId = CustomBook::where('book_id', $bookId)->value('channel_id');
  117. $original = $this->paliContentService->sentences($sentenceIds, [$originalChannelId], 'read');
  118. $orgData = [];
  119. foreach ($original as $key => $paragraph) {
  120. foreach ($paragraph['children'] as $key => $sent) {
  121. $org = $sent['origin'][0];
  122. $orgData[] = [
  123. 'id' => "{$org['book']}-{$org['para']}-{$org['wordStart']}-{$org['wordEnd']}",
  124. 'content' => !empty($org['content']) ? $org['content'] : $org['html'],
  125. ];
  126. }
  127. }
  128. //翻译
  129. $result = $this->translateService->setModel($this->modelId)
  130. ->setSystemPrompt($this->systemPrompt)
  131. ->setTranslatePrompt("# 原文\n\n" .
  132. "```json\n" .
  133. json_encode($orgData, JSON_UNESCAPED_UNICODE) .
  134. "\n```")
  135. ->translate();
  136. Log::debug('ai translation', ['data' => $result->toArray()['data']]);
  137. $this->translation = $result->toArray()['data'];
  138. return $this;
  139. }
  140. //写入结果channel
  141. public function save()
  142. {
  143. if (
  144. !is_array($this->translation) ||
  145. count($this->translation) === 0
  146. ) {
  147. return 0;
  148. }
  149. $channelInfo = ChannelApi::getById($this->outputChannelId);
  150. $sentData = [];
  151. $sentData = array_map(function ($n) use ($channelInfo) {
  152. $sId = explode('-', $n['id']);
  153. return [
  154. 'book_id' => $sId[0],
  155. 'paragraph' => $sId[1],
  156. 'word_start' => $sId[2],
  157. 'word_end' => $sId[3],
  158. 'channel_uid' => $channelInfo['id'],
  159. 'content' => $n['content'],
  160. 'content_type' => $n['content_type'] ?? 'markdown',
  161. 'lang' => $channelInfo['lang'],
  162. 'status' => $channelInfo['status'],
  163. 'editor_uid' => $this->modelId,
  164. ];
  165. }, $this->translation);
  166. foreach ($sentData as $value) {
  167. $this->sentenceService->save($value);
  168. }
  169. return count($sentData);
  170. }
  171. public function saveRpc(string $endpoint, string $accessToken)
  172. {
  173. if (
  174. !is_array($this->translation) ||
  175. count($this->translation) === 0
  176. ) {
  177. return 0;
  178. }
  179. $channelInfo = ChannelApi::getById($this->outputChannelId);
  180. $sentData = [];
  181. $sentData = array_map(function ($n) use ($channelInfo, $accessToken) {
  182. $sId = explode('-', $n['id']);
  183. return [
  184. 'book_id' => $sId[0],
  185. 'paragraph' => $sId[1],
  186. 'word_start' => $sId[2],
  187. 'word_end' => $sId[3],
  188. 'channel_uid' => $channelInfo['id'],
  189. 'content' => $n['content'],
  190. 'content_type' => $n['content_type'] ?? 'markdown',
  191. 'access_token' => $accessToken,
  192. ];
  193. }, $this->translation);
  194. foreach ($sentData as $value) {
  195. $this->sentenceService->saveRpc($endpoint, $value, $this->modelToken);
  196. }
  197. return count($sentData);
  198. }
  199. public function get()
  200. {
  201. return $this->translation;
  202. }
  203. }