2
0

ArticleTranslateService.php 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. <?php
  2. namespace App\Services\AIAssistant;
  3. use App\Services\ArticleService;
  4. use App\Services\PaliContentService;
  5. use App\Services\SentenceService;
  6. use App\Models\CustomBook;
  7. use Illuminate\Support\Facades\Log;
  8. use App\Http\Api\ChannelApi;
  9. use Carbon\Callback;
  /**
   * Translates the sentences of an article (or of every article in an anthology)
   * through TranslateService and writes the translated sentences to an output channel.
   *
   * Intended call order: setModel() and setChannel() first, then either
   * translateArticle()->save() or translateAnthology().
   */
  class ArticleTranslateService
  {
      protected ArticleService $articleService;
      protected PaliContentService $paliContentService;
      protected TranslateService $translateService;
      protected SentenceService $sentenceService;

      /** Model id handed to TranslateService and stored as editor_uid on saved sentences. */
      protected string $modelId;

      /** Records returned by the last translateArticle() call (each expected to carry "id" and "content"). */
      protected array $translation = [];

      /** Id of the channel that save() writes the translation into. */
      protected string $outputChannelId;

      /** System prompt sent with every translation request (runtime text, kept verbatim). */
      protected string $systemPrompt = <<<PROMPT
请根据提供的原文,翻译为简体中文。
原文为逐句数据,翻译时请依照句子的上下文翻译。
id:句子编号
content:内容
# 翻译要求:
1. 缅文巴利要给出罗马巴利转写
2. 使用现代汉语
3. 逐句翻译
# 输出格式要求:
- jsonl 格式
- 每条记录是一个句子
- 每个句子只输出两个字段
1. id(句子编号)
2. content(译文)
- 无需输出原文
- 只输出jsonl格式的译文 无需出处额外的解释
# 输出范例
```jsonl
{"id":"1-2-3-4","content":"译文"}
{"id":"2-3-4-5","content":"译文"}
```
PROMPT;

      public function __construct(
          ArticleService $article,
          PaliContentService $paliContent,
          TranslateService $translateService,
          SentenceService $sentenceService
      ) {
          $this->articleService = $article;
          $this->paliContentService = $paliContent;
          $this->translateService = $translateService;
          $this->sentenceService = $sentenceService;
      }

      /**
       * Set the model used for translation.
       *
       * @param string $model Model id; also recorded as editor_uid when saving.
       * @return self
       */
      public function setModel(string $model): self
      {
          $this->modelId = $model;
          return $this;
      }

      /**
       * Set the output channel that save() writes into.
       *
       * @param string $id Channel id, resolved through ChannelApi::getById() in save().
       * @return self
       */
      public function setChannel(string $id): self
      {
          $this->outputChannelId = $id;
          return $this;
      }

      /**
       * Translate and save every article of an anthology.
       *
       * @param mixed         $anthologyId Passed unchanged to ArticleService::articlesInAnthology().
       * @param callable|null $onEach      Optional callback invoked after each article as
       *                                   $onEach($article, $savedCount), where $savedCount is
       *                                   the value returned by save().
       * @return int Number of articles processed.
       */
      public function translateAnthology($anthologyId, ?callable $onEach = null): int
      {
          $articles = $this->articleService->articlesInAnthology($anthologyId);
          foreach ($articles as $article) {
              $savedCount = $this->translateArticle($article)->save();
              if ($onEach) {
                  $onEach($article, $savedCount);
              }
          }
          return count($articles);
      }

      /**
       * Translate one article and keep the result in memory (see save() / get()).
       *
       * The result is empty when the article has no sentences or no usable
       * source text could be collected; in that case no translation request is sent.
       *
       * @param string $articleId
       * @return self
       */
      public function translateArticle(string $articleId): self
      {
          // Reset first so a failed or empty run never leaves the previous
          // article's records behind for a later save().
          $this->translation = [];

          // Ids of the sentences that make up the article.
          $sentenceIds = $this->articleService->sentenceIds($articleId);
          if (!$sentenceIds || count($sentenceIds) === 0) {
              return $this;
          }

          // Sentence ids have the form "book-para-wordStart-wordEnd";
          // the first segment of the first id selects the book.
          $bookId = (int) explode('-', $sentenceIds[0])[0];

          // Fetch the source text from the channel registered for that book.
          // NOTE(review): value() may yield null when no CustomBook row matches — confirm
          // how PaliContentService::sentences() treats a null channel id.
          $originalChannelId = CustomBook::where('book_id', $bookId)->value('channel_id');
          $original = $this->paliContentService->sentences($sentenceIds, [$originalChannelId], 'read');

          $sourceSentences = $this->collectSourceSentences($original);
          if (count($sourceSentences) === 0) {
              // Nothing to translate: do not spend a model call on an empty payload.
              return $this;
          }

          // Translate.
          $result = $this->translateService->setModel($this->modelId)
              ->setSystemPrompt($this->systemPrompt)
              ->setTranslatePrompt("# 原文\n\n" .
                  "```json\n" .
                  json_encode($sourceSentences, JSON_UNESCAPED_UNICODE) .
                  "\n```")
              ->translate();

          // Read the payload once; tolerate a missing or non-array "data" entry
          // instead of failing on the array-typed property assignment.
          $data = $result->toArray()['data'] ?? [];
          Log::debug('ai translation', ['data' => $data]);
          $this->translation = is_array($data) ? $data : [];

          return $this;
      }

      /**
       * Write the current translation into the output channel.
       *
       * Records that are not well formed (missing id/content, or an id that is not
       * four dash-separated segments) are logged and skipped, because they come
       * from model output and cannot be trusted.
       *
       * @return int Number of sentences handed to SentenceService::save().
       * @throws \RuntimeException When the output channel cannot be resolved.
       */
      public function save(): int
      {
          if (count($this->translation) === 0) {
              return 0;
          }

          $channelInfo = ChannelApi::getById($this->outputChannelId);
          if (!$channelInfo) {
              throw new \RuntimeException("Output channel not found: {$this->outputChannelId}");
          }

          $saved = 0;
          foreach ($this->translation as $record) {
              $row = $this->buildSentenceRow($record, $channelInfo);
              if ($row === null) {
                  Log::warning('ai translation: skipped malformed record', ['record' => $record]);
                  continue;
              }
              $this->sentenceService->save($row);
              $saved++;
          }

          return $saved;
      }

      /**
       * Return the records produced by the last translateArticle() call.
       *
       * @return array
       */
      public function get(): array
      {
          return $this->translation;
      }

      /**
       * Flatten the paragraph/sentence structure returned by PaliContentService
       * into the {id, content} rows sent to the model.
       *
       * @param mixed $paragraphs Iterable of paragraphs, each with a "children" list of
       *                          sentences whose "origin"[0] holds the source sentence.
       * @return array List of ['id' => string, 'content' => mixed].
       */
      private function collectSourceSentences($paragraphs): array
      {
          $rows = [];
          foreach ($paragraphs as $paragraph) {
              foreach ($paragraph['children'] ?? [] as $sentence) {
                  $origin = $sentence['origin'][0] ?? null;
                  if ($origin === null) {
                      // No source text for this sentence: nothing to translate.
                      continue;
                  }
                  $rows[] = [
                      'id' => "{$origin['book']}-{$origin['para']}-{$origin['wordStart']}-{$origin['wordEnd']}",
                      // Prefer "content"; fall back to "html" when it is empty.
                      'content' => !empty($origin['content']) ? $origin['content'] : ($origin['html'] ?? ''),
                  ];
              }
          }
          return $rows;
      }

      /**
       * Turn one translated record into the row expected by SentenceService::save().
       *
       * @param mixed $record      One entry of the translation result.
       * @param mixed $channelInfo Channel data from ChannelApi::getById() (read: id, lang, status).
       * @return array|null The row, or null when the record is malformed.
       */
      private function buildSentenceRow($record, $channelInfo): ?array
      {
          if (!is_array($record) || !isset($record['id'], $record['content']) || !is_string($record['id'])) {
              return null;
          }

          $parts = explode('-', $record['id']);
          if (count($parts) !== 4 || in_array('', $parts, true)) {
              return null;
          }

          return [
              'book_id' => $parts[0],
              'paragraph' => $parts[1],
              'word_start' => $parts[2],
              'word_end' => $parts[3],
              'channel_uid' => $channelInfo['id'],
              'content' => $record['content'],
              'content_type' => $record['content_type'] ?? 'markdown',
              'lang' => $channelInfo['lang'],
              'status' => $channelInfo['status'],
              'editor_uid' => $this->modelId,
          ];
      }
  }