2
0

SummaryService.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Http;
  4. use App\Services\AIModelService;
  5. use Illuminate\Support\Facades\Cache;
  6. use Illuminate\Support\Facades\Log;
  /**
   * Summarises long Markdown text by calling a chat-completion endpoint through the AI proxy.
   *
   * The text is split into paragraph-aligned chunks, every chunk is summarised on its own,
   * and (when there is more than one partial summary) the partial summaries are summarised
   * once more into the final result. Non-empty results are cached for one week.
   *
   * NOTE(review): the cache key depends only on the input text, so calls that differ only
   * in $maxTokens share one cache entry — confirm this is intended.
   */
  class SummaryService
  {
      /** HTTP statuses that are treated as temporary and retried. */
      private const RETRYABLE_STATUSES = [429, 500, 502, 503, 504];

      /**
       * Identifier sent to the proxy as `model_id` and inside the payload as `model`.
       * Stays '' when no model could be resolved in the constructor.
       */
      protected string $modelId = '';

      /** Proxy endpoint; built in the constructor from the `mint.ai.proxy` config value. */
      protected string $apiUrl = '';

      /** NOTE(review): not referenced anywhere in this class — presumably kept for subclasses; confirm. */
      protected string $apiModel = 'deepseek-v3';

      /** Maximum number of HTTP attempts per model call. */
      protected int $maxRetries = 3;

      /** Maximum number of characters per chunk; tune to the model's context window. */
      protected int $chunkSize = 20000;

      /** System message sent with every request. */
      private string $systemPrompt = '你是一个摘要写作助手.请根据用户的输入文本生成中文的摘要,直接输出摘要,无需解释说明。';

      /**
       * Resolves the summarisation model and builds the proxy endpoint URL.
       *
       * @param AIModelService $aiModels source of the system models registered for 'summarize'
       */
      public function __construct(AIModelService $aiModels)
      {
          // Without an initialised $modelId every later model call would fail with an
          // "accessed before initialization" Error, so resolve it defensively here.
          $this->modelId = $this->resolveModelId($aiModels->getSysModels('summarize'));
          if ($this->modelId === '') {
              Log::warning("SummaryService could not resolve a model id for 'summarize'");
          }

          // Drop a trailing slash on the configured base so the path is not doubled.
          $this->apiUrl = rtrim((string) config('mint.ai.proxy'), '/') . '/api/openai';
      }

      /**
       * Produces a summary of the given text, with caching and optional forced refresh.
       *
       * Steps:
       * - the cache key is 'summary_' followed by the md5 of the text;
       * - unless $forceRefresh is set, a non-empty cached string is returned immediately;
       * - otherwise the text is chunked, each chunk is summarised, and several partial
       *   summaries are merged by one more model call;
       * - a non-empty result is cached for one week; an empty result is never cached, so a
       *   temporary failure cannot hide the real summary until the entry expires.
       *
       * @param string $text         input Markdown text
       * @param int    $maxTokens    `max_tokens` value sent with every model request
       * @param bool   $forceRefresh when true, skip the cache lookup and regenerate
       * @return string the summary, or '' when no summary could be produced
       */
      public function summarize(string $text, int $maxTokens = 500, bool $forceRefresh = false): string
      {
          $cacheKey = 'summary_' . md5($text);

          if (!$forceRefresh) {
              // One read instead of has() + get(): the entry could expire between the two
              // calls and get() would then hand back null for a string return type.
              $cached = Cache::get($cacheKey);
              if (is_string($cached) && $cached !== '') {
                  Log::debug("SummaryService cache hit", ['key' => $cacheKey]);
                  return $cached;
              }
          }

          Log::debug("SummaryService generating new summary", [
              'key' => $cacheKey,
              'forceRefresh' => $forceRefresh
          ]);

          // Summarise every chunk; chunks whose request failed are skipped.
          $partialSummaries = [];
          foreach ($this->splitText($text, $this->chunkSize) as $chunk) {
              $summary = $this->callOpenAI($chunk, $maxTokens);
              if ($summary !== '') {
                  $partialSummaries[] = $summary;
              }
          }

          if ($partialSummaries === []) {
              Log::warning("SummaryService no partial summaries", ['key' => $cacheKey]);
              return '';
          }

          // A single partial summary already is the final one; several are merged.
          $finalSummary = count($partialSummaries) === 1
              ? $partialSummaries[0]
              : $this->callOpenAI(implode("\n\n", $partialSummaries), $maxTokens);

          if ($finalSummary === '') {
              Log::warning("SummaryService final summary empty, not cached", ['key' => $cacheKey]);
              return '';
          }

          Cache::put($cacheKey, $finalSummary, now()->addWeek());
          Log::debug("SummaryService cached new summary", [
              'key' => $cacheKey,
              'summary' => mb_substr($finalSummary, 0, 10, 'UTF-8')
          ]);

          return $finalSummary;
      }

      /**
       * Splits text into chunks of at most $chunkSize characters, in document order.
       *
       * Paragraphs are separated by blank lines and are kept whole whenever possible;
       * consecutive paragraphs are joined with a blank line while they still fit.
       * A paragraph longer than $chunkSize is cut into pieces of $chunkSize characters.
       * Empty paragraphs are dropped and no empty chunk is ever returned.
       *
       * @param string $text      input Markdown text
       * @param int    $chunkSize maximum characters per chunk, must be at least 1
       * @return array list of non-empty chunk strings
       * @throws \InvalidArgumentException when $chunkSize is smaller than 1
       */
      protected function splitText(string $text, int $chunkSize): array
      {
          if ($chunkSize < 1) {
              // A non-positive size could never make progress when cutting a paragraph.
              throw new \InvalidArgumentException('chunkSize must be at least 1');
          }

          $paragraphs = preg_split("/\r?\n\r?\n/", $text);
          if ($paragraphs === false) {
              $paragraphs = [$text];
          }

          $chunks = [];
          $currentChunk = '';

          foreach ($paragraphs as $para) {
              $para = trim($para);
              if ($para === '') {
                  continue;
              }

              $paraLength = mb_strlen($para, 'UTF-8');

              if ($paraLength > $chunkSize) {
                  // Flush what was collected so far first, otherwise earlier paragraphs
                  // would end up behind the pieces of this one.
                  if ($currentChunk !== '') {
                      $chunks[] = $currentChunk;
                      $currentChunk = '';
                  }
                  array_push($chunks, ...mb_str_split($para, $chunkSize, 'UTF-8'));
                  continue;
              }

              if ($currentChunk === '') {
                  // Nothing pending: start a new chunk without emitting an empty one.
                  $currentChunk = $para;
                  continue;
              }

              // +2 accounts for the blank line that joins two paragraphs.
              if (mb_strlen($currentChunk, 'UTF-8') + $paraLength + 2 > $chunkSize) {
                  $chunks[] = $currentChunk;
                  $currentChunk = $para;
              } else {
                  $currentChunk .= "\n\n" . $para;
              }
          }

          if ($currentChunk !== '') {
              $chunks[] = $currentChunk;
          }

          return $chunks;
      }

      /**
       * Sends one chat-completion request through the proxy and returns the reply text.
       *
       * The request is attempted up to $maxRetries times. Exceptions raised by the HTTP
       * call and the statuses 429, 500, 502, 503 and 504 trigger a retry after a pause
       * that starts at one second and is multiplied by ten after each failed attempt.
       * Any other unsuccessful status ends the call immediately. Every failure is logged.
       *
       * @param string $text      user message content to summarise
       * @param int    $maxTokens `max_tokens` value placed in the payload
       * @return string `choices[0].message.content` of the response, or '' on failure
       */
      protected function callOpenAI(string $text, int $maxTokens = 200): string
      {
          $payload = [
              'model' => $this->modelId,
              'messages' => [
                  [
                      'role' => 'system',
                      'content' => $this->systemPrompt
                  ],
                  [
                      'role' => 'user',
                      'content' => $text
                  ],
              ],
              'max_tokens' => $maxTokens,
          ];

          $delay = 1;

          for ($attempt = 1; $attempt <= $this->maxRetries; $attempt++) {
              try {
                  // NOTE(review): the bearer token is empty here — confirm whether the
                  // proxy needs credentials or whether the value was stripped.
                  $response = Http::timeout(100)
                      ->withHeaders([
                          'Authorization' => 'Bearer ',
                          'Content-Type' => 'application/json',
                      ])->post($this->apiUrl, [
                          'model_id' => $this->modelId,
                          'payload' => $payload
                      ]);

                  if ($response->successful()) {
                      $data = $response->json();
                      $content = $data['choices'][0]['message']['content'] ?? '';
                      // Guard the string return type against an unexpected payload shape.
                      return is_string($content) ? $content : '';
                  }

                  $status = $response->status();
                  if (!in_array($status, self::RETRYABLE_STATUSES, true)) {
                      Log::warning("SummaryService request failed", ['status' => $status]);
                      return '';
                  }
                  $reason = "Temporary server error: " . $status;
              } catch (\Exception $e) {
                  $reason = $e->getMessage();
              }

              if ($attempt >= $this->maxRetries) {
                  Log::warning("SummaryService request gave up", [
                      'attempts' => $attempt,
                      'reason' => $reason
                  ]);
                  return '';
              }

              Log::debug("SummaryService request retry", [
                  'attempt' => $attempt,
                  'delay' => $delay,
                  'reason' => $reason
              ]);
              sleep($delay);
              $delay *= 10;
          }

          return '';
      }

      /**
       * Extracts the `uid` of the first entry from a model list.
       *
       * NOTE(review): the `uid` field name follows the assignment that was previously
       * commented out in the constructor — confirm against AIModelService::getSysModels().
       *
       * @param mixed $models value returned by getSysModels()
       * @return string the uid, or '' when the list is empty or shaped differently
       */
      private function resolveModelId($models): string
      {
          if (!is_array($models) && !($models instanceof \ArrayAccess)) {
              return '';
          }

          $first = $models[0] ?? null;

          if (is_array($first) || $first instanceof \ArrayAccess) {
              $uid = $first['uid'] ?? null;
          } elseif (is_object($first)) {
              $uid = $first->uid ?? null;
          } else {
              $uid = null;
          }

          return is_scalar($uid) ? (string) $uid : '';
      }
  }