SummaryService.php 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Http;
  4. use App\Services\AIModelService;
  5. use Illuminate\Support\Facades\Cache;
  6. use Illuminate\Support\Facades\Log;
  7. class SummaryService
  8. {
  9. protected string $modelId;
  10. protected string $apiUrl = '';
  11. protected string $apiModel = 'deepseek-v3';
  12. protected int $maxRetries = 3;
  13. protected int $chunkSize = 20000; // 每段字符数,可根据模型上下文调整
  14. private string $system_prompt = '你是一个摘要写作助手.请根据用户的输入文本生成中文的摘要,直接输出摘要,无需解释说明。';
  15. /**
  16. * 创建服务实例,初始化 OpenAI API Key
  17. *
  18. * @return void
  19. */
  20. public function __construct(AIModelService $aiModels)
  21. {
  22. $models = $aiModels->getSysModels('summarize');
  23. $this->modelId = $models[0]['uid'];
  24. $this->apiUrl = config('mint.ai.proxy') . '/api/openai';
  25. }
  26. /**
  27. * 生成输入文本的摘要,并支持缓存与强制刷新。
  28. *
  29. * 此方法会根据文本长度自动拆分为多个片段,
  30. * 对每个片段调用模型生成部分摘要,
  31. * 并最终将所有部分摘要再次合并生成整体摘要。
  32. *
  33. * 同时支持缓存机制:
  34. * - 缓存键使用文本内容的 md5 计算。
  35. * - 默认缓存有效期为 1 天。
  36. * - 可通过 forceRefresh 参数强制重新生成摘要。
  37. *
  38. * @param string $text 输入的 Markdown 文本
  39. * @param int $maxTokens 每次请求允许的最大 tokens 数
  40. * @param bool $forceRefresh 是否忽略缓存并强制刷新摘要
  41. * @return string 最终生成的摘要文本
  42. */
  43. public function summarize(string $text, int $maxTokens = 500, bool $forceRefresh = false): string
  44. {
  45. // 1️⃣ 计算缓存 key
  46. $cacheKey = 'summary_' . md5($text);
  47. // 2️⃣ 检查缓存命中
  48. if (!$forceRefresh && Cache::has($cacheKey)) {
  49. Log::debug("SummaryService cache hit", ['key' => $cacheKey]);
  50. return Cache::get($cacheKey);
  51. }
  52. Log::debug("SummaryService generating new summary", [
  53. 'key' => $cacheKey,
  54. 'forceRefresh' => $forceRefresh
  55. ]);
  56. // 3️⃣ 执行摘要逻辑
  57. $chunks = $this->splitText($text, $this->chunkSize);
  58. $partialSummaries = [];
  59. foreach ($chunks as $chunk) {
  60. $summary = $this->callOpenAI($chunk, $maxTokens);
  61. if ($summary !== '') {
  62. $partialSummaries[] = $summary;
  63. }
  64. }
  65. if (count($partialSummaries) === 0) {
  66. Log::warning("SummaryService no partial summaries", ['key' => $cacheKey]);
  67. return '';
  68. }
  69. $finalSummary = '';
  70. if (count($partialSummaries) === 1) {
  71. $finalSummary = $partialSummaries[0];
  72. } else {
  73. $combinedText = implode("\n\n", $partialSummaries);
  74. $finalSummary = $this->callOpenAI($combinedText, $maxTokens);
  75. }
  76. // 4️⃣ 写入缓存(默认缓存 1 周)
  77. Cache::put($cacheKey, $finalSummary, now()->addWeek());
  78. Log::debug("SummaryService cached new summary", [
  79. 'key' => $cacheKey,
  80. 'summary' => mb_substr($finalSummary, 0, 10, 'UTF-8')
  81. ]);
  82. return $finalSummary;
  83. }
  84. /**
  85. * 按段落拆分文本
  86. *
  87. * 将 Markdown 文本按空行识别为段落,
  88. * 避免在段落中间截断。
  89. * 如果段落超过设定 chunkSize,则按字符截断。
  90. *
  91. * @param string $text 输入的 Markdown 文本
  92. * @param int $chunkSize 每个块的最大字符数
  93. * @return array 分割后的文本块数组
  94. */
  95. protected function splitText(string $text, int $chunkSize): array
  96. {
  97. $paragraphs = preg_split("/\r?\n\r?\n/", $text); // 按空行拆段落
  98. $chunks = [];
  99. $currentChunk = '';
  100. foreach ($paragraphs as $para) {
  101. $para = trim($para);
  102. if ($para === '') {
  103. continue;
  104. }
  105. // 如果单段落超长,按 chunkSize 截断
  106. if (mb_strlen($para) > $chunkSize) {
  107. $subStart = 0;
  108. while ($subStart < mb_strlen($para)) {
  109. $subChunk = mb_substr($para, $subStart, $chunkSize);
  110. $chunks[] = $subChunk;
  111. $subStart += $chunkSize;
  112. }
  113. continue;
  114. }
  115. // 如果加上当前段落超过 chunkSize,则先保存当前 chunk
  116. if (mb_strlen($currentChunk) + mb_strlen($para) + 2 > $chunkSize) { // +2 保留空行
  117. $chunks[] = $currentChunk;
  118. $currentChunk = $para;
  119. } else {
  120. // 否则累加到当前 chunk
  121. $currentChunk .= ($currentChunk === '' ? '' : "\n\n") . $para;
  122. }
  123. }
  124. if ($currentChunk !== '') {
  125. $chunks[] = $currentChunk;
  126. }
  127. return $chunks;
  128. }
  129. /**
  130. * 调用 OpenAI GPT 模型生成摘要
  131. *
  132. * 带有重试机制和指数退避。
  133. * 在 429 或 500+ 错误时重试,最大重试次数为 maxRetries。
  134. * 其他错误直接返回空字符串。
  135. *
  136. * @param string $text 输入文本
  137. * @param int $maxTokens 每次请求允许的最大 tokens 数
  138. * @return string 模型返回的摘要文本
  139. */
  140. protected function callOpenAI(string $text, int $maxTokens = 200): string
  141. {
  142. $attempt = 0;
  143. $delay = 1;
  144. $payload = [
  145. 'model' => $this->modelId,
  146. 'messages' => [
  147. [
  148. 'role' => 'system',
  149. 'content' => $this->system_prompt
  150. ],
  151. [
  152. 'role' => 'user',
  153. 'content' => $text
  154. ],
  155. ],
  156. 'max_tokens' => $maxTokens,
  157. ];
  158. while ($attempt < $this->maxRetries) {
  159. try {
  160. $response = Http::timeout(100)
  161. ->withHeaders([
  162. 'Authorization' => 'Bearer ',
  163. 'Content-Type' => 'application/json',
  164. ])->post($this->apiUrl, [
  165. 'model_id' => $this->modelId,
  166. 'payload' => $payload
  167. ]);
  168. if ($response->successful()) {
  169. $data = $response->json();
  170. return $data['choices'][0]['message']['content'] ?? '';
  171. }
  172. if (in_array($response->status(), [429, 500, 502, 503, 504])) {
  173. throw new \Exception("Temporary server error: " . $response->status());
  174. }
  175. return '';
  176. } catch (\Exception $e) {
  177. $attempt++;
  178. if ($attempt >= $this->maxRetries) {
  179. return '';
  180. }
  181. sleep($delay);
  182. $delay *= 10;
  183. }
  184. }
  185. return '';
  186. }
  187. }