SummaryService.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Http;
  4. use App\Services\AIModelService;
  5. use Illuminate\Support\Facades\Cache;
  6. use Illuminate\Support\Facades\Log;
  7. class SummaryService
  8. {
  9. protected string $modelId;
  10. protected string $apiUrl = '';
  11. protected string $apiModel = 'deepseek-v3';
  12. protected int $maxRetries = 3;
  13. protected int $chunkSize = 20000; // 每段字符数,可根据模型上下文调整
  14. private string $system_prompt = '你是一个摘要写作助手.请根据用户的输入文本生成中文的摘要,直接输出摘要,无需解释说明。';
  15. /**
  16. * 创建服务实例,初始化 OpenAI API Key
  17. *
  18. * @return void
  19. */
  20. public function __construct(AIModelService $aiModels)
  21. {
  22. $models = $aiModels->getSysModels('summarize');
  23. if (isset($models[0])) {
  24. $this->modelId = $models[0]['uid'];
  25. }
  26. $this->apiUrl = config('mint.ai.proxy') . '/api/openai';
  27. }
  28. /**
  29. * 生成输入文本的摘要,并支持缓存与强制刷新。
  30. *
  31. * 此方法会根据文本长度自动拆分为多个片段,
  32. * 对每个片段调用模型生成部分摘要,
  33. * 并最终将所有部分摘要再次合并生成整体摘要。
  34. *
  35. * 同时支持缓存机制:
  36. * - 缓存键使用文本内容的 md5 计算。
  37. * - 默认缓存有效期为 1 天。
  38. * - 可通过 forceRefresh 参数强制重新生成摘要。
  39. *
  40. * @param string $text 输入的 Markdown 文本
  41. * @param int $maxTokens 每次请求允许的最大 tokens 数
  42. * @param bool $forceRefresh 是否忽略缓存并强制刷新摘要
  43. * @return string 最终生成的摘要文本
  44. */
  45. public function summarize(string $text, int $maxTokens = 500, bool $forceRefresh = false): string
  46. {
  47. // 1️⃣ 计算缓存 key
  48. $cacheKey = 'summary_' . md5($text);
  49. // 2️⃣ 检查缓存命中
  50. if (!$forceRefresh && Cache::has($cacheKey)) {
  51. Log::debug("SummaryService cache hit", ['key' => $cacheKey]);
  52. return Cache::get($cacheKey);
  53. }
  54. Log::debug("SummaryService generating new summary", [
  55. 'key' => $cacheKey,
  56. 'forceRefresh' => $forceRefresh
  57. ]);
  58. // 3️⃣ 执行摘要逻辑
  59. $chunks = $this->splitText($text, $this->chunkSize);
  60. $partialSummaries = [];
  61. foreach ($chunks as $chunk) {
  62. $summary = $this->callOpenAI($chunk, $maxTokens);
  63. if ($summary !== '') {
  64. $partialSummaries[] = $summary;
  65. }
  66. }
  67. if (count($partialSummaries) === 0) {
  68. Log::warning("SummaryService no partial summaries", ['key' => $cacheKey]);
  69. return '';
  70. }
  71. $finalSummary = '';
  72. if (count($partialSummaries) === 1) {
  73. $finalSummary = $partialSummaries[0];
  74. } else {
  75. $combinedText = implode("\n\n", $partialSummaries);
  76. $finalSummary = $this->callOpenAI($combinedText, $maxTokens);
  77. }
  78. // 4️⃣ 写入缓存(默认缓存 1 周)
  79. Cache::put($cacheKey, $finalSummary, now()->addWeek());
  80. Log::debug("SummaryService cached new summary", [
  81. 'key' => $cacheKey,
  82. 'summary' => mb_substr($finalSummary, 0, 10, 'UTF-8')
  83. ]);
  84. return $finalSummary;
  85. }
  86. /**
  87. * 按段落拆分文本
  88. *
  89. * 将 Markdown 文本按空行识别为段落,
  90. * 避免在段落中间截断。
  91. * 如果段落超过设定 chunkSize,则按字符截断。
  92. *
  93. * @param string $text 输入的 Markdown 文本
  94. * @param int $chunkSize 每个块的最大字符数
  95. * @return array 分割后的文本块数组
  96. */
  97. protected function splitText(string $text, int $chunkSize): array
  98. {
  99. $paragraphs = preg_split("/\r?\n\r?\n/", $text); // 按空行拆段落
  100. $chunks = [];
  101. $currentChunk = '';
  102. foreach ($paragraphs as $para) {
  103. $para = trim($para);
  104. if ($para === '') {
  105. continue;
  106. }
  107. // 如果单段落超长,按 chunkSize 截断
  108. if (mb_strlen($para) > $chunkSize) {
  109. $subStart = 0;
  110. while ($subStart < mb_strlen($para)) {
  111. $subChunk = mb_substr($para, $subStart, $chunkSize);
  112. $chunks[] = $subChunk;
  113. $subStart += $chunkSize;
  114. }
  115. continue;
  116. }
  117. // 如果加上当前段落超过 chunkSize,则先保存当前 chunk
  118. if (mb_strlen($currentChunk) + mb_strlen($para) + 2 > $chunkSize) { // +2 保留空行
  119. $chunks[] = $currentChunk;
  120. $currentChunk = $para;
  121. } else {
  122. // 否则累加到当前 chunk
  123. $currentChunk .= ($currentChunk === '' ? '' : "\n\n") . $para;
  124. }
  125. }
  126. if ($currentChunk !== '') {
  127. $chunks[] = $currentChunk;
  128. }
  129. return $chunks;
  130. }
  131. /**
  132. * 调用 OpenAI GPT 模型生成摘要
  133. *
  134. * 带有重试机制和指数退避。
  135. * 在 429 或 500+ 错误时重试,最大重试次数为 maxRetries。
  136. * 其他错误直接返回空字符串。
  137. *
  138. * @param string $text 输入文本
  139. * @param int $maxTokens 每次请求允许的最大 tokens 数
  140. * @return string 模型返回的摘要文本
  141. */
  142. protected function callOpenAI(string $text, int $maxTokens = 200): string
  143. {
  144. $attempt = 0;
  145. $delay = 1;
  146. $payload = [
  147. 'model' => $this->modelId,
  148. 'messages' => [
  149. [
  150. 'role' => 'system',
  151. 'content' => $this->system_prompt
  152. ],
  153. [
  154. 'role' => 'user',
  155. 'content' => $text
  156. ],
  157. ],
  158. 'max_tokens' => $maxTokens,
  159. ];
  160. while ($attempt < $this->maxRetries) {
  161. try {
  162. $response = Http::timeout(100)
  163. ->withHeaders([
  164. 'Authorization' => 'Bearer ',
  165. 'Content-Type' => 'application/json',
  166. ])->post($this->apiUrl, [
  167. 'model_id' => $this->modelId,
  168. 'payload' => $payload
  169. ]);
  170. if ($response->successful()) {
  171. $data = $response->json();
  172. return $data['choices'][0]['message']['content'] ?? '';
  173. }
  174. if (in_array($response->status(), [429, 500, 502, 503, 504])) {
  175. throw new \Exception("Temporary server error: " . $response->status());
  176. }
  177. return '';
  178. } catch (\Exception $e) {
  179. $attempt++;
  180. if ($attempt >= $this->maxRetries) {
  181. return '';
  182. }
  183. sleep($delay);
  184. $delay *= 10;
  185. }
  186. }
  187. return '';
  188. }
  189. }