ResourceService.php 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. <?php
  2. namespace App\Services;
  3. use League\CommonMark\GithubFlavoredMarkdownConverter;
  /**
   * Builds search documents for resources and writes them to the
   * `wikipali_resources` OpenSearch index.
   *
   * Each document carries the raw content, a normalised plain-text rendering,
   * an embedding of that normalised text and a short list of suggestion terms.
   * Resources of type `sutta` or `paragraph` additionally carry scripture
   * location fields and an embedding of their full path.
   */
  class ResourceService
  {
      /** Name of the index every operation of this service targets. */
      private const INDEX = 'wikipali_resources';

      /** Resource types that receive the scripture location fields. */
      private const SCRIPTURE_TYPES = ['sutta', 'paragraph'];

      /** Maximum number of keywords appended after the title in `suggest_content`. */
      private const MAX_SUGGESTION_KEYWORDS = 5;

      /** @var OpenSearchService Client used to create and delete index documents. */
      protected $openSearch;

      /** @var EmbeddingService Produces embeddings for content and path text. */
      protected $embeddingService;

      /** @var GithubFlavoredMarkdownConverter Renders Markdown to HTML before tags are stripped. */
      protected $markdownConverter;

      /**
       * @param OpenSearchService $openSearch       Index client.
       * @param EmbeddingService  $embeddingService Embedding generator.
       */
      public function __construct(OpenSearchService $openSearch, EmbeddingService $embeddingService)
      {
          $this->openSearch = $openSearch;
          $this->embeddingService = $embeddingService;
          $this->markdownConverter = new GithubFlavoredMarkdownConverter();
      }

      /**
       * Index a new resource.
       *
       * @param array $data Resource fields; `uid`, `type` and `title` are required.
       *
       * @return mixed Whatever the OpenSearch client's `create()` returns.
       *
       * @throws \InvalidArgumentException When a required field is missing.
       */
      public function store(array $data)
      {
          $doc = $this->buildDocument($data);

          return $this->openSearch->create(self::INDEX, $doc['id'], $doc);
      }

      /**
       * Re-index an existing resource by rebuilding its whole document.
       *
       * @param mixed $uid  Identifier of the resource to overwrite.
       * @param array $data Full set of resource fields (`type` and `title` are required).
       *
       * @return mixed Whatever the OpenSearch client's `create()` returns.
       *
       * @throws \InvalidArgumentException When a required field is missing.
       */
      public function update($uid, array $data)
      {
          // The explicit $uid must win over any stale 'uid' key inside $data,
          // otherwise the update would be written to a different document.
          $doc = $this->buildDocument(array_merge($data, ['uid' => $uid]));

          // create() is used on purpose: it overwrites the stored document.
          return $this->openSearch->create(self::INDEX, $doc['id'], $doc);
      }

      /**
       * Remove a resource from the index.
       *
       * @param mixed $uid Identifier of the resource to delete.
       *
       * @return mixed Whatever the OpenSearch client's `delete()` returns.
       */
      public function delete($uid)
      {
          return $this->openSearch->delete(self::INDEX, $uid);
      }

      /**
       * Generate an embedding for arbitrary text via the embedding service.
       *
       * @param mixed $text Text to embed; passed to the embedding service unchanged.
       *
       * @return mixed Whatever the embedding service's `generate()` returns.
       */
      public function generateEmbedding($text)
      {
          return $this->embeddingService->generate($text);
      }

      /**
       * Assemble the index document for one resource.
       *
       * @param array $data Resource fields. `content` may be a string or a list
       *                    whose first entry has a `text` key.
       *
       * @return array The document, with `id` equal to `uid`.
       *
       * @throws \InvalidArgumentException When `uid`, `type` or `title` is missing.
       */
      private function buildDocument(array $data): array
      {
          foreach (['uid', 'type', 'title'] as $required) {
              if (!isset($data[$required])) {
                  throw new \InvalidArgumentException(
                      "Resource data is missing required field '{$required}'."
                  );
              }
          }

          $contentText = $this->extractContentText($data['content'] ?? '');
          // Normalise once and reuse the result for both the stored text and the
          // suggestions, so the Markdown is not converted twice.
          $normalizedText = $this->normalizeMarkdown($contentText);

          $doc = [
              'id' => $data['uid'],
              'uid' => $data['uid'],
              'type' => $data['type'],
              'title' => $data['title'],
              'content' => [
                  [
                      'id' => $data['uid'],
                      'text' => $contentText,
                      'text_normalized' => $normalizedText,
                  ],
              ],
              'confidence' => $data['confidence'] ?? 1.0,
              'content_embedding' => $this->embeddingService->generate($normalizedText),
              'suggest_content' => $this->extractSuggestions($normalizedText, $data['title']),
              'metadata' => $data['metadata'] ?? [],
          ];

          if (in_array($data['type'], self::SCRIPTURE_TYPES, true)) {
              $bookId = $data['book_id'] ?? null;
              $paragraph = $data['paragraph'] ?? null;
              // Only build the composite id when both parts are really present;
              // missing keys must not raise an "undefined array key" error.
              $hasScriptureRef = $bookId !== null && $bookId !== ''
                  && $paragraph !== null && $paragraph !== '';

              $doc['book_id'] = $bookId;
              $doc['paragraph'] = $paragraph;
              $doc['scripture_id'] = $hasScriptureRef ? "{$bookId}-{$paragraph}" : null;
              $doc['path_full'] = $data['path_full'] ?? '';
              $doc['path_embedding'] = $this->embeddingService->generate($doc['path_full']);
              $doc['book_name'] = $data['book_name'] ?? '';
              $doc['vagga'] = $data['vagga'] ?? '';
              $doc['chapter'] = $data['chapter'] ?? '';
              $doc['sutta_name'] = $data['sutta_name'] ?? $data['title'];
          }

          return $doc;
      }

      /**
       * Reduce the `content` input to a single string.
       *
       * @param mixed $content A string, or a list whose first entry has a `text` key.
       *
       * @return string The content text, or '' when none is available.
       */
      private function extractContentText($content): string
      {
          if (is_array($content)) {
              $content = $content[0]['text'] ?? '';
          }

          return (string) $content;
      }

      /**
       * Convert Markdown to lower-cased plain text with long Pali vowels folded.
       *
       * @param string $markdown Markdown source.
       *
       * @return string Plain text with tags removed, lower-cased, and
       *                `ā`/`ī`/`ū` replaced by `a`/`i`/`u`.
       */
      private function normalizeMarkdown(string $markdown): string
      {
          // Render to HTML, then strip the tags to drop the Markdown markup.
          $html = (string) $this->markdownConverter->convert($markdown)->getContent();
          // Multibyte-aware lower-casing: the byte-wise strtolower() leaves
          // 'Ā', 'Ī', 'Ū' untouched, so they would escape the folding below.
          $text = mb_strtolower(strip_tags($html), 'UTF-8');

          // Simple Pali normalisation (ICU folding could replace this later).
          return str_replace(['ā', 'ī', 'ū'], ['a', 'i', 'u'], $text);
      }

      /**
       * Build the suggestion list: the title followed by the first few keywords.
       *
       * @param string $normalizedText Output of normalizeMarkdown() for the content.
       * @param mixed  $title          Resource title, always the first suggestion.
       *
       * @return array The title plus up to five distinct words longer than two characters.
       */
      private function extractSuggestions(string $normalizedText, $title): array
      {
          // Split on any whitespace run: stripped HTML keeps newlines between
          // blocks, which a plain explode(' ') would leave glued inside words.
          $words = preg_split('/\s+/u', $normalizedText, -1, PREG_SPLIT_NO_EMPTY) ?: [];

          // Count characters, not bytes, so diacritics do not inflate the length.
          $keywords = array_unique(array_filter(
              $words,
              static fn ($word) => mb_strlen($word, 'UTF-8') > 2
          ));

          return array_merge([$title], array_slice($keywords, 0, self::MAX_SUGGESTION_KEYWORDS));
      }
  }