IndexTerm.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. <?php
  2. namespace App\Console\Commands;
  3. use App\Models\DhammaTerm;
  4. use Illuminate\Console\Command;
  5. use App\Services\OpenSearchService;
  6. use App\Services\TermService;
  7. use Illuminate\Support\Facades\Log;
  /**
   * Console command that indexes DhammaTerm records into OpenSearch.
   *
   * Each term is loaded through TermService, converted into a search document
   * and written through OpenSearchService. With --test the documents are built
   * and summarised on the console but never written.
   */
  class IndexTerm extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       *
       * @example
       * php artisan opensearch:index-term
       * php artisan opensearch:index-term --word=anomadassī
       * php artisan opensearch:index-term --test
       */
      protected $signature = 'opensearch:index-term
                              {--test}
                              {--word= : 指定单个词条进行索引,省略则索引全部}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Index Term data into OpenSearch';

      /** @var bool Whether test mode is active (documents are printed, not written to OpenSearch). */
      private bool $isTest = false;

      /**
       * Create a new command instance.
       */
      public function __construct(
          protected OpenSearchService $openSearchService,
          protected TermService $termService,
      ) {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Iterates over all DhammaTerm rows (or only those matching --word), builds
       * a document for each one and writes it to OpenSearch. In test mode (--test)
       * the document is only printed, never written.
       *
       * A failure on a single term is reported and logged, the remaining terms are
       * still processed, and the command finishes with a non-zero status.
       *
       * @return int 0 on success, 1 if the connection check or any term failed
       */
      public function handle(): int
      {
          $word = $this->option('word');

          if ($this->option('test')) {
              $this->isTest = true;
              $this->info('test mode');
          }

          try {
              [$connected, $message] = $this->openSearchService->testConnection();
              if (!$connected) {
                  $this->error($message);
                  Log::error($message);

                  return 1;
              }

              $terms = DhammaTerm::select(['guid', 'word']);
              if ($word !== null && $word !== '') {
                  $terms = $terms->where('word', $word);
              }

              // Count the filtered query so the percentage is relative to the rows
              // that will actually be visited. The count is taken before orderBy()
              // so the aggregate query carries no ORDER BY clause. max() guards the
              // division below should rows appear after the count was taken.
              $total = max(1, (clone $terms)->count());

              $overallStatus = 0;
              foreach ($terms->orderBy('updated_at', 'asc')->cursor() as $key => $term) {
                  $percent = intdiv($key * 100, $total);
                  $this->info("[{$percent}%]-{$key} " . $term->word);

                  try {
                      $this->indexTerm($term->guid);
                  } catch (\Exception $e) {
                      // Keep going: one broken term should not abort the whole run.
                      $overallStatus = 1;
                      $this->error("Failed to index term {$term->word}: " . $e->getMessage());
                      Log::error('Failed to index term', [
                          'guid' => $term->guid,
                          'word' => $term->word,
                          'error' => $e,
                      ]);
                  }
              }

              return $overallStatus;
          } catch (\Exception $e) {
              $this->error('Failed to index Term data: ' . $e->getMessage());
              Log::error('Failed to index Term data', ['error' => $e]);

              return 1;
          }
      }

      /**
       * Build the document for a single term and write it to OpenSearch.
       *
       * The document structure follows the new mapping:
       *   title.text.pali / title.text.zh       → full-text search
       *   title.suggest.pali / title.suggest.zh → autocomplete suggestions
       *   content.text.pali / content.text.zh   → body content
       *
       * In test mode the Pali title and the summary are printed instead of writing.
       *
       * @param string $id guid of the DhammaTerm
       * @return void
       *
       * @throws \RuntimeException when the term service returns no data for $id
       */
      protected function indexTerm(string $id): void
      {
          $termData = $this->termService->find($id, 'text');
          if (empty($termData)) {
              throw new \RuntimeException("Term {$id} could not be loaded");
          }

          $word = $termData['word'];
          $meaning = $termData['meaning'];
          $note = $termData['note'] ?? '';
          $channelName = $termData['channel']['name'] ?? '';
          $studioName = $termData['studio']['realName'] ?? '';
          // Prefer the html field; fall back to the plain meaning.
          $content = $termData['html'] ?? $meaning ?? '';

          // Tags are derived from the {{category|...}} and {{quality|...}} markers in the note.
          $tags = array_map(
              static fn (string $category): string => "category:{$category}",
              $this->extractCategories($note),
          );
          $quality = $this->extractFirstQuality($note);
          if ($quality !== '') {
              $tags[] = "quality:{$quality}";
          }

          $document = [
              'id' => "term_{$id}",
              'resource_id' => $id,
              'resource_type' => 'term',
              'title' => [
                  'text' => [
                      'pali' => $word,
                      'zh' => $meaning,
                  ],
                  'suggest' => [
                      'pali' => [$word],
                      'zh' => [$meaning],
                  ],
              ],
              'summary' => [
                  'text' => $termData['summary'] ?? '',
              ],
              'content' => [],
              'bold_single' => [$meaning, $word],
              'related_id' => $word,
              'category' => null,
              'tags' => $tags,
              'language' => $termData['language'],
              'updated_at' => now()->toIso8601String(),
              'path' => $studioName . "/{$channelName}",
              'metadata' => ['channel' => $termData['channel_id']],
          ];

          // TODO: add language detection and route the text into content.text.pali
          // or content.text.zh accordingly. For now every language is stored under
          // content.text.zh.
          $document['content']['text']['zh'] = strip_tags($content);
          $document['content']['display'] = $content; // unstripped copy for display

          if ($this->isTest) {
              $this->info($document['title']['text']['pali']);
              $this->info($document['summary']['text']);

              return;
          }

          $this->openSearchService->create($document['id'], $document);
      }

      /**
       * Extract the values of all {{category|...}} markers from Markdown text.
       *
       * Values are trimmed; markers whose value is empty after trimming are dropped.
       *
       * @param string $content
       * @return array list of category names in order of appearance
       */
      private function extractCategories(string $content): array
      {
          if ($content === '') {
              return [];
          }

          preg_match_all('/\{\{category\|([^}]+)\}\}/u', $content, $matches);

          $categories = array_map(
              static fn (string $item): string => trim($item),
              $matches[1] ?? [],
          );

          // Compare against '' explicitly: the default array_filter() callback would
          // also discard a category literally named "0".
          return array_values(array_filter(
              $categories,
              static fn (string $category): bool => $category !== '',
          ));
      }

      /**
       * Extract the value of the first {{quality|...}} marker from Markdown text.
       *
       * @param string $content
       * @return string the trimmed value, or '' when no marker is present
       */
      private function extractFirstQuality(string $content): string
      {
          if ($content === '') {
              return '';
          }

          preg_match('/\{\{quality\|([^}]+)\}\}/u', $content, $matches);

          return isset($matches[1]) ? trim($matches[1]) : '';
      }
  }