2
0

OpenSearchService.php 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. <?php
  2. namespace App\Services;
  3. use OpenSearch\GuzzleClientFactory;
  4. use Illuminate\Support\Facades\Log;
  /**
   * Thin wrapper around the OpenSearch PHP client for the single index named by
   * the `mint.opensearch.index` configuration value.
   *
   * It offers index lifecycle operations (create / update / delete) based on the
   * index definition embedded in this class, plus document-level index, search
   * and delete calls.
   */
  class OpenSearchService
  {
      /**
       * OpenSearch client built in the constructor by GuzzleClientFactory.
       *
       * @var mixed
       */
      protected $client;

      /**
       * Settings and mappings sent as the request body when the index is created,
       * and split into settings / mappings when the index is updated.
       *
       * @var array
       */
      private $indexDefinition = [
          'settings' => [
              // knn must be enabled, otherwise knn_vector fields cannot be used.
              'index' => [
                  'knn' => true,
              ],
              'analysis' => [
                  'analyzer' => [
                      // Query-time analyzer for Pali + Chinese text:
                      // IK for Chinese, lowercasing + synonyms for Pali.
                      'pali_query_analyzer' => [
                          'tokenizer' => 'ik_max_word', // IK tokenizer, effective for Chinese
                          'filter' => [
                              'lowercase', // lowercase so Pali terms match consistently
                              'pali_synonyms', // Pali synonym expansion
                          ],
                      ],
                      // Index-time analyzer for Pali: lowercasing only.
                      'pali_index_analyzer' => [
                          'tokenizer' => 'standard',
                          'filter' => ['lowercase'],
                      ],
                  ],
                  'filter' => [
                      // Pali synonym filter, defined by a synonyms file.
                      'pali_synonyms' => [
                          'type' => 'synonym_graph',
                          'synonyms_path' => 'analysis/pali_synonyms.txt',
                      ],
                  ],
              ],
          ],
          'mappings' => [
              'properties' => [
                  'id' => ['type' => 'keyword'], // OpenSearch unique ID
                  'resource_id' => ['type' => 'keyword'], // database UUID
                  'resource_type' => ['type' => 'keyword'], // resource type
                  'title' => [
                      'properties' => [
                          'display' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                          'text' => [
                              'type' => 'text',
                              'analyzer' => 'ik_smart', // Chinese word segmentation
                              'search_analyzer' => 'ik_smart',
                          ],
                      ],
                  ],
                  'summary' => [
                      'type' => 'text',
                      'analyzer' => 'ik_smart',
                      'search_analyzer' => 'ik_smart',
                  ],
                  'content' => [
                      'properties' => [
                          'display' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                          'text' => [
                              'type' => 'text',
                              'analyzer' => 'pali_index_analyzer', // index time: lowercase Pali
                              'search_analyzer' => 'pali_query_analyzer', // query time: Chinese IK + Pali synonyms
                          ],
                          'exact' => ['type' => 'text', 'analyzer' => 'standard'],
                          'tokens' => [
                              'type' => 'nested',
                              'properties' => [
                                  'surface' => ['type' => 'keyword'],
                                  'lemma' => ['type' => 'keyword'],
                                  'compound_parts' => ['type' => 'keyword'],
                                  'case' => ['type' => 'keyword'],
                              ],
                          ],
                          'vector' => [
                              'type' => 'knn_vector',
                              'dimension' => 1536,
                              'method' => [
                                  'name' => 'hnsw',
                                  'space_type' => 'cosinesimil',
                                  'engine' => 'nmslib',
                              ],
                          ],
                      ],
                  ],
                  'related_id' => ['type' => 'keyword'],
                  'bold_single' => ['type' => 'text', 'analyzer' => 'standard'],
                  'bold_multi' => ['type' => 'text', 'analyzer' => 'standard'],
                  'path' => ['type' => 'text', 'analyzer' => 'standard'],
                  'page_refs' => ['type' => 'keyword'],
                  'tags' => ['type' => 'keyword'],
                  'category' => ['type' => 'keyword'],
                  'author' => [
                      'type' => 'text',
                  ],
                  'language' => ['type' => 'keyword'],
                  'updated_at' => ['type' => 'date'],
                  'granularity' => ['type' => 'keyword'],
                  'metadata' => [
                      'properties' => [
                          'APA' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                          'MLA' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                      ],
                  ],
              ],
          ],
      ];

      /**
       * Build the client from the `mint.opensearch.config` configuration array
       * (reads the keys scheme, host, port, username, password, ssl_verification).
       */
      public function __construct()
      {
          $config = config('mint.opensearch.config');

          // Host URL, e.g. http://127.0.0.1:9200 or https://your-host:9200
          $hostUrl = "{$config['scheme']}://{$config['host']}:{$config['port']}";

          $this->client = (new GuzzleClientFactory())->create([
              'base_uri' => $hostUrl,
              'auth' => [$config['username'], $config['password']], // basic auth
              'verify' => $config['ssl_verification'], // TLS certificate verification
          ]);
      }

      /**
       * Call the client's info() endpoint and log the outcome.
       *
       * @return array Two-element list: [bool $ok, string $message].
       */
      public function testConnection()
      {
          try {
              $info = $this->client->info();
              $message = 'OpenSearch 连接成功: ' . json_encode($info['version']['number']);
              Log::info($message);

              return [true, $message];
          } catch (\Exception $e) {
              $message = 'OpenSearch 连接失败: ' . $e->getMessage();
              Log::error($message);

              return [false, $message];
          }
      }

      /**
       * Create the configured index from the embedded index definition.
       *
       * @return mixed Response of the client's indices()->create() call.
       * @throws \Exception When the index already exists, or when the client call fails.
       */
      public function createIndex()
      {
          $index = $this->indexName();

          // Refuse to overwrite: creation is only attempted for a missing index.
          if ($this->client->indices()->exists(['index' => $index])) {
              throw new \Exception("Index [$index] already exists. Use updateIndex() to modify settings or mappings.");
          }

          return $this->client->indices()->create([
              'index' => $index,
              'body' => $this->indexDefinition,
          ]);
      }

      /**
       * Push the embedded settings and mappings to the configured, existing index.
       *
       * Settings are applied while the index is closed; the index is reopened
       * afterwards even when applying the settings fails, so a rejected update
       * does not leave the index closed.
       *
       * @return array Client responses keyed by 'settings' and/or 'mappings'.
       * @throws \Exception When either update fails; the client exception is
       *                    attached as the previous exception.
       */
      public function updateIndex()
      {
          $index = $this->indexName();
          $settings = $this->indexDefinition['settings'] ?? [];
          $mappings = $this->indexDefinition['mappings'] ?? [];
          $response = [];

          if (!empty($settings)) {
              try {
                  $indices = $this->client->indices();
                  $indices->close(['index' => $index]);
                  try {
                      $response['settings'] = $indices->putSettings([
                          'index' => $index,
                          'body' => ['settings' => $settings],
                      ]);
                  } finally {
                      // Reached only after a successful close(): always reopen.
                      $indices->open(['index' => $index]);
                  }
              } catch (\Exception $e) {
                  throw new \Exception(
                      "Failed to update settings for index [$index]: " . $e->getMessage(),
                      0,
                      $e
                  );
              }
          }

          if (!empty($mappings)) {
              try {
                  $response['mappings'] = $this->client->indices()->putMapping([
                      'index' => $index,
                      'body' => $mappings,
                  ]);
              } catch (\Exception $e) {
                  throw new \Exception(
                      "Failed to update mappings for index [$index]: " . $e->getMessage(),
                      0,
                      $e
                  );
              }
          }

          return $response;
      }

      /**
       * Delete the configured index.
       *
       * @return mixed Response of the client's indices()->delete() call.
       */
      public function deleteIndex()
      {
          return $this->client->indices()->delete(['index' => $this->indexName()]);
      }

      /**
       * Index a single document into the configured index.
       *
       * @param string $id   Document ID.
       * @param array  $body Document content.
       * @return mixed Response of the client's index() call.
       */
      public function create(string $id, array $body)
      {
          return $this->client->index([
              'index' => $this->indexName(),
              'id' => $id,
              'body' => $body,
          ]);
      }

      /**
       * Run a search against the configured index.
       *
       * @param mixed $dsl Query passed through unchanged as the request body.
       * @return mixed Response of the client's search() call.
       */
      public function search($dsl)
      {
          return $this->client->search(['index' => $this->indexName(), 'body' => $dsl]);
      }

      /**
       * Delete a single document from the configured index.
       *
       * @param mixed $id Document ID.
       * @return mixed Response of the client's delete() call.
       */
      public function delete($id)
      {
          return $this->client->delete(['index' => $this->indexName(), 'id' => $id]);
      }

      /**
       * Name of the index every operation targets, read from configuration.
       */
      private function indexName()
      {
          return config('mint.opensearch.index');
      }
  }