| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- <?php
- namespace App\Services;
- use OpenSearch\GuzzleClientFactory;
- use Illuminate\Support\Facades\Log;
/**
 * Thin wrapper around the OpenSearch PHP client for the single index
 * configured under `mint.opensearch.index`.
 *
 * It owns the index definition (settings + mappings) and exposes index
 * lifecycle operations (create / update / delete) as well as document-level
 * operations (create / search / delete).
 */
class OpenSearchService
{
    /** Client instance produced by GuzzleClientFactory in the constructor. */
    protected $client;

    /**
     * Index definition sent as the request body when the index is created,
     * and split into settings / mappings when the index is updated.
     */
    private $indexDefinition = [
        'settings' => [
            // knn must be enabled, otherwise the knn_vector field type cannot be used.
            'index' => [
                'knn' => true,
            ],
            'analysis' => [
                'analyzer' => [
                    // Search-time analyzer for Pali + Chinese:
                    // IK for Chinese, lowercase + synonyms for Pali.
                    'pali_query_analyzer' => [
                        'tokenizer' => 'ik_max_word', // IK tokenizer, effective for Chinese
                        'filter' => [
                            'lowercase',     // lowercase, keeps Pali consistent
                            'pali_synonyms', // Pali synonym expansion
                        ],
                    ],
                    // Index-time analyzer for Pali: lowercase only.
                    'pali_index_analyzer' => [
                        'tokenizer' => 'standard',
                        'filter' => ['lowercase'],
                    ],
                ],
                'filter' => [
                    // Pali synonym filter, defined by a synonyms file.
                    'pali_synonyms' => [
                        'type' => 'synonym_graph',
                        'synonyms_path' => 'analysis/pali_synonyms.txt',
                    ],
                ],
            ],
        ],
        'mappings' => [
            'properties' => [
                'id' => ['type' => 'keyword'],            // OpenSearch unique ID
                'resource_id' => ['type' => 'keyword'],   // database UUID
                'resource_type' => ['type' => 'keyword'], // resource type
                'title' => [
                    'properties' => [
                        'display' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                        'text' => [
                            'type' => 'text',
                            'analyzer' => 'ik_smart', // Chinese tokenization
                            'search_analyzer' => 'ik_smart',
                        ],
                    ],
                ],
                'summary' => [
                    'type' => 'text',
                    'analyzer' => 'ik_smart',
                    'search_analyzer' => 'ik_smart',
                ],
                'content' => [
                    'properties' => [
                        'display' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                        'text' => [
                            'type' => 'text',
                            'analyzer' => 'pali_index_analyzer',        // index time: Pali lowercasing
                            'search_analyzer' => 'pali_query_analyzer', // query time: Chinese IK + Pali synonyms
                        ],
                        'exact' => ['type' => 'text', 'analyzer' => 'standard'],
                        'tokens' => [
                            'type' => 'nested',
                            'properties' => [
                                'surface' => ['type' => 'keyword'],
                                'lemma' => ['type' => 'keyword'],
                                'compound_parts' => ['type' => 'keyword'],
                                'case' => ['type' => 'keyword'],
                            ],
                        ],
                        'vector' => [
                            'type' => 'knn_vector',
                            'dimension' => 1536,
                            'method' => [
                                'name' => 'hnsw',
                                'space_type' => 'cosinesimil',
                                'engine' => 'nmslib',
                            ],
                        ],
                    ],
                ],
                'related_id' => ['type' => 'keyword'],
                'bold_single' => ['type' => 'text', 'analyzer' => 'standard'],
                'bold_multi' => ['type' => 'text', 'analyzer' => 'standard'],
                'path' => ['type' => 'text', 'analyzer' => 'standard'],
                'page_refs' => ['type' => 'keyword'],
                'tags' => ['type' => 'keyword'],
                'category' => ['type' => 'keyword'],
                'author' => [
                    'type' => 'text',
                ],
                'language' => ['type' => 'keyword'],
                'updated_at' => ['type' => 'date'],
                'granularity' => ['type' => 'keyword'],
                'metadata' => [
                    'properties' => [
                        'APA' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                        'MLA' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
                    ],
                ],
            ],
        ],
    ];

    /**
     * Build the client from the `mint.opensearch.config` configuration array.
     *
     * Reads the keys `scheme`, `host`, `port`, `username`, `password` and
     * `ssl_verification` from that array.
     */
    public function __construct()
    {
        $config = config('mint.opensearch.config');

        // Host URL, e.g. http://127.0.0.1:9200 or https://your-host:9200
        $hostUrl = "{$config['scheme']}://{$config['host']}:{$config['port']}";

        $this->client = (new GuzzleClientFactory())->create([
            'base_uri' => $hostUrl,
            'auth' => [$config['username'], $config['password']], // basic auth
            'verify' => $config['ssl_verification'],               // SSL verification
        ]);
    }

    /**
     * Check connectivity by requesting the cluster info, and log the outcome.
     *
     * Exceptions are caught and reported through the return value instead of
     * being propagated.
     *
     * @return array Two-element list: [bool $success, string $message].
     */
    public function testConnection()
    {
        try {
            $info = $this->client->info();
            $message = 'OpenSearch 连接成功: ' . json_encode($info['version']['number']);
            Log::info($message);
            return [true, $message];
        } catch (\Exception $e) {
            $message = 'OpenSearch 连接失败: ' . $e->getMessage();
            Log::error($message);
            return [false, $message];
        }
    }

    /**
     * Create the configured index from the built-in index definition.
     *
     * The original documentation describes the response as carrying
     * `acknowledged`, `shards_acknowledged` and `index` keys.
     *
     * @return mixed Response of the client's indices()->create() call.
     * @throws \Exception If the index already exists; client errors propagate unchanged.
     */
    public function createIndex()
    {
        $index = $this->indexName();

        // Refuse to overwrite: an existing index must be changed via updateIndex().
        if ($this->client->indices()->exists(['index' => $index])) {
            throw new \Exception("Index [$index] already exists. Use updateIndex() to modify settings or mappings.");
        }

        return $this->client->indices()->create([
            'index' => $index,
            'body' => $this->indexDefinition,
        ]);
    }

    /**
     * Push the built-in settings and mappings to the configured index.
     *
     * Settings are applied while the index is closed. Once the close call has
     * succeeded, the index is always reopened, even when applying the settings
     * fails, so a rejected update does not leave the index closed.
     *
     * @return array Responses keyed by 'settings' and/or 'mappings'.
     * @throws \Exception On failure; the underlying exception is attached as `previous`.
     */
    public function updateIndex()
    {
        $index = $this->indexName();
        $settings = $this->indexDefinition['settings'] ?? [];
        $mappings = $this->indexDefinition['mappings'] ?? [];
        $response = [];

        // Update settings (the index is closed first).
        if (!empty($settings)) {
            try {
                $this->client->indices()->close(['index' => $index]);
                try {
                    $response['settings'] = $this->client->indices()->putSettings([
                        'index' => $index,
                        'body' => ['settings' => $settings],
                    ]);
                } finally {
                    // Runs on success and on failure of putSettings.
                    $this->client->indices()->open(['index' => $index]);
                }
            } catch (\Exception $e) {
                throw new \Exception(
                    "Failed to update settings for index [$index]: " . $e->getMessage(),
                    0,
                    $e
                );
            }
        }

        // Update mappings.
        if (!empty($mappings)) {
            try {
                $response['mappings'] = $this->client->indices()->putMapping([
                    'index' => $index,
                    'body' => $mappings,
                ]);
            } catch (\Exception $e) {
                throw new \Exception(
                    "Failed to update mappings for index [$index]: " . $e->getMessage(),
                    0,
                    $e
                );
            }
        }

        return $response;
    }

    /**
     * Delete the configured index.
     *
     * @return mixed Response of the client's indices()->delete() call.
     */
    public function deleteIndex()
    {
        return $this->client->indices()->delete(['index' => $this->indexName()]);
    }

    /**
     * Index a single document into the configured index.
     *
     * @param string $id   Document ID.
     * @param array  $body Document content.
     * @return mixed Response of the client's index() call.
     */
    public function create(string $id, array $body)
    {
        return $this->client->index([
            'index' => $this->indexName(),
            'id' => $id,
            'body' => $body,
        ]);
    }

    /**
     * Run a search against the configured index.
     *
     * @param mixed $dsl Query passed through unchanged as the request body.
     * @return mixed Response of the client's search() call.
     */
    public function search($dsl)
    {
        return $this->client->search(['index' => $this->indexName(), 'body' => $dsl]);
    }

    /**
     * Delete a single document from the configured index.
     *
     * @param mixed $id Document ID.
     * @return mixed Response of the client's delete() call.
     */
    public function delete($id)
    {
        return $this->client->delete(['index' => $this->indexName(), 'id' => $id]);
    }

    /**
     * Name of the index all operations target, read from `mint.opensearch.index`.
     */
    private function indexName()
    {
        return config('mint.opensearch.index');
    }
}
|