visuddhinanda 6 달 전
부모
커밋
f95f17af5c
1개의 변경된 파일, 263개의 추가 그리고 0개의 삭제
  1. 263 0
      api-v8/app/Services/OpenSearchService.php

+ 263 - 0
api-v8/app/Services/OpenSearchService.php

@@ -0,0 +1,263 @@
+<?php
+
+namespace App\Services;
+
+use OpenSearch\GuzzleClientFactory;
+use Illuminate\Support\Facades\Log;
+
+/**
+ * Thin service wrapper around the OpenSearch PHP client.
+ *
+ * The target index name is always read from config('mint.opensearch.index');
+ * connection parameters come from config('mint.opensearch.config').
+ */
+class OpenSearchService
+{
+    /** OpenSearch client instance created in the constructor. */
+    protected $client;
+
+    /**
+     * Settings and mappings used as the request body by createIndex() and
+     * re-applied (settings first, then mappings) by updateIndex().
+     */
+    private $indexDefinition = [
+        'settings' => [
+            // knn must be enabled, otherwise knn_vector fields cannot be used
+            'index' => [
+                'knn' => true,
+            ],
+            'analysis' => [
+                'analyzer' => [
+                    // Search-time analyzer for Pali + Chinese: IK for Chinese, lowercase + synonyms for Pali
+                    'pali_query_analyzer' => [
+                        'tokenizer' => 'ik_max_word',   // IK tokenizer, effective for Chinese
+                        'filter' => [
+                            'lowercase',                // lowercase to keep Pali terms consistent
+                            'pali_synonyms',            // Pali synonym expansion
+                        ],
+                    ],
+                    // Index-time analyzer for Pali: lowercasing only
+                    'pali_index_analyzer' => [
+                        'tokenizer' => 'standard',
+                        'filter' => ['lowercase'],
+                    ],
+                ],
+                'filter' => [
+                    // Pali synonym filter, defined by a synonyms file
+                    'pali_synonyms' => [
+                        'type' => 'synonym_graph',
+                        'synonyms_path' => 'analysis/pali_synonyms.txt',
+                    ],
+                ],
+            ],
+        ],
+        'mappings' => [
+            'properties' => [
+                'id' => ['type' => 'keyword'],             // unique OpenSearch ID
+                'resource_id' => ['type' => 'keyword'],    // database UUID
+                'resource_type' => ['type' => 'keyword'],  // resource type
+
+                'title' => [
+                    'properties' => [
+                        'display' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
+                        'text' => [
+                            'type' => 'text',
+                            'analyzer' => 'ik_smart',           // Chinese word segmentation
+                            'search_analyzer' => 'ik_smart',
+                        ]
+                    ],
+                ],
+
+                'summary' => [
+                    'type' => 'text',
+                    'analyzer' => 'ik_smart',
+                    'search_analyzer' => 'ik_smart',
+                ],
+
+                'content' => [
+                    'properties' => [
+                        'display' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
+                        'text' => [
+                            'type' => 'text',
+                            'analyzer' => 'pali_index_analyzer',      // index time: Pali lowercasing
+                            'search_analyzer' => 'pali_query_analyzer', // query time: Chinese IK + Pali synonyms
+                        ],
+                        'exact' => ['type' => 'text', 'analyzer' => 'standard'],
+                        'tokens' => [
+                            'type' => 'nested',
+                            'properties' => [
+                                'surface' => ['type' => 'keyword'],
+                                'lemma' => ['type' => 'keyword'],
+                                'compound_parts' => ['type' => 'keyword'],
+                                'case' => ['type' => 'keyword'],
+                            ],
+                        ],
+                        // NOTE(review): the nmslib engine is reported as deprecated in newer
+                        // OpenSearch releases — confirm against the target cluster version.
+                        'vector' => [
+                            'type' => 'knn_vector',
+                            'dimension' => 1536,
+                            'method' => [
+                                'name'       => 'hnsw',
+                                'space_type' => 'cosinesimil',
+                                'engine'     => 'nmslib',
+                            ],
+                        ],
+                    ],
+                ],
+
+                'related_id' => ['type' => 'keyword'],
+                'bold_single' => ['type' => 'text', 'analyzer' => 'standard'],
+                'bold_multi' => ['type' => 'text', 'analyzer' => 'standard'],
+                'path' => ['type' => 'text', 'analyzer' => 'standard'],
+                'page_refs' => ['type' => 'keyword'],
+                'tags' => ['type' => 'keyword'],
+                'category' => ['type' => 'keyword'],
+                'author' => [
+                    'type' => 'text',
+                ],
+                'language' => ['type' => 'keyword'],
+                'updated_at' => ['type' => 'date'],
+                'granularity' => ['type' => 'keyword'],
+                'metadata' => [
+                    'properties' => [
+                        'APA' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
+                        'MLA' => ['type' => 'text', 'analyzer' => 'ik_max_word'],
+                    ],
+                ],
+            ],
+        ],
+    ];
+
+    /**
+     * Build the OpenSearch client from config('mint.opensearch.config').
+     *
+     * Reads the keys scheme, host, port, username, password and
+     * ssl_verification from that configuration array.
+     */
+    public function __construct()
+    {
+        $config = config('mint.opensearch.config');
+
+        // Assemble the host URL
+        $hostUrl = "{$config['scheme']}://{$config['host']}:{$config['port']}";
+
+        // Create the client through GuzzleClientFactory
+        $this->client = (new GuzzleClientFactory())->create([
+            'base_uri' => $hostUrl, // e.g. http://127.0.0.1:9200 or https://your-host:9200
+            'auth' => [$config['username'], $config['password']], // basic auth
+            'verify' => $config['ssl_verification'], // SSL verification
+        ]);
+    }
+
+    /**
+     * Check connectivity by requesting the cluster info.
+     *
+     * The outcome is logged (info on success, error on failure) and never thrown.
+     *
+     * @return array Two-element list: [bool $success, string $message]
+     */
+    public function testConnection()
+    {
+        try {
+            $info = $this->client->info();
+            $message = 'OpenSearch 连接成功: ' . json_encode($info['version']['number']);
+            Log::info($message);
+            return [true, $message];
+        } catch (\Exception $e) {
+            $message = 'OpenSearch 连接失败: ' . $e->getMessage();
+            Log::error($message);
+            return [false, $message];
+        }
+    }
+
+    /**
+     * Create the configured index using the built-in index definition.
+     *
+     * @return array Response from the client; per the original documentation it
+     *               looks like:
+     * [
+     *       'acknowledged' => true, // whether the request was accepted and processed
+     *       'shards_acknowledged' => true, // whether the shards were initialised
+     *       'index' => 'index name' // name of the created index
+     *   ]
+     * @throws \Exception When the index already exists, or when the client call fails
+     */
+    public function createIndex()
+    {
+        $index = config('mint.opensearch.index');
+
+        // Refuse to overwrite an existing index; client errors propagate unchanged.
+        if ($this->client->indices()->exists(['index' => $index])) {
+            throw new \Exception("Index [$index] already exists. Use updateIndex() to modify settings or mappings.");
+        }
+
+        // Create the index
+        return $this->client->indices()->create([
+            'index' => $index,
+            'body' => $this->indexDefinition
+        ]);
+    }
+
+    /**
+     * Re-apply the built-in settings and mappings to the configured index.
+     *
+     * Settings are applied while the index is closed; the index is reopened
+     * afterwards even when applying the settings fails.
+     *
+     * @return array Client responses keyed by 'settings' and/or 'mappings'
+     * @throws \Exception When updating settings or mappings fails; the original
+     *                    exception is attached as the previous exception
+     */
+    public function updateIndex()
+    {
+        $index = config('mint.opensearch.index');
+
+        // Split the definition into settings and mappings
+        $settings = $this->indexDefinition['settings'] ?? [];
+        $mappings = $this->indexDefinition['mappings'] ?? [];
+
+        $response = [];
+
+        // Update settings (the index has to be closed first)
+        if (!empty($settings)) {
+            try {
+                $this->client->indices()->close(['index' => $index]);
+                try {
+                    $response['settings'] = $this->client->indices()->putSettings([
+                        'index' => $index,
+                        'body' => ['settings' => $settings]
+                    ]);
+                } finally {
+                    // Always reopen once the close succeeded, so a failed
+                    // settings update does not leave the index closed.
+                    $this->client->indices()->open(['index' => $index]);
+                }
+            } catch (\Exception $e) {
+                throw new \Exception("Failed to update settings for index [$index]: " . $e->getMessage(), 0, $e);
+            }
+        }
+
+        // Update mappings
+        if (!empty($mappings)) {
+            try {
+                $response['mappings'] = $this->client->indices()->putMapping([
+                    'index' => $index,
+                    'body' => $mappings
+                ]);
+            } catch (\Exception $e) {
+                throw new \Exception("Failed to update mappings for index [$index]: " . $e->getMessage(), 0, $e);
+            }
+        }
+
+        return $response;
+    }
+
+    /**
+     * Delete the configured index.
+     *
+     * @return array Response from the client
+     */
+    public function deleteIndex()
+    {
+        $index = config('mint.opensearch.index');
+        return $this->client->indices()->delete(['index' => $index]);
+    }
+
+    /**
+     * Index a single document into the configured index.
+     *
+     * @param string $id Document ID
+     * @param array $body Document content
+     * @return array Response from the client
+     */
+    public function create(string $id, array $body)
+    {
+        return $this->client->index([
+            'index' => config('mint.opensearch.index'),
+            'id' => $id,
+            'body' => $body
+        ]);
+    }
+
+    /**
+     * Run a search against the configured index.
+     *
+     * @param mixed $dsl Query DSL passed through unchanged as the request body
+     * @return array Response from the client
+     */
+    public function search($dsl)
+    {
+        return $this->client->search(['index' => config('mint.opensearch.index'), 'body' => $dsl]);
+    }
+
+    /**
+     * Delete a single document from the configured index.
+     *
+     * @param mixed $id Document ID
+     * @return array Response from the client
+     */
+    public function delete($id)
+    {
+        return $this->client->delete(['index' => config('mint.opensearch.index'), 'id' => $id]);
+    }
+}