visuddhinanda 6 maanden geleden
bovenliggende
commit
b0f68555ca

+ 45 - 0
api-v8/app/Console/Commands/ClearEmbeddingsCache.php

@@ -0,0 +1,45 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Illuminate\Console\Command;
+use App\Services\OpenSearchService;
+
+class ClearEmbeddingsCache extends Command
+{
+    /**
+     * Command name and signature.
+     *
+     * The optional `text` argument selects a single cache entry; when it is
+     * omitted, every embedding cache entry is cleared.
+     *
+     * @var string
+     */
+    protected $signature = 'embeddings:clear {text? : 指定要清理的文本,不传则清理全部缓存}';
+
+    /**
+     * Command description.
+     *
+     * @var string
+     */
+    protected $description = '清理 Redis 中的 embedding 缓存';
+
+    /**
+     * Execute the command.
+     *
+     * Clears the cache entry for the given text, or all embedding cache
+     * entries when no text is supplied.
+     *
+     * @param  \App\Services\OpenSearchService  $service
+     * @return int Always 0.
+     */
+    public function handle(OpenSearchService $service)
+    {
+        $text = $this->argument('text');
+
+        // Compare explicitly instead of relying on truthiness: the literal
+        // text "0" is falsy in PHP and would otherwise wipe the whole cache.
+        if ($text === null || $text === '') {
+            $count = $service->clearAllEmbeddingCache();
+            $this->info("已清理所有 embedding 缓存,共 {$count} 条");
+
+            return 0;
+        }
+
+        if ($service->clearEmbeddingCache($text)) {
+            $this->info("已清理指定文本的缓存: \"{$text}\"");
+        } else {
+            $this->warn("缓存不存在: \"{$text}\"");
+        }
+
+        return 0;
+    }
+}

+ 89 - 0
api-v8/app/Console/Commands/CreateOpenSearchIndex.php

@@ -0,0 +1,89 @@
+<?php
+
+namespace App\Console\Commands;
+
+use App\Services\OpenSearchService;
+use Illuminate\Console\Command;
+
+class CreateOpenSearchIndex extends Command
+{
+    /**
+     * The name and signature of the console command.
+     * php artisan create:opensearch.index
+     * @var string
+     */
+    protected $signature = 'create:opensearch.index';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Create the OpenSearch index, or update its settings and mappings when it already exists';
+
+    /**
+     * Create a new command instance.
+     *
+     * @return void
+     */
+    public function __construct()
+    {
+        parent::__construct();
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * Tests the connection, then tries to create the index. When creation
+     * throws an exception whose message contains "exists", the command falls
+     * back to updating the existing index instead.
+     *
+     * @return int 0 on success, 1 on any failure.
+     */
+    public function handle()
+    {
+        $openSearch = app(OpenSearchService::class);
+
+        // testConnection() returns a [success flag, message] pair.
+        [$connected, $message] = $openSearch->testConnection();
+        if (!$connected) {
+            $this->error($message);
+            return 1;
+        }
+        $this->info($message);
+
+        try {
+            $created = $openSearch->createIndex();
+        } catch (\Exception $e) {
+            if (!str_contains($e->getMessage(), 'exists')) {
+                $this->error('Failed to create index: ' . $e->getMessage());
+                return 1;
+            }
+
+            $this->warn('Index already exists, attempting to update...');
+            return $this->updateExistingIndex($openSearch);
+        }
+
+        // Read response keys defensively: a missing key must not raise an
+        // "undefined array key" error in the middle of the report.
+        $index = $created['index'] ?? '';
+        if (!empty($created['acknowledged'])) {
+            $this->info('Index created successfully: ' . $index);
+        }
+        if (empty($created['shards_acknowledged'])) {
+            $this->error('Shard initialization failed for index: ' . $index);
+            return 1;
+        }
+        $this->info('Shards initialized successfully for index: ' . $index);
+
+        return 0;
+    }
+
+    /**
+     * Update the settings and mappings of an index that already exists.
+     *
+     * @param  \App\Services\OpenSearchService  $openSearch
+     * @return int 0 when the update call returned, 1 when it threw.
+     */
+    private function updateExistingIndex(OpenSearchService $openSearch): int
+    {
+        try {
+            $update = $openSearch->updateIndex();
+        } catch (\Exception $updateException) {
+            $this->error('Failed to update index: ' . $updateException->getMessage());
+            return 1;
+        }
+
+        if (!empty($update['settings']) && !empty($update['settings']['acknowledged'])) {
+            $this->info('Index settings updated successfully');
+        }
+        if (!empty($update['mappings']) && !empty($update['mappings']['acknowledged'])) {
+            $this->info('Index mappings updated successfully');
+        }
+        if (empty($update['settings']) && empty($update['mappings'])) {
+            $this->warn('No settings or mappings provided for update');
+        }
+
+        return 0;
+    }
+}

+ 68 - 0
api-v8/app/Console/Commands/ExportIKPaliTeam.php

@@ -0,0 +1,68 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Illuminate\Console\Command;
+use App\Models\DhammaTerm;
+use Illuminate\Support\Facades\Redis;
+use Illuminate\Support\Facades\Log;
+
+class ExportIKPaliTeam extends Command
+{
+    /**
+     * The name and signature of the console command.
+     * php artisan export:ik.pali.team
+     * @var string
+     */
+    protected $signature = 'export:ik.pali.team';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Export distinct DhammaTerm meanings to storage/app/export/fts/pali_term.txt';
+
+    /**
+     * Create a new command instance.
+     *
+     * @return void
+     */
+    public function __construct()
+    {
+        parent::__construct();
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * Writes every distinct, non-empty DhammaTerm `meaning` to
+     * storage/app/export/fts/pali_term.txt, one value per line.
+     *
+     * @return int 0 on success, 1 when the directory or file cannot be prepared.
+     */
+    public function handle()
+    {
+        $path = storage_path('app/export/fts');
+        if (!is_dir($path) && !mkdir($path, 0700, true)) {
+            Log::error('mkdir fail path=' . $path);
+            return 1;
+        }
+
+        // Collect the words before touching the file, so a failing query
+        // does not leave a previously exported file truncated.
+        // NOTE(review): `other_meaning` is selected but never exported —
+        // confirm whether it was meant to be included.
+        $wordsList = [];
+        $terms = DhammaTerm::select(['meaning', 'other_meaning'])->get();
+        foreach ($terms as $term) {
+            if (!empty($term->meaning)) {
+                // Array keys de-duplicate the meanings.
+                $wordsList[$term->meaning] = 1;
+            }
+        }
+
+        $file = $path . '/pali_term.txt';
+        $fp = fopen($file, 'w');
+        if ($fp === false) {
+            // Report the failure through the command's exit code instead of die().
+            Log::error('fopen fail file=' . $file);
+            $this->error('Unable to open file: ' . $file);
+            return 1;
+        }
+
+        try {
+            foreach (array_keys($wordsList) as $word) {
+                fwrite($fp, $word . PHP_EOL);
+            }
+        } finally {
+            // Always release the handle, even if a write throws.
+            fclose($fp);
+        }
+
+        $this->info('done');
+        return 0;
+    }
+}

+ 97 - 0
api-v8/app/Console/Commands/ExportPaliSynonyms.php

@@ -0,0 +1,97 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Illuminate\Console\Command;
+use App\Http\Api\DictApi;
+use App\Models\UserDict;
+use App\Models\DhammaTerm;
+use Illuminate\Support\Facades\Redis;
+use Illuminate\Support\Facades\Log;
+
+class ExportPaliSynonyms extends Command
+{
+    /**
+     * The name and signature of the console command.
+     * php artisan export:pali.synonyms
+     * @var string
+     */
+    protected $signature = 'export:pali.synonyms';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Export Pali synonym groups to storage/app/export/fts/pali_synonyms.txt';
+
+    /**
+     * Create a new command instance.
+     *
+     * @return void
+     */
+    public function __construct()
+    {
+        parent::__construct();
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * For each distinct `parent` in the selected dictionaries, writes one CSV
+     * row: the parent first, followed by the distinct words that share that
+     * parent and the DhammaTerm meanings recorded for the parent.
+     *
+     * @return int 0 on success, 1 when the directory or file cannot be prepared.
+     */
+    public function handle()
+    {
+        $dictIds = [
+            // irregular
+            '4d3a0d92-0adc-4052-80f5-512a2603d0e8',
+            // regular
+            DictApi::getSysDict('system_regular'),
+        ];
+
+        $path = storage_path('app/export/fts');
+        if (!is_dir($path) && !mkdir($path, 0700, true)) {
+            Log::error('mkdir fail path=' . $path);
+            return 1;
+        }
+
+        $file = $path . '/pali_synonyms.txt';
+        $fp = fopen($file, 'w');
+        if ($fp === false) {
+            // Report the failure through the command's exit code instead of die().
+            Log::error('fopen fail file=' . $file);
+            $this->error('Unable to open file: ' . $file);
+            return 1;
+        }
+
+        try {
+            foreach ($dictIds as $dictId) {
+                $parents = UserDict::where('dict_id', $dictId)
+                    ->select('parent')
+                    ->groupBy('parent')->cursor();
+
+                foreach ($parents as $parent) {
+                    $words = UserDict::where('dict_id', $dictId)
+                        ->where('parent', $parent->parent)
+                        ->select('word')
+                        ->groupBy('word')->get();
+                    $terms = DhammaTerm::where('word', $parent->parent)
+                        ->select(['meaning'])->get();
+
+                    // Array keys de-duplicate words and meanings.
+                    $synonyms = [];
+                    foreach ($words as $word) {
+                        $synonyms[$word->word] = 1;
+                    }
+                    foreach ($terms as $term) {
+                        // Skip blank meanings so the row gets no empty field
+                        // (same rule as the export:ik.pali.team command).
+                        if (!empty($term->meaning)) {
+                            $synonyms[$term->meaning] = 1;
+                        }
+                    }
+
+                    $this->info("[{$parent->parent}] " . count($words) . " team=" . count($terms));
+
+                    // Row layout: parent, then every collected word / meaning.
+                    fputcsv($fp, array_merge([$parent->parent], array_keys($synonyms)));
+                }
+            }
+        } finally {
+            // Always release the handle, even if a query or write throws.
+            fclose($fp);
+        }
+
+        $this->info('done');
+        return 0;
+    }
+}

+ 200 - 0
api-v8/app/Console/Commands/IndexPaliText.php

@@ -0,0 +1,200 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Illuminate\Console\Command;
+use App\Services\SearchPaliDataService;
+use App\Services\OpenSearchService;
+use Illuminate\Support\Facades\Log;
+
+class IndexPaliText extends Command
+{
+    /**
+     * The name and signature of the console command.
+     * php artisan opensearch:index-pali 93
+     * @var string
+     */
+    protected $signature = 'opensearch:index-pali {book : The book ID to index data for} {--granularity= : The granularity to index (paragraph, sutta, sentence; omit to index all)}';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Index Pali data into OpenSearch for a specified book and optional granularity (all granularities if not specified)';
+
+    /** @var \App\Services\SearchPaliDataService Source of the paragraph rows to index. */
+    protected $searchPaliDataService;
+
+    /** @var \App\Services\OpenSearchService Client used to test the connection and write documents. */
+    protected $openSearchService;
+
+    /**
+     * Create a new command instance.
+     *
+     * @param  \App\Services\SearchPaliDataService  $searchPaliDataService
+     * @param  \App\Services\OpenSearchService  $openSearchService
+     * @return void
+     */
+    public function __construct(SearchPaliDataService $searchPaliDataService, OpenSearchService $openSearchService)
+    {
+        parent::__construct();
+        $this->openSearchService = $openSearchService;
+        $this->searchPaliDataService = $searchPaliDataService;
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * Indexes the requested granularity for the book, or every supported
+     * granularity when the option is omitted.
+     *
+     * @return int 0 when every granularity succeeded, 1 otherwise.
+     */
+    public function handle()
+    {
+        $book = $this->argument('book');
+        $granularity = $this->option('granularity');
+
+        // Supported granularities mapped to the method that indexes each one.
+        $indexers = [
+            'paragraph' => 'indexPaliParagraphs',
+            'sutta' => 'indexPaliSutta',
+            'sentence' => 'indexPaliSentences',
+        ];
+        $supported = array_keys($indexers);
+
+        try {
+            [$connected, $message] = $this->openSearchService->testConnection();
+            if (!$connected) {
+                $this->error($message);
+                Log::error($message);
+                return 1;
+            }
+
+            // No granularity given: run all of them; otherwise only the requested one.
+            $requested = empty($granularity) ? $supported : [$granularity];
+
+            // Becomes 1 as soon as any granularity fails or is invalid.
+            $exitCode = 0;
+
+            foreach ($requested as $level) {
+                if (!in_array($level, $supported, true)) {
+                    $this->error("Invalid granularity: $level. Supported values: " . implode(', ', $supported));
+                    Log::error("Invalid granularity provided: $level");
+                    $exitCode = 1;
+                    continue;
+                }
+
+                $status = $this->{$indexers[$level]}($book);
+                $exitCode = max($exitCode, $status);
+            }
+
+            if ($exitCode !== 0) {
+                $this->warn("Indexing completed with errors for book: $book");
+            } else {
+                $this->info("Successfully completed indexing for book: $book");
+            }
+
+            return $exitCode;
+        } catch (\Exception $e) {
+            $this->error("Failed to index Pali data: " . $e->getMessage());
+            Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e->getMessage()]);
+            return 1;
+        }
+    }
+
+    /**
+     * Index every paragraph of a book, one OpenSearch document per paragraph.
+     *
+     * @param int $book
+     * @return int Always 0; failures surface as exceptions from the services.
+     */
+    protected function indexPaliParagraphs($book)
+    {
+        $this->info("Starting to index paragraphs for book: $book");
+
+        // Page size null asks the data service for all paragraphs from 1 onward.
+        $paragraphs = $this->searchPaliDataService->getPaliData($book, 1, null)['rows'];
+        $total = count($paragraphs);
+
+        if ($total === 0) {
+            $this->warn("No paragraphs found for book: $book");
+            return 0;
+        }
+
+        $this->info("Found $total paragraphs to index");
+
+        $progress = $this->output->createProgressBar($total);
+        $progress->start();
+
+        foreach ($paragraphs as $paragraph) {
+            $document = $this->buildParagraphDocument($book, $paragraph);
+            $this->openSearchService->create($document['id'], $document);
+            $progress->advance();
+        }
+
+        $progress->finish();
+        $this->newLine();
+        $this->info("Successfully indexed $total paragraphs for book: $book");
+        Log::info("Indexed $total paragraphs for book: $book");
+
+        return 0;
+    }
+
+    /**
+     * Map one row returned by SearchPaliDataService::getPaliData() to the
+     * document structure sent to OpenSearch.
+     *
+     * @param int $book
+     * @param array $paragraph
+     * @return array
+     */
+    private function buildParagraphDocument($book, $paragraph)
+    {
+        $number = $paragraph['paragraph'];
+        $plainText = $paragraph['text'];
+
+        return [
+            'id' => "pali_para_{$book}_{$number}",
+            'resource_id' => $paragraph['uid'],
+            'resource_type' => 'paragraph',
+            'title' => [
+                'display' => "Paragraph {$number} of Book {$book}"
+            ],
+            'summary' => [
+                'text' => $plainText
+            ],
+            'content' => [
+                'display' => $paragraph['markdown'],
+                // Both plain-text fields carry the same markdown-free text.
+                'text' => $plainText,
+                'exact' => $plainText,
+            ],
+            'bold_single' => $paragraph['bold1'],
+            'bold_multi' => $paragraph['bold2'] . ' ' . $paragraph['bold3'],
+            'related_id' => $paragraph['pcd_book_id'],
+            'category' => 'pali',
+            'language' => 'pali',
+            'updated_at' => now()->toIso8601String(),
+            'granularity' => 'paragraph',
+        ];
+    }
+
+    /**
+     * Placeholder: sutta indexing is not implemented yet.
+     *
+     * @param int $book
+     * @return int Always 1, so the command reports the run as incomplete.
+     */
+    protected function indexPaliSutta($book)
+    {
+        $this->warn('Sutta indexing is not yet implemented for book: ' . $book);
+        Log::warning('Sutta indexing not implemented for book: ' . $book);
+
+        return 1;
+    }
+
+    /**
+     * Placeholder: sentence indexing is not implemented yet.
+     *
+     * @param int $book
+     * @return int Always 1, so the command reports the run as incomplete.
+     */
+    protected function indexPaliSentences($book)
+    {
+        $this->warn('Sentence indexing is not yet implemented for book: ' . $book);
+        Log::warning('Sentence indexing not implemented for book: ' . $book);
+
+        return 1;
+    }
+}

+ 136 - 0
api-v8/app/Http/Controllers/SearchPlusController.php

@@ -0,0 +1,136 @@
+<?php
+
+namespace App\Http\Controllers;
+
+use App\Services\ResourceService;
+use App\Services\OpenSearchService;
+use Illuminate\Http\Request;
+
+class SearchPlusController extends Controller
+{
+    /** @var \App\Services\OpenSearchService */
+    protected $searchService;
+
+    /**
+     * Constructor; injects the OpenSearchService.
+     *
+     * @param  \App\Services\OpenSearchService  $searchService
+     */
+    public function __construct(OpenSearchService $searchService)
+    {
+        $this->searchService = $searchService;
+    }
+
+    /**
+     * Display a listing of the resource.
+     *
+     * Handles a search request in one of four modes (fuzzy / exact /
+     * semantic / hybrid): reads the query parameters and forwards them to
+     * OpenSearchService::search().
+     *
+     * @param  \Illuminate\Http\Request  $request
+     *   - q (string): search keywords
+     *   - resource_type (string): article|term|dictionary|translation|origin_text|nissaya
+     *   - granularity (string): book|chapter|sutta|section|paragraph|sentence
+     *   - language (string): e.g. pali, zh-Hans, zh-Hant, en-US, my
+     *   - category (string): pali|commentary|subcommentary
+     *   - tags (array): tag filter
+     *   - page_refs (array): page markers, e.g. ["V3.81","M3.58"]
+     *   - related_id (array): related ids, e.g. ["chapter_93-5","m.n. 38"]
+     *   - author (string): author or translator (metadata.author)
+     *   - channel (string): source channel (metadata.channel)
+     *   - page (int): page number, default 1, minimum 1
+     *   - page_size (int): items per page, default 20, clamped to 1..100
+     *   - search_mode (string): fuzzy|exact|semantic|hybrid, default fuzzy
+     *
+     * @return \Illuminate\Http\JsonResponse
+     */
+    public function index(Request $request)
+    {
+        $query      = $request->input('q', '');
+        $searchMode = $request->input('search_mode', 'fuzzy');
+
+        $params = [
+            'query'        => $query,
+            'page'         => max(1, (int) $request->input('page', 1)),
+            // Clamp on both sides: without the lower bound a page_size of 0
+            // or a negative number would be passed straight to the search.
+            'pageSize'     => max(1, min(100, (int) $request->input('page_size', 20))),
+            'searchMode'   => $searchMode,
+            'resourceType' => $request->input('resource_type'),
+            'granularity'  => $request->input('granularity'),
+            'language'     => $request->input('language'),
+            'category'     => $request->input('category'),
+            'tags'         => $request->input('tags', []),
+            'pageRefs'     => $request->input('page_refs', []),
+            'relatedId'    => $request->input('related_id', []),
+            'author'       => $request->input('author'),
+            'channel'      => $request->input('channel'),
+        ];
+
+        try {
+            $result = $this->searchService->search($params);
+
+            return response()->json([
+                'success' => true,
+                'data'    => $result,
+                'query_info' => [
+                    'original_query' => $query,
+                    'search_mode'    => $searchMode,
+                ],
+            ]);
+        } catch (\Exception $e) {
+            // Record the failure; otherwise it only ever reaches the client.
+            report($e);
+
+            return response()->json([
+                'success' => false,
+                'error'   => $e->getMessage(),
+            ], 500);
+        }
+    }
+
+    /**
+     * Store a newly created resource in storage (not implemented yet).
+     *
+     * Add a resource.
+     * @route POST /api/search
+     * @param JSON: data in OpenSearch format (e.g., {type, title, content, path_full, ...})
+     * @param  \Illuminate\Http\Request  $request
+     * @return \Illuminate\Http\Response
+     */
+    public function store(Request $request) {}
+
+    /**
+     * Display the specified resource (not implemented yet).
+     *
+     * @param  int  $id
+     * @return \Illuminate\Http\Response
+     */
+    public function show($id)
+    {
+        //
+    }
+
+    /**
+     * Update a resource (not implemented yet).
+     * @route PUT /api/search/{uid}
+     * @param JSON: data in OpenSearch format
+     * @return \Illuminate\Http\Response
+     */
+    public function update(Request $request, $uid) {}
+
+    /**
+     * Remove the specified resource from storage (not implemented yet).
+     *
+     * Delete a resource.
+     * @route DELETE /api/search/{uid}
+     * @return \Illuminate\Http\Response
+     */
+    public function destroy($uid) {}
+}

+ 105 - 0
api-v8/app/Http/Controllers/SearchSuggestController.php

@@ -0,0 +1,105 @@
+<?php
+
+namespace App\Http\Controllers;
+
+use App\Http\Controllers\Controller;
+use Illuminate\Http\Request;
+use App\Services\OpenSearchService;
+
+
+/**
+ * Example response
+ * Request: GET /api/v2/suggest?q=慈&type=term&limit=5
+ * Response:
+ * {
+  "success": true,
+  "data": {
+    "suggestions": [
+      {
+        "text": "慈悲",
+        "resource_type": "translation",
+        "language": "zh-Hans"
+      },
+      {
+        "text": "mettā",
+        "resource_type": "origin_text",
+        "language": "pali"
+      },
+      {
+        "text": "compassion",
+        "resource_type": "translation",
+        "language": "en-US"
+      }
+    ]
+  }
+}
+
+ */
+class SearchSuggestController extends Controller
+{
+    /** @var \App\Services\OpenSearchService */
+    protected $searchService;
+
+    /**
+     * Constructor; injects the OpenSearchService.
+     *
+     * @param  \App\Services\OpenSearchService  $searchService
+     */
+    public function __construct(OpenSearchService $searchService)
+    {
+        $this->searchService = $searchService;
+    }
+
+    /**
+     * Display a listing of suggestions.
+     *
+     * Auto-suggest endpoint: forwards the partial input to
+     * OpenSearchService::suggest() and returns text, resource_type and
+     * language for each suggestion.
+     *
+     * @param  \Illuminate\Http\Request  $request
+     *   - q (string): partial input text (required)
+     *   - type (string): suggestion type, term|pali_romanized|page_ref, default term
+     *   - language (string): optional language filter
+     *   - limit (int): number of results, default 10, clamped to 1..50
+     *
+     * @return \Illuminate\Http\JsonResponse
+     */
+    public function index(Request $request)
+    {
+        $query    = $request->input('q', '');
+        $type     = $request->input('type', 'term');
+        $language = $request->input('language');
+        // Clamp on both sides so 0 or a negative limit cannot reach the service.
+        $limit    = max(1, min(50, (int) $request->input('limit', 10)));
+
+        // Explicit check instead of empty(): "0" is a valid query, while
+        // null and array input are not.
+        if (!is_scalar($query) || (string) $query === '') {
+            return response()->json([
+                'success' => false,
+                'error'   => '缺少参数 q'
+            ], 400);
+        }
+        $query = (string) $query;
+
+        try {
+            $rawSuggestions = $this->searchService->suggest($query, $type, $language, $limit);
+
+            // Normalise every item to text + resource_type + language.
+            $suggestions = collect($rawSuggestions)->map(function ($item) {
+                return [
+                    'text'          => $item['text'] ?? '',
+                    'resource_type' => $item['resource_type'] ?? null,
+                    'language'      => $item['language'] ?? null,
+                ];
+            })->all();
+
+            return response()->json([
+                'success' => true,
+                'data'    => [
+                    'suggestions' => $suggestions
+                ]
+            ]);
+        } catch (\Exception $e) {
+            // Record the failure; otherwise it only ever reaches the client.
+            report($e);
+
+            return response()->json([
+                'success' => false,
+                'error'   => $e->getMessage(),
+            ], 500);
+        }
+    }
+}

+ 18 - 0
api-v8/app/Services/EmbeddingService.php

@@ -0,0 +1,18 @@
+<?php
+
+namespace App\Services;
+
+use Symfony\Component\Process\Process;
+
+class EmbeddingService
+{
+    /**
+     * Generate an embedding for a text by running the Python helper script.
+     *
+     * Runs `python3 scripts/generate_embedding.py <text>` and decodes the
+     * script's standard output as JSON.
+     *
+     * NOTE(review): the script path is relative, so it resolves against the
+     * working directory of the PHP process — confirm this is the project
+     * root in every context that calls this service.
+     *
+     * @param  string  $text
+     * @return mixed Decoded JSON output of the script.
+     *
+     * @throws \Exception When the script exits with an error or prints invalid JSON.
+     */
+    public function generate($text)
+    {
+        $process = new Process(['python3', 'scripts/generate_embedding.py', $text]);
+        $process->run();
+
+        if (!$process->isSuccessful()) {
+            // Keep the script's stderr so the cause is visible to the caller.
+            $detail = trim($process->getErrorOutput());
+            throw new \Exception('Embedding generation failed' . ($detail !== '' ? ': ' . $detail : ''));
+        }
+
+        $embedding = json_decode($process->getOutput(), true);
+        if (json_last_error() !== JSON_ERROR_NONE) {
+            // Without this check malformed output would silently become null.
+            throw new \Exception('Embedding generation failed: invalid JSON output (' . json_last_error_msg() . ')');
+        }
+
+        return $embedding;
+    }
+}

+ 97 - 0
api-v8/app/Services/ResourceService.php

@@ -0,0 +1,97 @@
+<?php
+
+namespace App\Services;
+
+use League\CommonMark\GithubFlavoredMarkdownConverter;
+
+class ResourceService
+{
+    /** Name of the index every document of this service is written to. */
+    private const INDEX = 'wikipali_resources';
+
+    /** @var OpenSearchService */
+    protected $openSearch;
+
+    /** @var EmbeddingService */
+    protected $embeddingService;
+
+    /** @var GithubFlavoredMarkdownConverter */
+    protected $markdownConverter;
+
+    public function __construct(OpenSearchService $openSearch, EmbeddingService $embeddingService)
+    {
+        $this->openSearch = $openSearch;
+        $this->embeddingService = $embeddingService;
+        $this->markdownConverter = new GithubFlavoredMarkdownConverter();
+    }
+
+    /**
+     * Build a document from the payload and write it to the index.
+     *
+     * NOTE(review): create() is called here with (index, id, document), while
+     * the opensearch:index-pali command calls it with (id, document) —
+     * confirm which signature OpenSearchService::create() really has.
+     *
+     * @param  array  $data  Must contain uid, type and title.
+     * @return mixed Result of OpenSearchService::create().
+     */
+    public function store(array $data)
+    {
+        $doc = $this->buildDocument($data);
+        return $this->openSearch->create(self::INDEX, $doc['id'], $doc);
+    }
+
+    /**
+     * Rebuild the document for $uid and overwrite it through create().
+     *
+     * @param  mixed  $uid
+     * @param  array  $data
+     * @return mixed Result of OpenSearchService::create().
+     */
+    public function update($uid, array $data)
+    {
+        // The explicit $uid wins over any `uid` key inside the payload, so
+        // the document that gets overwritten is always the addressed one.
+        $doc = $this->buildDocument(array_merge($data, ['uid' => $uid]));
+        return $this->openSearch->create(self::INDEX, $doc['id'], $doc);
+    }
+
+    /**
+     * Delete the document with the given uid from the index.
+     *
+     * @param  mixed  $uid
+     * @return void
+     */
+    public function delete($uid)
+    {
+        $this->openSearch->delete(self::INDEX, $uid);
+    }
+
+    /**
+     * Generate an embedding for a text via the EmbeddingService.
+     *
+     * @param  string  $text
+     * @return mixed
+     */
+    public function generateEmbedding($text)
+    {
+        return $this->embeddingService->generate($text);
+    }
+
+    /**
+     * Turn a payload into the document structure stored in the index.
+     *
+     * `content` may be a plain string or a list whose first item carries a
+     * `text` key. For the types `sutta` and `paragraph`, the scripture
+     * location fields and a path embedding are added.
+     *
+     * @param  array  $data
+     * @return array
+     */
+    private function buildDocument(array $data)
+    {
+        // Tolerate a missing `content` key instead of failing on it.
+        $content = $data['content'] ?? '';
+        $contentText = is_array($content) ? ($content[0]['text'] ?? '') : $content;
+        $normalizedText = $this->normalizeMarkdown($contentText);
+
+        $doc = [
+            'id' => $data['uid'],
+            'uid' => $data['uid'],
+            'type' => $data['type'],
+            'title' => $data['title'],
+            'content' => [
+                [
+                    'id' => $data['uid'],
+                    'text' => $contentText,
+                    'text_normalized' => $normalizedText
+                ]
+            ],
+            'confidence' => $data['confidence'] ?? 1.0,
+            'content_embedding' => $this->embeddingService->generate($normalizedText),
+            'suggest_content' => $this->extractSuggestions($contentText, $data['title']),
+            'metadata' => $data['metadata'] ?? []
+        ];
+
+        if (in_array($data['type'], ['sutta', 'paragraph'], true)) {
+            $bookId = $data['book_id'] ?? null;
+            $paragraph = $data['paragraph'] ?? null;
+
+            $doc['book_id'] = $bookId;
+            $doc['paragraph'] = $paragraph;
+            // Use the defaulted locals: reading $data['book_id'] directly
+            // raises an "undefined array key" error when the key is absent.
+            $doc['scripture_id'] = $bookId && $paragraph ? "{$bookId}-{$paragraph}" : null;
+            $doc['path_full'] = $data['path_full'] ?? '';
+            $doc['path_embedding'] = $this->embeddingService->generate($doc['path_full']);
+            $doc['book_name'] = $data['book_name'] ?? '';
+            $doc['vagga'] = $data['vagga'] ?? '';
+            $doc['chapter'] = $data['chapter'] ?? '';
+            $doc['sutta_name'] = $data['sutta_name'] ?? $data['title'];
+        }
+
+        return $doc;
+    }
+
+    /**
+     * Convert markdown to lower-cased plain text with long vowels folded.
+     *
+     * @param  string  $markdown
+     * @return string
+     */
+    private function normalizeMarkdown($markdown)
+    {
+        // Render to HTML, then strip the tags to get plain text.
+        $html = $this->markdownConverter->convert($markdown)->getContent();
+        $text = strip_tags($html);
+        // Simple Pali folding (ICU folding could replace this later).
+        // mb_strtolower is required: the byte-wise strtolower() leaves
+        // Ā / Ī / Ū untouched, so they would never match the table below.
+        $text = str_replace(['ā', 'ī', 'ū'], ['a', 'i', 'u'], mb_strtolower($text, 'UTF-8'));
+        return $text;
+    }
+
+    /**
+     * Build the suggestion list: the title plus up to five distinct keywords
+     * taken from the normalized content.
+     *
+     * @param  string  $markdown
+     * @param  string  $title
+     * @return array
+     */
+    private function extractSuggestions($markdown, $title)
+    {
+        $text = $this->normalizeMarkdown($markdown);
+        // Split on any whitespace (the text contains newlines as well as
+        // spaces); preg_split returns false on invalid UTF-8, hence `?: []`.
+        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
+        // Count characters, not bytes, so words with diacritics are measured correctly.
+        $keywords = array_unique(array_filter($words, fn($word) => mb_strlen($word, 'UTF-8') > 2));
+        return array_merge([$title], array_slice($keywords, 0, 5));
+    }
+}

+ 201 - 0
api-v8/app/Services/SearchPaliDataService.php

@@ -0,0 +1,201 @@
+<?php
+
+namespace App\Services;
+
+use App\Models\BookTitle;
+use App\Models\WbwTemplate;
+use App\Models\PaliText;
+use App\Models\PaliSentence;
+
+class SearchPaliDataService
+{
+    /**
+     * Retrieve Pali data for search, one row per paragraph.
+     *
+     * @param int $book
+     * @param int $start First paragraph number to return.
+     * @param int|null $pageSize Number of paragraphs to return; null means
+     *                           "up to the last paragraph of the book".
+     * @return array ['rows' => list of paragraph rows, 'count' => highest
+     *               paragraph number found in WbwTemplate for the book]
+     */
+    public function getPaliData($book, $start = 1, $pageSize = null)
+    {
+        $maxParagraph = WbwTemplate::where('book', $book)->max('paragraph');
+        $output = [];
+        $pageSize = $pageSize === null ? $maxParagraph : $pageSize;
+        // Exclusive upper bound of the requested page, capped at the last paragraph.
+        $endOfPara = min($start + $pageSize, $maxParagraph + 1);
+
+        // The fallback title does not depend on the paragraph, so it is
+        // looked up at most once, and only if some paragraph needs it.
+        $fallbackLoaded = false;
+        $fallbackBookId = null;
+
+        for ($iPara = $start; $iPara < $endOfPara; $iPara++) {
+            $content = $this->getParaContent($book, $iPara);
+
+            $words = WbwTemplate::where('book', $book)
+                ->where('paragraph', $iPara)
+                ->orderBy('wid')
+                ->get();
+            [$bold1, $bold2, $bold3] = $this->groupBoldRuns($words);
+
+            // Nearest title at or before this paragraph.
+            $pcd_book = BookTitle::where('book', $book)
+                ->where('paragraph', '<=', $iPara)
+                ->orderBy('paragraph', 'desc')
+                ->first();
+
+            if ($pcd_book) {
+                $pcd_book_id = $pcd_book->sn;
+            } else {
+                if (!$fallbackLoaded) {
+                    // No title precedes this paragraph: use the book's first title.
+                    $fallbackBookId = BookTitle::where('book', $book)
+                        ->orderBy('paragraph')
+                        ->value('sn');
+                    $fallbackLoaded = true;
+                }
+                $pcd_book_id = $fallbackBookId;
+            }
+
+            $output[] = [
+                'uid' => PaliText::where('book', $book)->where('paragraph', $iPara)->value('uid'),
+                'book' => $book,
+                'paragraph' => $iPara,
+                'bold1' => implode(' ', $bold1),
+                'bold2' => implode(' ', $bold2),
+                'bold3' => implode(' ', $bold3),
+                'content' => $content['markdown'],
+                'markdown' => $content['markdown'],
+                'text' => $content['text'],
+                'pcd_book_id' => $pcd_book_id
+            ];
+        }
+
+        return ['rows' => $output, 'count' => $maxParagraph];
+    }
+
+    /**
+     * Group the bold words of a paragraph by the length of their run.
+     *
+     * A run is a sequence of consecutive words whose style is `bld`.
+     *
+     * @param iterable $words Word rows in `wid` order.
+     * @return array [words of 1-word runs, words of 2-word runs, words of longer runs]
+     */
+    private function groupBoldRuns($words)
+    {
+        $runs = [];
+        $current = [];
+        foreach ($words as $word) {
+            if ($word->style === 'bld') {
+                $current[] = $word->real;
+            } elseif ($current !== []) {
+                $runs[] = $current;
+                $current = [];
+            }
+        }
+        // A run that reaches the end of the paragraph is still open here.
+        if ($current !== []) {
+            $runs[] = $current;
+        }
+
+        $groups = [[], [], []];
+        foreach ($runs as $run) {
+            // Length 1 -> slot 0, length 2 -> slot 1, anything longer -> slot 2.
+            $slot = min(count($run), 3) - 1;
+            $groups[$slot] = array_merge($groups[$slot], $run);
+        }
+
+        return $groups;
+    }
+
+    /**
+     * Generate a markdown content string for a given book and paragraph.
+     *
+     * Bold words are wrapped in `**`, notes in `_`; control rows
+     * (type `.ctl.`) are skipped. Currently not called from within this class.
+     *
+     * @param int $book
+     * @param int $para
+     * @return string
+     */
+    private function getContent($book, $para)
+    {
+        $words = WbwTemplate::where('book', $book)
+            ->where('paragraph', $para)
+            ->where('type', '<>', '.ctl.')
+            ->orderBy('wid')
+            ->get();
+
+        $content = '';
+        foreach ($words as $word) {
+            if ($word->style === 'bld') {
+                if (strpos($word->word, '{') === false) {
+                    $content .= "**{$word->word}** ";
+                } else {
+                    // Braces mark the bold part inside the word.
+                    $content .= str_replace(['{', '}'], ['**', '** '], $word->word);
+                }
+            } elseif ($word->style === 'note') {
+                $content .= " _{$word->word}_ ";
+            } else {
+                $content .= $word->word . ' ';
+            }
+        }
+
+        return trim($content);
+    }
+
+    /**
+     * Build the markdown and plain text of a paragraph from its sentences.
+     *
+     * @param int $book
+     * @param int $para
+     * @return array ['markdown' => sentences joined by newlines,
+     *                'text' => sentence texts concatenated]; both are empty
+     *               strings when the paragraph has no sentences.
+     */
+    public function getParaContent($book, $para)
+    {
+        // get() always returns a collection, so a paragraph without
+        // sentences simply produces two empty strings below.
+        $sentences = PaliSentence::where('book', $book)
+            ->where('paragraph', $para)
+            ->orderBy('word_begin')
+            ->get();
+
+        $markdown = [];
+        $text = [];
+        foreach ($sentences as $sentence) {
+            $content = $this->getSentenceText($book, $para, $sentence->word_begin, $sentence->word_end);
+            $markdown[] = $content['markdown'];
+            $text[] = $content['text'];
+        }
+
+        return [
+            'markdown' => implode("\n", $markdown),
+            // NOTE(review): sentence texts are joined without a separator, so
+            // the last word of one sentence touches the first word of the
+            // next — confirm this is intended.
+            'text' => implode("", $text),
+        ];
+    }
+
+    /**
+     * Build the markdown and plain text of one sentence.
+     *
+     * @param int $book
+     * @param int $para
+     * @param int $start First word id (`wid`) of the sentence, inclusive.
+     * @param int $end Last word id (`wid`) of the sentence, inclusive.
+     * @return array ['markdown' => string, 'text' => string]
+     */
+    private function getSentenceText($book, $para, $start, $end)
+    {
+        $words = WbwTemplate::where('book', $book)
+            ->where('paragraph', $para)
+            ->where('type', '<>', '.ctl.')
+            ->whereBetween('wid', [$start, $end])
+            ->orderBy('wid')
+            ->get();
+
+        $text = [];
+        $markdown = '';
+        foreach ($words as $word) {
+            // The plain text drops the braces that mark bold parts.
+            $text[] = str_replace(['{', '}'], ['', ''], $word->word);
+            if ($word->style === 'bld') {
+                if (strpos($word->word, '{') === false) {
+                    $markdown .= "**{$word->word}** ";
+                } else {
+                    $markdown .= str_replace(['{', '}'], ['**', '** '], $word->word);
+                }
+            } elseif ($word->style === 'note') {
+                $markdown .= " ~~{$word->word}~~ ";
+            } else {
+                $markdown .= $word->word . ' ';
+            }
+        }
+
+        return [
+            // Merge adjacent note / bold spans before fixing abbreviations.
+            'markdown' => $this->abbrReplace(trim(str_replace(['~~  ~~', '** **'], [' ', ' '], $markdown))),
+            'text' => $this->abbrReplace(implode(' ', $text)),
+        ];
+    }
+
+    /**
+     * Remove the space that word-joining puts before the dot of the
+     * abbreviations sī., syā., kaṃ. and pī.
+     *
+     * @param string $input
+     * @return string
+     */
+    private function abbrReplace($input)
+    {
+        $abbr = ['sī .', 'syā .', 'kaṃ .', 'pī .'];
+        $abbrTo = ['sī.', 'syā.', 'kaṃ.', 'pī.'];
+        return str_replace($abbr, $abbrTo, $input);
+    }
+}

+ 172 - 0
dashboard-v4/dashboard/src/services/agentApi.ts

@@ -0,0 +1,172 @@
+import {
+  SearchByQueryArgs,
+  SearchByPageRefArgs,
+  GetTermDefinitionArgs,
+  SearchPaliArgs,
+  SearchResponse,
+  AICallbackFunction,
+} from "../types/agent"; // Shared argument / response types for the agent API
+
+/**
+ * Base URL prepended to every endpoint requested by this module.
+ * Replace it with the address of your actual backend API.
+ */
+const API_BASE_URL = "http://localhost:8000/api/v3";
+
+// ---------------------------------------------------------------- //
+//                  低层 API 客户端(使用 fetch)                  //
+// ---------------------------------------------------------------- //
+
+/**
+ * Issue a GET request against the backend and unwrap the response envelope.
+ *
+ * Every own entry of `params` whose value is neither `undefined` nor `null`
+ * becomes a query-string parameter; array values are joined with ",".
+ *
+ * NOTE(review): the resolved value is the envelope's `data` field cast to `T`,
+ * while the callers in this file pass `SearchResponse` (the whole envelope)
+ * as `T` — confirm which shape consumers actually expect.
+ *
+ * @param endpoint Path appended to `API_BASE_URL`, e.g. "/search".
+ * @param params   Query parameters to serialise.
+ * @returns The `data` field of the JSON body when `success` is truthy.
+ * @throws Error when the HTTP status is not ok, when the body reports a falsy
+ *         `success`, or when the request / JSON parsing fails. The original
+ *         error is rethrown so the real cause reaches the caller.
+ */
+const apiClient = async <T>(
+  endpoint: string,
+  params: Record<string, any>
+): Promise<T> => {
+  const searchParams = new URLSearchParams();
+  // Object.entries only visits own keys, unlike for...in.
+  for (const [key, value] of Object.entries(params)) {
+    if (value === undefined || value === null) {
+      continue;
+    }
+    searchParams.append(
+      key,
+      Array.isArray(value) ? value.join(",") : String(value)
+    );
+  }
+
+  const url = `${API_BASE_URL}${endpoint}?${searchParams.toString()}`;
+
+  try {
+    const response = await fetch(url);
+    if (!response.ok) {
+      throw new Error(`HTTP error! Status: ${response.status}`);
+    }
+
+    const data: SearchResponse = await response.json();
+    if (data.success) {
+      return data.data as T;
+    } else {
+      throw new Error("API request was not successful.");
+    }
+  } catch (error) {
+    console.error("API call failed:", error);
+    // Propagate the real failure (HTTP status, unsuccessful envelope, network
+    // or JSON error) instead of masking it behind a generic message.
+    throw error instanceof Error ? error : new Error(String(error));
+  }
+};
+
+// ---------------------------------------------------------------- //
+//                  封装的搜索函数(每个函数对应一个意图)          //
+// ---------------------------------------------------------------- //
+
+/**
+ * General-purpose search covering fuzzy and semantic queries.
+ *
+ * Forwards the query text, search mode and the optional resource-type and
+ * language filters to the "/search" endpoint.
+ */
+const searchByQuery = async (
+  args: SearchByQueryArgs
+): Promise<SearchResponse> => {
+  const { query, search_mode, resource_type, language } = args;
+  const queryParams = { q: query, search_mode, resource_type, language };
+  return apiClient<SearchResponse>("/search", queryParams);
+};
+
+/**
+ * Search dedicated to page references.
+ *
+ * The page reference is sent both as the query text and as `page_refs`, and
+ * the search mode is fixed to "page_search".
+ */
+const searchByPageRef = async (
+  args: SearchByPageRefArgs
+): Promise<SearchResponse> => {
+  const { page_refs } = args;
+  const queryParams = {
+    q: page_refs,
+    search_mode: "page_search",
+    page_refs,
+  };
+  return apiClient<SearchResponse>("/search", queryParams);
+};
+
+/**
+ * Look up the definition of a term.
+ *
+ * Runs an exact search for the term restricted to the "dictionary"
+ * resource type.
+ */
+const getTermDefinition = async (
+  args: GetTermDefinitionArgs
+): Promise<SearchResponse> => {
+  const { term } = args;
+  const queryParams = {
+    q: term,
+    search_mode: "exact",
+    resource_type: ["dictionary"],
+  };
+  return apiClient<SearchResponse>("/search", queryParams);
+};
+
+/**
+ * Exact search restricted to Pali text.
+ *
+ * Sends the query with the search mode fixed to "exact" and the language
+ * filter fixed to "pali".
+ */
+const searchPali = async (args: SearchPaliArgs): Promise<SearchResponse> => {
+  const { query } = args;
+  const queryParams = {
+    q: query,
+    search_mode: "exact",
+    language: ["pali"],
+  };
+  return apiClient<SearchResponse>("/search", queryParams);
+};
+
+// ---------------------------------------------------------------- //
+//               核心 Function Calling 处理函数                     //
+// ---------------------------------------------------------------- //
+
+/**
+ * Dispatch a function-call object produced by the AI assistant to the
+ * matching search helper.
+ *
+ * @param functionCall Function-call object returned by the AI assistant.
+ * @returns A Promise carrying the result of the selected search helper.
+ * @throws Error when `functionCall.name` is not one of the supported names.
+ */
+export const handleFunctionCall = async (
+  functionCall: AICallbackFunction
+): Promise<SearchResponse> => {
+  const requested = functionCall.name;
+  const payload = functionCall.arguments;
+
+  if (requested === "search_by_query") {
+    return searchByQuery(payload as SearchByQueryArgs);
+  }
+  if (requested === "search_by_page_ref") {
+    return searchByPageRef(payload as SearchByPageRefArgs);
+  }
+  if (requested === "get_term_definition") {
+    return getTermDefinition(payload as GetTermDefinitionArgs);
+  }
+  if (requested === "search_pali") {
+    return searchPali(payload as SearchPaliArgs);
+  }
+
+  throw new Error(`Unknown function call: ${requested}`);
+};
+
+// ---------------------------------------------------------------- //
+//                  使用示例                                       //
+// ---------------------------------------------------------------- //
+/**
+ * 
+ * 
+
+const main = async () => {
+  // 模拟从 AI 助手获得的函数调用对象
+  const mockCalls: AICallbackFunction[] = [
+    {
+      name: "search_by_query",
+      arguments: { query: "佛陀关于慈悲的教导", search_mode: "semantic" },
+    },
+    { name: "search_by_page_ref", arguments: { page_refs: "M3.58" } },
+    { name: "get_term_definition", arguments: { term: "四圣谛" } },
+    { name: "search_pali", arguments: { query: "mettā" } },
+  ];
+
+  for (const call of mockCalls) {
+    try {
+      console.log(`\n正在处理函数调用:${call.name}`);
+      const result = await handleFunctionCall(call);
+      console.log("搜索成功,找到结果数量:", result.data.hits.total.value);
+      // 根据你的需求,你可以在这里处理并展示结果
+    } catch (error) {
+      console.error(`处理函数调用失败:${call.name}`, error);
+    }
+  }
+};
+ */
+// main();

+ 160 - 0
dashboard-v4/dashboard/src/types/agent.ts

@@ -0,0 +1,160 @@
+// ---------------------------------------------------------------- //
+//             核心搜索函数参数类型:search_documents               //
+// ---------------------------------------------------------------- //
+
+/**
+ * Search modes, used to select the kind of search to run.
+ * - 'fuzzy': fuzzy search; supports Pali diacritics, Simplified/Traditional Chinese, etc.
+ * - 'exact': exact match, for proper nouns or specific phrases.
+ * - 'semantic': semantic search based on vector retrieval.
+ * - 'hybrid': hybrid search, fuzzy + semantic.
+ */
+export type SearchMode = "fuzzy" | "exact" | "semantic" | "hybrid";
+
+/**
+ * Document types, used to filter documents by their source.
+ */
+export type ResourceType =
+  | "article"
+  | "term"
+  | "dictionary"
+  | "translation"
+  | "original_text"
+  | "nissaya";
+
+/**
+ * Supported language codes.
+ */
+export type Language = "pali" | "zh-Hans" | "zh-Hant" | "en-US" | "my";
+
+/**
+ * Argument type of the search_documents function.
+ * It is passed as the `arguments` value of a Function Calling request.
+ */
+export interface SearchDocumentsArgs {
+  /**
+   * The user's search keywords or sentence.
+   */
+  query: string;
+
+  /**
+   * Search mode, chosen by the AI assistant from the user's intent.
+   */
+  search_mode: SearchMode;
+
+  /**
+   * Document types used to filter the search results.
+   */
+  resource_type?: ResourceType[];
+
+  /**
+   * Languages used to filter the search results.
+   */
+  language?: Language[];
+
+  /**
+   * Page-reference marker, used only when search_mode is 'page_search'.
+   * NOTE(review): 'page_search' is not a member of SearchMode as declared in
+   * this file — confirm whether the union should include it.
+   */
+  page_refs?: string;
+
+  /**
+   * Topic tags used for further filtering.
+   */
+  tags?: string[];
+}
+
+// ---------------------------------------------------------------- //
+//             术语定义函数参数类型:get_term_definition            //
+// ---------------------------------------------------------------- //
+
+/**
+ * Argument type of the get_term_definition function.
+ */
+export interface GetTermDefinitionArgs {
+  /**
+   * The Buddhist term or word to look up.
+   */
+  term: string;
+}
+
+// ---------------------------------------------------------------- //
+//              AI 助手返回的 Function Call 类型                    //
+// ---------------------------------------------------------------- //
+
+/**
+ * Arguments of the "search_by_query" function call: the query text, the
+ * search mode, and optional resource-type / language filters.
+ */
+
+export interface SearchByQueryArgs {
+  query: string;
+  search_mode: SearchMode;
+  resource_type?: ResourceType[];
+  language?: Language[];
+}
+
+/**
+ * Arguments of the "search_by_page_ref" function call.
+ */
+export interface SearchByPageRefArgs {
+  // Page reference to search for.
+  page_refs: string;
+}
+
+// NOTE(review): GetTermDefinitionArgs is already declared earlier in this file
+// with the same `term: string` member; this second declaration merges with it
+// and looks redundant — consider removing one of them.
+export interface GetTermDefinitionArgs {
+  term: string;
+}
+
+/**
+ * Arguments of the "search_pali" function call.
+ */
+export interface SearchPaliArgs {
+  // Text to search for.
+  query: string;
+}
+
+/**
+ * Function-call object returned by the AI assistant.
+ *
+ * `name` selects the function and `arguments` carries its payload. The two
+ * unions are declared independently, so the type itself does not tie a given
+ * name to its matching argument shape.
+ */
+export type AICallbackFunction = {
+  name:
+    | "search_by_query"
+    | "search_by_page_ref"
+    | "get_term_definition"
+    | "search_pali";
+  arguments:
+    | SearchByQueryArgs
+    | SearchByPageRefArgs
+    | GetTermDefinitionArgs
+    | SearchPaliArgs;
+};
+
+// ---------------------------------------------------------------- //
+//              后端 API 响应类型(示例)                           //
+// ---------------------------------------------------------------- //
+
+/**
+ * Data structure of a core document card (one entry of the search results).
+ */
+export interface DocumentResult {
+  id: string;
+  resource_id: string;
+  resource_type: ResourceType;
+  title: string;
+  content: {
+    display: string;
+    text: string;
+    vector?: number[];
+  };
+  related_id: string[];
+  page_refs?: string[];
+  language: Language;
+  score: number;
+  similarity?: number;
+}
+
+/**
+ * Complete structure of an API search response: a success flag, the result
+ * payload with its paging fields, and information about the query.
+ */
+export interface SearchResponse {
+  success: boolean;
+  data: {
+    total: number;
+    page: number;
+    page_size: number;
+    took: string;
+    results: DocumentResult[];
+  };
+  query_info: {
+    original_query: string;
+    search_type: SearchMode | "term_definition";
+  };
+}