|
|
@@ -6,15 +6,16 @@ use Illuminate\Console\Command;
|
|
|
use App\Services\SearchPaliDataService;
|
|
|
use App\Services\OpenSearchService;
|
|
|
use Illuminate\Support\Facades\Log;
|
|
|
+use App\Models\PaliText;
|
|
|
|
|
|
class IndexPaliText extends Command
|
|
|
{
|
|
|
/**
|
|
|
* The name and signature of the console command.
|
|
|
- * php artisan opensearch:index-pali 93
|
|
|
+ * php artisan opensearch:index-pali 93 --para=5
|
|
|
* @var string
|
|
|
*/
|
|
|
- protected $signature = 'opensearch:index-pali {book : The book ID to index data for} {--granularity= : The granularity to index (paragraph, sutta, sentence; omit to index all)}';
|
|
|
+ protected $signature = 'opensearch:index-pali {book : The book ID to index data for} {--para= : index paragraph No. omit to all} {--granularity= : The granularity to index (paragraph, sutta, sentence; omit to index all)}';
|
|
|
|
|
|
/**
|
|
|
* The console command description.
|
|
|
@@ -31,8 +32,10 @@ class IndexPaliText extends Command
|
|
|
*
|
|
|
* @return void
|
|
|
*/
|
|
|
- public function __construct(SearchPaliDataService $searchPaliDataService, OpenSearchService $openSearchService)
|
|
|
- {
|
|
|
+ public function __construct(
|
|
|
+ SearchPaliDataService $searchPaliDataService,
|
|
|
+ OpenSearchService $openSearchService
|
|
|
+ ) {
|
|
|
parent::__construct();
|
|
|
$this->searchPaliDataService = $searchPaliDataService;
|
|
|
$this->openSearchService = $openSearchService;
|
|
|
@@ -47,6 +50,7 @@ class IndexPaliText extends Command
|
|
|
{
|
|
|
$book = $this->argument('book');
|
|
|
$granularity = $this->option('granularity');
|
|
|
+ $paragraph = $this->option('para');
|
|
|
|
|
|
try {
|
|
|
// Test OpenSearch connection
|
|
|
@@ -56,116 +60,165 @@ class IndexPaliText extends Command
|
|
|
Log::error($message);
|
|
|
return 1;
|
|
|
}
|
|
|
-
|
|
|
- // Define all possible granularities
|
|
|
- $granularities = ['paragraph', 'sutta', 'sentence'];
|
|
|
-
|
|
|
- // If granularity is not set, index all granularities; otherwise, index the specified one
|
|
|
- $targetGranularities = empty($granularity) ? $granularities : [$granularity];
|
|
|
-
|
|
|
$overallStatus = 0; // Track overall command status (0 for success, 1 for any failure)
|
|
|
|
|
|
- foreach ($targetGranularities as $gran) {
|
|
|
- // Validate granularity
|
|
|
- if (!in_array($gran, $granularities)) {
|
|
|
- $this->error("Invalid granularity: $gran. Supported values: " . implode(', ', $granularities));
|
|
|
- Log::error("Invalid granularity provided: $gran");
|
|
|
- $overallStatus = 1;
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // Route to appropriate indexing method
|
|
|
- switch ($gran) {
|
|
|
- case 'paragraph':
|
|
|
- $status = $this->indexPaliParagraphs($book);
|
|
|
- break;
|
|
|
- case 'sutta':
|
|
|
- $status = $this->indexPaliSutta($book);
|
|
|
- break;
|
|
|
- case 'sentence':
|
|
|
- $status = $this->indexPaliSentences($book);
|
|
|
- break;
|
|
|
- default:
|
|
|
- $status = 1; // Should not reach here due to validation
|
|
|
- }
|
|
|
-
|
|
|
- // Update overall status if any indexing fails
|
|
|
- $overallStatus = max($overallStatus, $status);
|
|
|
- }
|
|
|
-
|
|
|
- if ($overallStatus === 0) {
|
|
|
- $this->info("Successfully completed indexing for book: $book");
|
|
|
+ if ((int)$book === 0) {
|
|
|
+ $maxBookId = PaliText::max('book');
|
|
|
+ $booksId = range(1, $maxBookId);
|
|
|
} else {
|
|
|
- $this->warn("Indexing completed with errors for book: $book");
|
|
|
+ $booksId = [$book];
|
|
|
+ }
|
|
|
+ foreach ($booksId as $key => $bookId) {
|
|
|
+ $this->indexPaliParagraphs($bookId, $paragraph);
|
|
|
}
|
|
|
|
|
|
return $overallStatus;
|
|
|
} catch (\Exception $e) {
|
|
|
$this->error("Failed to index Pali data: " . $e->getMessage());
|
|
|
- Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e->getMessage()]);
|
|
|
+ Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e]);
|
|
|
return 1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
 * Build the OpenSearch document for one Pali paragraph and index it.
 *
 * @param array $paraInfo    Paragraph row; reads the 'book', 'paragraph', 'uid',
 *                           'path' (JSON-encoded string) and 'level' keys.
 * @param array $paraContent Paragraph content; reads the 'markdown', 'words',
 *                           'bold1', 'bold2' and 'bold3' keys.
 * @param mixed $related_id  Value stored in the document's 'related_id' field.
 * @return mixed Whatever OpenSearchService::create() returns.
 */
protected function indexPaliParagraph($paraInfo, $paraContent, $related_id)
{
    $paraId = $paraInfo['book'] . '_' . $paraInfo['paragraph'];

    // json_decode() returns null for empty or malformed JSON. Normalise to an
    // array up front so both the title lookup and getPathTitle(), which only
    // accepts arrays, are safe.
    $path = json_decode((string) ($paraInfo['path'] ?? ''));
    if (!is_array($path)) {
        $path = [];
    }

    // The last node of the path supplies the title of this paragraph.
    $title = count($path) > 0 ? end($path)->title : '';

    $document = [
        'id' => "pali_para_{$paraId}",
        'resource_id' => $paraInfo['uid'], // uid of the paragraph row
        'resource_type' => 'original_text',
        'title' => [
            'pali' => $title,
        ],
        'summary' => [
            'text' => '',
        ],
        'content' => [
            'pali' => $paraContent['markdown'],
            'suggest' => $paraContent['words'],
        ],
        'bold_single' => implode(' ', $paraContent['bold1']),
        'bold_multi' => implode(' ', array_merge($paraContent['bold2'], $paraContent['bold3'])),
        'related_id' => $related_id,
        'category' => 'pali', // Assuming Pali paragraphs are sutta; adjust as needed
        'language' => 'pali',
        'updated_at' => now()->toIso8601String(),
        'granularity' => 'paragraph',
        'path' => $this->getPathTitle($path),
    ];

    // Rows with level < 8 additionally expose their words as title suggestions.
    if ($paraInfo['level'] < 8) {
        $document['title']['suggest'] = $paraContent['words'];
    }

    return $this->openSearchService->create($document['id'], $document);
}
|
|
|
+
|
|
|
/**
 * Merge a run of paragraph contents into one "session" document and index it.
 *
 * @param array  $paraInfo    Paragraph row; reads the 'uid', 'paragraph' and
 *                            'path' (JSON-encoded string) keys.
 * @param array  $contents    List of paragraph contents; each entry is read for
 *                            its 'markdown', 'bold1', 'bold2' and 'bold3' keys.
 * @param string $currChapter Chapter title used as the prefix of the document title.
 * @param mixed  $related_id  Used both in the document id and as 'related_id'.
 * @return mixed Whatever OpenSearchService::create() returns.
 */
protected function indexPaliSession($paraInfo, $contents, $currChapter, $related_id)
{
    $markdown = [];
    $bold_single = [];
    $bold_multi = [];
    foreach ($contents as $content) {
        $markdown[] = $content['markdown'];
        $bold_single = array_merge($bold_single, $content['bold1']);
        $bold_multi = array_merge($bold_multi, $content['bold2'], $content['bold3']);
    }

    // json_decode() returns null for empty or malformed JSON, while
    // getPathTitle() only accepts arrays; fall back to an empty path.
    $path = json_decode((string) ($paraInfo['path'] ?? ''));
    if (!is_array($path)) {
        $path = [];
    }

    $document = [
        'id' => "pali_session_{$related_id}",
        'resource_id' => $paraInfo['uid'], // uid of the paragraph row
        'resource_type' => 'original_text',
        'title' => [
            'pali' => "{$currChapter} paragraph {$paraInfo['paragraph']}",
        ],
        'summary' => [
            'text' => '',
        ],
        'content' => [
            // Paragraphs are separated by a blank line in the merged markdown.
            'pali' => implode("\n\n", $markdown),
        ],
        'bold_single' => implode(" ", $bold_single),
        'bold_multi' => implode(" ", $bold_multi),
        'related_id' => $related_id,
        'category' => 'pali', // Assuming Pali paragraphs are sutta; adjust as needed
        'language' => 'pali',
        'updated_at' => now()->toIso8601String(),
        'granularity' => 'session',
        'path' => $this->getPathTitle($path),
    ];

    return $this->openSearchService->create($document['id'], $document);
}
|
|
|
+
|
|
|
/**
 * Join the `title` of every node in a path into one slash-separated string.
 *
 * @param array $input List of path nodes; each node is read via its `title` property.
 * @return string The titles joined with '/', or an empty string for an empty list.
 */
private function getPathTitle(array $input)
{
    $titles = array_map(
        static function ($node) {
            return $node->title;
        },
        $input
    );

    return implode('/', $titles);
}
|
|
|
/**
|
|
|
* Index Pali paragraphs for a given book.
|
|
|
*
|
|
|
* @param int $book
|
|
|
* @return int
|
|
|
*/
|
|
|
- protected function indexPaliParagraphs($book)
|
|
|
+ protected function indexPaliParagraphs($book, $paragraph)
|
|
|
{
|
|
|
$this->info("Starting to index paragraphs for book: $book");
|
|
|
-
|
|
|
- // Fetch all paragraphs for the book
|
|
|
- $result = $this->searchPaliDataService->getPaliData($book, 1, null);
|
|
|
- $paragraphs = $result['rows'];
|
|
|
- $total = count($paragraphs);
|
|
|
-
|
|
|
- if ($total === 0) {
|
|
|
- $this->warn("No paragraphs found for book: $book");
|
|
|
- return 0;
|
|
|
+ $total = 0;
|
|
|
+ if ($paragraph) {
|
|
|
+ $paragraphs = PaliText::where('book', $book)
|
|
|
+ ->where('paragraph', $paragraph)
|
|
|
+ ->orderBy('paragraph')->cursor();
|
|
|
+ } else {
|
|
|
+ $paragraphs = PaliText::where('book', $book)
|
|
|
+ ->orderBy('paragraph')->cursor();
|
|
|
}
|
|
|
|
|
|
- $this->info("Found $total paragraphs to index");
|
|
|
-
|
|
|
- // Create progress bar
|
|
|
- $bar = $this->output->createProgressBar($total);
|
|
|
- $bar->start();
|
|
|
-
|
|
|
- foreach ($paragraphs as $paragraph) {
|
|
|
- // Map paragraph data to OpenSearch document structure
|
|
|
- $document = [
|
|
|
- 'id' => "pali_para_{$book}_{$paragraph['paragraph']}",
|
|
|
- 'resource_id' => $paragraph['uid'], // Use uid from getPaliData for resource_id
|
|
|
- 'resource_type' => 'paragraph',
|
|
|
- 'title' => [
|
|
|
- 'display' => "Paragraph {$paragraph['paragraph']} of Book {$book}"
|
|
|
- ],
|
|
|
- 'summary' => [
|
|
|
- 'text' => $paragraph['text']
|
|
|
- ],
|
|
|
- 'content' => [
|
|
|
- 'display' => $paragraph['markdown'],
|
|
|
- 'text' => $paragraph['text'], // Remove markdown for plain text
|
|
|
- 'exact' => $paragraph['text'],
|
|
|
- ],
|
|
|
- 'bold_single' => $paragraph['bold1'],
|
|
|
- 'bold_multi' => $paragraph['bold2'] . ' ' . $paragraph['bold3'],
|
|
|
- 'related_id' => $paragraph['pcd_book_id'],
|
|
|
- 'category' => 'pali', // Assuming Pali paragraphs are sutta; adjust as needed
|
|
|
- 'language' => 'pali',
|
|
|
- 'updated_at' => now()->toIso8601String(),
|
|
|
- 'granularity' => 'paragraph',
|
|
|
- ];
|
|
|
-
|
|
|
- // Index the document in OpenSearch
|
|
|
- $this->openSearchService->create($document['id'], $document);
|
|
|
- $bar->advance();
|
|
|
+ $headings = [];
|
|
|
+ $currChapterTitle = '';
|
|
|
+ $commentaryId = '';
|
|
|
+ $currSession = [];
|
|
|
+ foreach ($paragraphs as $key => $para) {
|
|
|
+ $total++;
|
|
|
+ if ($para->level < 8) {
|
|
|
+ $currChapterTitle = $para->toc;
|
|
|
+ }
|
|
|
+ if ($para->class === 'nikaya') {
|
|
|
+ $nikaya = $para->text;
|
|
|
+ }
|
|
|
+ $paraContent = $this->searchPaliDataService
|
|
|
+ ->getParaContent($para['book'], $para['paragraph']);
|
|
|
+ if (!empty($commentaryId)) {
|
|
|
+ $currSession[] = $paraContent;
|
|
|
+ }
|
|
|
+ if (isset($paraContent['commentary'])) {
|
|
|
+ if (!empty($commentaryId)) {
|
|
|
+ //保存 session
|
|
|
+ $this->indexPaliSession($para->toArray(), $currSession, $currChapterTitle, $commentaryId);
|
|
|
+ $currSession = [];
|
|
|
+ }
|
|
|
+ $commentaryId = $paraContent['commentary'];
|
|
|
+ }
|
|
|
+ $this->indexPaliParagraph($para->toArray(), $paraContent, $commentaryId);
|
|
|
+ $this->info("{$para['book']}-[{$para['paragraph']}]-[{$commentaryId}]");
|
|
|
+ //usleep(200 * 1000);
|
|
|
}
|
|
|
|
|
|
- $bar->finish();
|
|
|
- $this->newLine();
|
|
|
$this->info("Successfully indexed $total paragraphs for book: $book");
|
|
|
Log::info("Indexed $total paragraphs for book: $book");
|
|
|
|