IndexPaliText.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Services\SearchPaliDataService;
  5. use App\Services\OpenSearchService;
  6. use App\Services\SummaryService;
  7. use Illuminate\Support\Facades\Log;
  8. use App\Models\PaliText;
  9. class IndexPaliText extends Command
  10. {
  /**
   * Set to true by handle() when the --test option is passed.
   *
   * @var bool
   */
  private $isTest = false;

  /**
   * Value of the --summary option (defaults to 'on'); read when deciding
   * whether to call the summary service.
   *
   * @var string
   */
  private $summary = 'on';

  /**
   * Service used to load the content of a single paragraph.
   *
   * @var SearchPaliDataService
   */
  protected $searchPaliDataService;

  /**
   * Service used to test the OpenSearch connection and to create documents.
   *
   * @var OpenSearchService
   */
  protected $openSearchService;

  /**
   * Service used to summarize markdown text.
   *
   * @var SummaryService
   */
  protected $summaryService;

  /**
   * Command name, arguments and options as understood by artisan.
   *
   * Example: php artisan opensearch:index-pali 93 --para=6
   *
   * @var string
   */
  protected $signature = 'opensearch:index-pali {book : The book ID to index data for} {--test} {--para= : index paragraph No. omit to all} {--summary=on} {--granularity= : The granularity to index (paragraph, sutta, sentence; omit to index all)}';

  /**
   * Human-readable description of the command.
   *
   * @var string
   */
  protected $description = 'Index Pali data into OpenSearch for a specified book and optional granularity (all granularities if not specified)';
  /**
   * Build the command and keep references to the services it works with.
   *
   * @param SearchPaliDataService $searchPaliDataService stored for loading paragraph content
   * @param OpenSearchService     $openSearchService     stored for connection tests and document creation
   * @param SummaryService        $summaryService        stored for summarizing markdown
   * @return void
   */
  public function __construct(
    SearchPaliDataService $searchPaliDataService,
    OpenSearchService $openSearchService,
    SummaryService $summaryService
  ) {
    parent::__construct();

    $this->summaryService = $summaryService;
    $this->openSearchService = $openSearchService;
    $this->searchPaliDataService = $searchPaliDataService;
  }
  /**
   * Execute the console command.
   *
   * Reads the arguments/options, checks the OpenSearch connection, resolves
   * the list of books (a book argument that casts to 0 means every book from
   * 1 up to the highest book number stored in PaliText) and indexes the
   * paragraphs of each book.
   *
   * Any \Exception raised while indexing is reported, logged and turned
   * into a failure exit code.
   *
   * @return int 0 when every book was indexed without a failure status, 1 otherwise
   */
  public function handle()
  {
    $book = $this->argument('book');
    // NOTE(review): --granularity is only used in the error log below;
    // indexing always runs at paragraph level.
    $granularity = $this->option('granularity');
    $paragraph = $this->option('para');
    $this->summary = $this->option('summary');

    if ($this->option('test')) {
      $this->isTest = true;
      $this->info('test mode');
    }

    try {
      // Test OpenSearch connection
      [$connected, $message] = $this->openSearchService->testConnection();
      if (!$connected) {
        $this->error($message);
        Log::error($message);
        return 1;
      }

      if ((int)$book === 0) {
        // max() yields null on an empty table; range(1, 0) would then
        // produce [1, 0] and "index" two books that do not exist.
        $maxBookId = (int)PaliText::max('book');
        if ($maxBookId < 1) {
          $this->warn('No books found in PaliText; nothing to index');
          return 0;
        }
        $booksId = range(1, $maxBookId);
      } else {
        $booksId = [$book];
      }

      // Overall command status: 0 for success, 1 as soon as any book reports a failure.
      $overallStatus = 0;
      foreach ($booksId as $bookId) {
        $status = $this->indexPaliParagraphs($bookId, $paragraph);
        if ((int)$status !== 0) {
          $overallStatus = 1;
        }
      }

      return $overallStatus;
    } catch (\Exception $e) {
      $this->error("Failed to index Pali data: " . $e->getMessage());
      Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e]);
      return 1;
    }
  }
  /**
   * Build the OpenSearch document for a single paragraph and index it.
   *
   * In test mode the title and summary are printed instead of being sent
   * to OpenSearch.
   *
   * @param array $paraInfo    paragraph row; reads book, paragraph, uid, path (JSON text) and level
   * @param array $paraContent paragraph content; reads markdown, words, bold1, bold2 and bold3
   * @param mixed $related_id  value stored in the document's related_id field
   * @return void
   */
  protected function indexPaliParagraph($paraInfo, $paraContent, $related_id)
  {
    $paraId = $paraInfo['book'] . '_' . $paraInfo['paragraph'];
    $resource_id = $paraInfo['uid'];

    // The path is decoded into a list of objects. A missing or malformed
    // path is treated as an empty list so that getPathTitle(), which only
    // accepts arrays, does not raise a TypeError.
    $decodedPath = json_decode($paraInfo['path'] ?? '');
    $path = is_array($decodedPath) ? $decodedPath : [];

    $title = '';
    if (count($path) > 0) {
      // The last node of the path supplies the document title.
      $lastNode = end($path);
      $title = $lastNode->title ?? '';
    }

    $document = [
      'id' => "pali_para_{$paraId}",
      'resource_id' => $resource_id, // Use uid from getPaliData for resource_id
      'resource_type' => 'original_text',
      'title' => [
        'pali' => $title,
      ],
      'summary' => [
        'text' => $this->summary === 'on' ? $this->summaryService->summarize($paraContent['markdown']) : ''
      ],
      'content' => [
        'pali' => $paraContent['markdown'],
        'suggest' => $paraContent['words'],
      ],
      'bold_single' => implode(' ', $paraContent['bold1']),
      'bold_multi' => implode(' ', array_merge($paraContent['bold2'], $paraContent['bold3'])),
      'related_id' => $related_id,
      'category' => 'pali', // Assuming Pali paragraphs are sutta; adjust as needed
      'language' => 'pali',
      'updated_at' => now()->toIso8601String(),
      'granularity' => 'paragraph',
      'path' => $this->getPathTitle($path),
    ];

    // Rows with a level below 8 also expose their words as title suggestions.
    if ($paraInfo['level'] < 8) {
      $document['title']['suggest'] = $paraContent['words'];
    }

    if ($this->isTest) {
      $this->info($document['title']['pali']);
      $this->info($document['summary']['text']);
      return;
    }

    $this->openSearchService->create($document['id'], $document);
  }
  /**
   * Build one "session" document out of several paragraph contents and
   * index it.
   *
   * The markdown of all paragraphs is joined with blank lines and the bold
   * word lists are concatenated. In test mode the title and summary are
   * printed instead of being sent to OpenSearch.
   *
   * @param array  $paraInfo    paragraph row; reads uid, paragraph and path (JSON text)
   * @param array  $contents    list of paragraph contents; each entry is read for markdown, bold1, bold2 and bold3
   * @param string $currChapter chapter title used as prefix of the document title
   * @param mixed  $related_id  used in the document id and stored as related_id
   * @return void
   */
  protected function indexPaliSession($paraInfo, $contents, $currChapter, $related_id)
  {
    $markdown = [];
    $bold_single = [];
    $bold_multi = [];
    foreach ($contents as $content) {
      $markdown[] = $content['markdown'];
      $bold_single = array_merge($bold_single, $content['bold1']);
      $bold_multi = array_merge($bold_multi, $content['bold2'], $content['bold3']);
    }
    $sessionMarkdown = implode("\n\n", $markdown);

    // Summarize the text that is actually indexed as content, and only
    // when --summary is 'on' (same rule as the paragraph documents). A
    // plain truthiness test would also accept 'off'.
    $summaryText = '';
    if ($this->summary === 'on' && $sessionMarkdown !== '') {
      $summaryText = $this->summaryService->summarize($sessionMarkdown);
    }

    // A missing or malformed path is treated as an empty list so that
    // getPathTitle(), which only accepts arrays, does not raise a TypeError.
    $decodedPath = json_decode($paraInfo['path'] ?? '');
    $path = is_array($decodedPath) ? $decodedPath : [];

    $document = [
      'id' => "pali_session_{$related_id}",
      'resource_id' => $paraInfo['uid'], // Use uid from getPaliData for resource_id
      'resource_type' => 'original_text',
      'title' => [
        'pali' => "{$currChapter} paragraph {$paraInfo['paragraph']}"
      ],
      'summary' => [
        'text' => $summaryText
      ],
      'content' => [
        'pali' => $sessionMarkdown,
      ],
      'bold_single' => implode(" ", $bold_single),
      'bold_multi' => implode(" ", $bold_multi),
      'related_id' => $related_id,
      'category' => 'pali', // Assuming Pali paragraphs are sutta; adjust as needed
      'language' => 'pali',
      'updated_at' => now()->toIso8601String(),
      'granularity' => 'session',
      'path' => $this->getPathTitle($path),
    ];

    if ($this->isTest) {
      $this->info($document['title']['pali']);
      $this->info($document['summary']['text']);
      return;
    }

    $this->openSearchService->create($document['id'], $document);
  }
  /**
   * Join the titles of the given path nodes into a slash-separated string.
   *
   * @param array $input list of objects exposing a title property
   * @return string the titles joined with '/', or '' for an empty list
   */
  private function getPathTitle(array $input)
  {
    $titles = array_map(static function ($node) {
      return $node->title;
    }, $input);

    return implode('/', $titles);
  }
  /**
   * Index the paragraphs of one book.
   *
   * Every paragraph is indexed on its own. In addition, paragraphs are
   * buffered into a "session" that is indexed whenever a paragraph carrying
   * a new commentary id is reached, and once more after the last paragraph
   * so the final session is not lost.
   *
   * @param int|string      $book      book number to index
   * @param int|string|null $paragraph restrict indexing to this paragraph number; a falsy value means all paragraphs
   * @return int always 0
   */
  protected function indexPaliParagraphs($book, $paragraph)
  {
    $this->info("Starting to index paragraphs for book: $book");

    $query = PaliText::where('book', $book);
    if ($paragraph) {
      $query = $query->where('paragraph', $paragraph);
    }
    $paragraphs = $query->orderBy('paragraph')->cursor();

    $total = 0;
    $currChapterTitle = '';
    $commentaryId = '';
    $currSession = [];
    $lastPara = null;

    foreach ($paragraphs as $para) {
      $total++;
      $lastPara = $para;

      // Rows with a level below 8 update the current chapter title.
      if ($para->level < 8) {
        $currChapterTitle = $para->toc;
      }

      $paraContent = $this->searchPaliDataService
        ->getParaContent($para['book'], $para['paragraph']);

      // NOTE(review): the paragraph is added to the open session before
      // the commentary check below, so a paragraph that starts a new
      // commentary id is stored in the session it closes - confirm this
      // grouping is intended.
      if (!empty($commentaryId)) {
        $currSession[] = $paraContent;
      }

      if (isset($paraContent['commentary'])) {
        if (!empty($commentaryId)) {
          // Save the session collected so far.
          $this->indexPaliSession($para->toArray(), $currSession, $currChapterTitle, $commentaryId);
          $currSession = [];
        }
        $commentaryId = $paraContent['commentary'];
      }

      $this->indexPaliParagraph($para->toArray(), $paraContent, $commentaryId);
      $this->info("{$para['book']}-[{$para['paragraph']}]-[{$commentaryId}]");
      usleep(100); // pause for 100 microseconds between paragraphs
    }

    // Flush the paragraphs buffered after the last commentary boundary;
    // without this the final session of the book would never be indexed.
    if ($lastPara !== null && !empty($commentaryId) && count($currSession) > 0) {
      $this->indexPaliSession($lastPara->toArray(), $currSession, $currChapterTitle, $commentaryId);
    }

    $this->info("Successfully indexed $total paragraphs for book: $book");
    Log::info("Indexed $total paragraphs for book: $book");

    return 0;
  }
  /**
   * Placeholder for sutta-level indexing.
   *
   * Nothing is indexed: a warning is printed and logged, and a failure
   * status is returned.
   *
   * @param int $book book number, used only in the messages
   * @return int always 1
   */
  protected function indexPaliSutta($book)
  {
    $status = 1;

    $this->warn("Sutta indexing is not yet implemented for book: $book");
    Log::warning("Sutta indexing not implemented for book: $book");

    return $status;
  }
  /**
   * Placeholder for sentence-level indexing.
   *
   * Nothing is indexed: a warning is printed and logged, and a failure
   * status is returned.
   *
   * @param int $book book number, used only in the messages
   * @return int always 1
   */
  protected function indexPaliSentences($book)
  {
    $status = 1;

    $this->warn("Sentence indexing is not yet implemented for book: $book");
    Log::warning("Sentence indexing not implemented for book: $book");

    return $status;
  }
  258. }