IndexPaliText.php 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Services\SearchPaliDataService;
  5. use App\Services\OpenSearchService;
  6. use App\Services\SummaryService;
  7. use Illuminate\Support\Facades\Log;
  8. use App\Models\PaliText;
  /**
   * Artisan command that indexes Pali text into OpenSearch.
   *
   * For every selected book the command walks the PaliText rows in paragraph
   * order, creates one "paragraph" document per row and one "session" document
   * per group of paragraphs collected under the same commentary id.
   *
   * Usage: php artisan opensearch:index-pali 93 --para=6
   */
  class IndexPaliText extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'opensearch:index-pali {book : The book ID to index data for}
          {--test}
          {--para= : index paragraph No. omit to all}
          {--summary=on}
          {--resume}
          {--granularity= : The granularity to index (paragraph, sutta, sentence; omit to index all)}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Index Pali data into OpenSearch for a specified book and optional granularity (all granularities if not specified)';

      /** @var SearchPaliDataService Supplies the per-paragraph content via getParaContent(). */
      protected $searchPaliDataService;

      /** @var OpenSearchService Used for the connection test and for creating documents. */
      protected $openSearchService;

      /** @var SummaryService Produces the summary text when --summary=on. */
      protected $summaryService;

      /** @var bool True when --test is given: documents are printed instead of being sent. */
      private $isTest = false;

      /** @var bool True when the --summary option equals "on". */
      private $summary = false;

      /**
       * Create a new command instance.
       *
       * @param SearchPaliDataService $searchPaliDataService
       * @param OpenSearchService     $openSearchService
       * @param SummaryService        $summaryService
       * @return void
       */
      public function __construct(
          SearchPaliDataService $searchPaliDataService,
          OpenSearchService $openSearchService,
          SummaryService $summaryService
      ) {
          parent::__construct();
          $this->searchPaliDataService = $searchPaliDataService;
          $this->openSearchService = $openSearchService;
          $this->summaryService = $summaryService;
      }

      /**
       * Execute the console command.
       *
       * Book 0 means "every book from 1 to the highest book id"; with --resume
       * the command continues from the given book up to the highest book id.
       *
       * @return int 0 when every book was indexed successfully, 1 otherwise
       */
      public function handle()
      {
          $bookArgument = trim((string) $this->argument('book'));
          $granularity = $this->option('granularity');
          $paragraph = $this->option('para');
          $this->summary = $this->option('summary') === 'on';

          if ($this->option('test')) {
              $this->isTest = true;
              $this->info('test mode');
          }

          // A plain (int) cast would turn any non-numeric argument into 0, which
          // this command interprets as "index every book". Reject it explicitly.
          if (preg_match('/^\d+\z/', $bookArgument) !== 1) {
              $this->error("Invalid book ID: {$bookArgument}");
              return 1;
          }
          $book = (int) $bookArgument;

          // Only paragraph indexing (with its session documents) is wired up;
          // tell the user instead of silently ignoring the option.
          if ($granularity && $granularity !== 'paragraph') {
              $this->warn("Granularity '{$granularity}' is not supported yet; indexing paragraphs instead.");
          }

          try {
              // Test OpenSearch connection
              [$connected, $message] = $this->openSearchService->testConnection();
              if (!$connected) {
                  $this->error($message);
                  Log::error($message);
                  return 1;
              }

              // Track overall command status (0 for success, 1 for any failure)
              $overallStatus = 0;
              foreach ($this->resolveBookIds($book) as $bookId) {
                  if ($this->indexPaliParagraphs($bookId, $paragraph) !== 0) {
                      $overallStatus = 1;
                  }
              }

              return $overallStatus;
          } catch (\Exception $e) {
              $this->error("Failed to index Pali data: " . $e->getMessage());
              Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e]);
              return 1;
          }
      }

      /**
       * Work out which book ids this run has to process.
       *
       * @param int $book Validated, non-negative book id from the command line
       * @return int[] Book ids in ascending order; empty when there is nothing to do
       */
      private function resolveBookIds(int $book): array
      {
          if ($book !== 0 && !$this->option('resume')) {
              return [$book];
          }

          // max() yields null on an empty table; the cast turns that into 0.
          $maxBookId = (int) PaliText::max('book');
          $firstBookId = $book === 0 ? 1 : $book;

          // range() counts downwards when start > end, so guard against it.
          if ($firstBookId > $maxBookId) {
              $this->warn("No books to index starting from book: $firstBookId (highest book: $maxBookId)");
              return [];
          }

          return range($firstBookId, $maxBookId);
      }

      /**
       * Build and store the OpenSearch document for a single paragraph.
       *
       * @param array  $paraInfo    PaliText row as an array; reads book, paragraph, uid, path and level
       * @param array  $paraContent Paragraph content; reads markdown, words, bold1, bold2 and bold3
       * @param string $related_id  Commentary id the paragraph is grouped under ('' when none)
       * @return void
       */
      protected function indexPaliParagraph($paraInfo, $paraContent, $related_id)
      {
          $paraId = $paraInfo['book'] . '_' . $paraInfo['paragraph'];
          $path = $this->decodePath($paraInfo['path'] ?? null);

          // The paragraph title is the title of the last node of its path.
          $title = '';
          if (count($path) > 0) {
              $title = end($path)->title ?? '';
          }

          $document = [
              'id' => "pali_para_{$paraId}",
              'resource_id' => $paraInfo['uid'], // Use uid from getPaliData for resource_id
              'resource_type' => 'original_text',
              'title' => [
                  'pali' => $title,
              ],
              'summary' => [
                  'text' => $this->summary ? $this->summaryService->summarize($paraContent['markdown']) : '',
              ],
              'content' => [
                  'pali' => $paraContent['markdown'],
                  'suggest' => $paraContent['words'],
              ],
              'bold_single' => implode(' ', $paraContent['bold1']),
              'bold_multi' => implode(' ', array_merge($paraContent['bold2'], $paraContent['bold3'])),
              'related_id' => $related_id,
              'category' => 'pali', // Every paragraph is filed under the generic "pali" category; adjust as needed
              'language' => 'pali',
              'updated_at' => now()->toIso8601String(),
              'granularity' => 'paragraph',
              'path' => $this->getPathTitle($path),
          ];

          // NOTE(review): levels below 8 presumably mark heading rows - confirm.
          if ($paraInfo['level'] < 8) {
              $document['title']['suggest'] = $paraContent['words'];
          }

          if ($this->isTest) {
              $this->info($document['title']['pali']);
              $this->info($document['summary']['text']);
              return;
          }

          $this->openSearchService->create($document['id'], $document);
      }

      /**
       * Build and store the OpenSearch document for a session, i.e. a group of
       * paragraphs collected under one commentary id.
       *
       * @param array  $paraInfo    PaliText row as an array; reads uid, paragraph and path
       * @param array  $contents    Paragraph contents of the session; each reads markdown, bold1, bold2 and bold3
       * @param string $currChapter Chapter title used as prefix of the session title
       * @param string $related_id  Commentary id of the session; also part of the document id
       * @return void
       */
      protected function indexPaliSession($paraInfo, $contents, $currChapter, $related_id)
      {
          $markdown = [];
          $boldSingle = [];
          $boldMulti = [];
          foreach ($contents as $content) {
              $markdown[] = $content['markdown'];
              $boldSingle = array_merge($boldSingle, $content['bold1']);
              $boldMulti = array_merge($boldMulti, $content['bold2'], $content['bold3']);
          }

          // Summarize the whole session rather than whatever paragraph the loop
          // variable happened to hold last.
          $sessionMarkdown = implode("\n\n", $markdown);

          $document = [
              'id' => "pali_session_{$related_id}",
              'resource_id' => $paraInfo['uid'], // Use uid from getPaliData for resource_id
              'resource_type' => 'original_text',
              'title' => [
                  'pali' => "{$currChapter} paragraph {$paraInfo['paragraph']}",
              ],
              'summary' => [
                  'text' => $this->summary ? $this->summaryService->summarize($sessionMarkdown) : '',
              ],
              'content' => [
                  'pali' => $sessionMarkdown,
              ],
              'bold_single' => implode(" ", $boldSingle),
              'bold_multi' => implode(" ", $boldMulti),
              'related_id' => $related_id,
              'category' => 'pali', // Every session is filed under the generic "pali" category; adjust as needed
              'language' => 'pali',
              'updated_at' => now()->toIso8601String(),
              'granularity' => 'session',
              'path' => $this->getPathTitle($this->decodePath($paraInfo['path'] ?? null)),
          ];

          if ($this->isTest) {
              $this->info($document['title']['pali']);
              $this->info($document['summary']['text']);
              return;
          }

          $this->openSearchService->create($document['id'], $document);
      }

      /**
       * Decode the JSON path of a paragraph into a list of path nodes.
       *
       * @param mixed $rawPath JSON string as stored on the row, or null
       * @return array Decoded nodes; empty when the value is missing or is not a JSON array
       */
      private function decodePath($rawPath): array
      {
          $decoded = is_string($rawPath) ? json_decode($rawPath) : null;

          return is_array($decoded) ? $decoded : [];
      }

      /**
       * Join the titles of all path nodes into a "/"-separated string.
       *
       * @param array $input Path nodes (objects exposing a title property)
       * @return string Titles joined with "/"; nodes without a title contribute an empty segment
       */
      private function getPathTitle(array $input)
      {
          $titles = [];
          foreach ($input as $node) {
              $titles[] = $node->title ?? '';
          }

          return implode('/', $titles);
      }

      /**
       * Index Pali paragraphs (and their sessions) for a given book.
       *
       * @param int             $book      Book id to index
       * @param int|string|null $paragraph Single paragraph number to index; any falsy value indexes the whole book
       * @return int 0 on success
       */
      protected function indexPaliParagraphs($book, $paragraph)
      {
          $this->info("Starting to index paragraphs for book: $book");

          $query = PaliText::where('book', $book);
          if ($paragraph) {
              $query->where('paragraph', $paragraph);
          }
          $paragraphs = $query->orderBy('paragraph')->cursor();

          $total = 0;
          $currChapterTitle = '';
          $commentaryId = '';
          $currSession = [];
          $lastParaInfo = null;

          foreach ($paragraphs as $para) {
              $total++;
              $paraInfo = $para->toArray();
              $lastParaInfo = $paraInfo;

              // NOTE(review): levels below 8 presumably mark heading rows - confirm.
              if ($para->level < 8) {
                  $currChapterTitle = $para->toc;
              }

              $paraContent = $this->searchPaliDataService
                  ->getParaContent($para->book, $para->paragraph);

              // Paragraphs are collected only once a commentary id has been seen.
              if (!empty($commentaryId)) {
                  $currSession[] = $paraContent;
              }

              if (isset($paraContent['commentary'])) {
                  if (!empty($commentaryId)) {
                      // Save the session collected under the previous commentary id.
                      $this->indexPaliSession($paraInfo, $currSession, $currChapterTitle, $commentaryId);
                      $currSession = [];
                  }
                  $commentaryId = $paraContent['commentary'];
              }

              $this->indexPaliParagraph($paraInfo, $paraContent, $commentaryId);
              $this->info("{$para->book}-[{$para->paragraph}]-[{$commentaryId}]");

              // Pause 10 ms between paragraphs (presumably to throttle the backing services).
              usleep(10000);
          }

          // The paragraphs after the last commentary marker are tagged with that
          // commentary id as related_id, so their session must be stored as well.
          if (!empty($currSession)) {
              $this->indexPaliSession($lastParaInfo, $currSession, $currChapterTitle, $commentaryId);
          }

          $this->info("Successfully indexed $total paragraphs for book: $book");
          Log::info("Indexed $total paragraphs for book: $book");

          return 0;
      }

      /**
       * Index Pali suttas for a given book (placeholder for future implementation).
       *
       * @param int $book
       * @return int Always 1, because sutta indexing is not implemented
       */
      protected function indexPaliSutta($book)
      {
          $this->warn("Sutta indexing is not yet implemented for book: $book");
          Log::warning("Sutta indexing not implemented for book: $book");

          return 1;
      }

      /**
       * Index Pali sentences for a given book (placeholder for future implementation).
       *
       * @param int $book
       * @return int Always 1, because sentence indexing is not implemented
       */
      protected function indexPaliSentences($book)
      {
          $this->warn("Sentence indexing is not yet implemented for book: $book");
          Log::warning("Sentence indexing not implemented for book: $book");

          return 1;
      }
  }