IndexPaliText.php 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Services\SearchPaliDataService;
  5. use App\Services\OpenSearchService;
  6. use App\Services\SummaryService;
  7. use App\Services\TagService;
  8. use Illuminate\Support\Facades\Log;
  9. use App\Models\PaliText;
  10. class IndexPaliText extends Command
  11. {
  12. /**
  13. * The name and signature of the console command.
  14. * php artisan opensearch:index-pali 93 --para=6
  15. * @var string
  16. */
  17. protected $signature = 'opensearch:index-pali {book : The book ID to index data for}
  18. {--test}
  19. {--para= : index paragraph No. omit to all}
  20. {--summary=on}
  21. {--resume}
  22. {--granularity= : The granularity to index (paragraph, sutta, sentence; omit to index all)}';
  23. /**
  24. * The console command description.
  25. *
  26. * @var string
  27. */
  28. protected $description = 'Index Pali data into OpenSearch for a specified book and optional granularity (all granularities if not specified)';
  29. protected $searchPaliDataService;
  30. protected $openSearchService;
  31. protected $summaryService;
  32. protected $tagService;
  33. private $isTest = false;
  34. private $summary = false;
  35. /**
  36. * Create a new command instance.
  37. *
  38. * @return void
  39. */
  40. public function __construct(
  41. SearchPaliDataService $searchPaliDataService,
  42. OpenSearchService $openSearchService,
  43. SummaryService $summaryService,
  44. TagService $tagService
  45. ) {
  46. parent::__construct();
  47. $this->searchPaliDataService = $searchPaliDataService;
  48. $this->openSearchService = $openSearchService;
  49. $this->summaryService = $summaryService;
  50. $this->tagService = $tagService;
  51. }
  52. /**
  53. * Execute the console command.
  54. *
  55. * @return int
  56. */
  57. public function handle()
  58. {
  59. $book = (int)$this->argument('book');
  60. $granularity = $this->option('granularity');
  61. $paragraph = $this->option('para');
  62. $this->summary = $this->option('summary') === 'on';
  63. if ($this->option('test')) {
  64. $this->isTest = true;
  65. $this->info('test mode');
  66. }
  67. try {
  68. // Test OpenSearch connection
  69. [$connected, $message] = $this->openSearchService->testConnection();
  70. if (!$connected) {
  71. $this->error($message);
  72. Log::error($message);
  73. return 1;
  74. }
  75. $overallStatus = 0; // Track overall command status (0 for success, 1 for any failure)
  76. $maxBookId = PaliText::max('book');
  77. if ($book === 0) {
  78. $booksId = range(1, $maxBookId);
  79. } else if ($this->option('resume')) {
  80. $booksId = range($book, $maxBookId);
  81. } else {
  82. $booksId = [$book];
  83. }
  84. foreach ($booksId as $key => $bookId) {
  85. $this->indexTipitakaParagraphs($bookId, $paragraph);
  86. }
  87. return $overallStatus;
  88. } catch (\Exception $e) {
  89. $this->error("Failed to index Pali data: " . $e->getMessage());
  90. Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e]);
  91. return 1;
  92. }
  93. }
  94. /**
  95. * Index Pali paragraphs for a given book.
  96. *
  97. * @param int $book
  98. * @return int
  99. */
  100. protected function indexTipitakaParagraphs($book, $paragraph = null)
  101. {
  102. $this->info("Starting to index paragraphs for book: $book");
  103. $total = 0;
  104. if ($paragraph) {
  105. $paragraphs = PaliText::where('book', $book)
  106. ->where('paragraph', $paragraph)
  107. ->orderBy('paragraph')->cursor();
  108. } else {
  109. $paragraphs = PaliText::where('book', $book)
  110. ->orderBy('paragraph')->cursor();
  111. }
  112. $bookUid = PaliText::where('book', $book)->where('level', 1)->first()->uid;
  113. $category = $this->tagService->getTagsName($bookUid);
  114. $headings = [];
  115. $currChapterTitle = '';
  116. $commentaryId = '';
  117. $currSession = [];
  118. foreach ($paragraphs as $key => $para) {
  119. $total++;
  120. if ($para->level < 8) {
  121. $currChapterTitle = $para->toc;
  122. }
  123. if ($para->class === 'nikaya') {
  124. $nikaya = $para->text;
  125. }
  126. $paraContent = $this->searchPaliDataService
  127. ->getParaContent($para['book'], $para['paragraph']);
  128. if (!empty($commentaryId)) {
  129. $currSession[] = $paraContent;
  130. }
  131. if (isset($paraContent['commentary'])) {
  132. if (!empty($commentaryId)) {
  133. //保存 session
  134. $this->indexPaliSession($para->toArray(), $currSession, $currChapterTitle, $commentaryId);
  135. $currSession = [];
  136. }
  137. $commentaryId = $paraContent['commentary'];
  138. }
  139. $this->indexParagraph($para->toArray(), $paraContent, $commentaryId, $category);
  140. $this->info("{$para['book']}-[{$para['paragraph']}]-[{$commentaryId}]");
  141. usleep(10000);
  142. }
  143. $this->info("Successfully indexed $total paragraphs for book: $book");
  144. Log::info("Indexed $total paragraphs for book: $book");
  145. return 0;
  146. }
  147. /**
  148. *
  149. */
  150. protected function indexParagraph($paraInfo, $paraContent, $related_id, array $category)
  151. {
  152. $paraId = $paraInfo['book'] . '-' . $paraInfo['paragraph'];
  153. $resource_id = $paraInfo['uid'];
  154. $path = json_decode($paraInfo['path']);
  155. if (is_array($path) && count($path) > 0) {
  156. $title = end($path)->title;
  157. } else {
  158. $title = '';
  159. }
  160. $document = [
  161. 'id' => "tipitaka_paragraph_pi_{$paraId}",
  162. 'resource_id' => $resource_id, // Use uid from getPaliData for resource_id
  163. 'resource_type' => 'tipitaka',
  164. 'title' => [
  165. 'pali' => $title,
  166. ],
  167. 'summary' => [
  168. 'text' => $this->summary ? $this->summaryService->summarize($paraContent['markdown']) : ''
  169. ],
  170. 'content' => [
  171. 'pali' => $paraContent['text'],
  172. 'suggest' => $paraContent['words'],
  173. ],
  174. 'bold_single' => implode(' ', $paraContent['bold1']),
  175. 'bold_multi' => implode(' ', array_merge($paraContent['bold2'], $paraContent['bold3'])),
  176. 'related_id' => $paraId,
  177. 'category' => $category, // Assuming Pali paragraphs are sutta; adjust as needed
  178. 'language' => 'pi',
  179. 'updated_at' => now()->toIso8601String(),
  180. 'granularity' => 'paragraph',
  181. 'path' => $this->getPathTitle($path),
  182. ];
  183. if ($paraInfo['level'] < 8) {
  184. $document['title']['suggest'] = $paraContent['words'];
  185. }
  186. if ($this->isTest) {
  187. $this->info($document['title']['pali']);
  188. $this->info($document['summary']['text']);
  189. } else {
  190. $this->openSearchService->create($document['id'], $document);
  191. }
  192. return;
  193. }
  194. /**
  195. *
  196. */
  197. protected function indexPaliSession($paraInfo, $contents, $currChapter, $related_id)
  198. {
  199. $markdown = [];
  200. $text = [];
  201. $bold_single = [];
  202. $bold_multi = [];
  203. foreach ($contents as $key => $content) {
  204. $markdown[] = $content['markdown'];
  205. $text[] = $content['text'];
  206. $bold_single = array_merge($bold_single, $content['bold1']);
  207. $bold_multi = array_merge($bold_multi, $content['bold2'], $content['bold3']);
  208. }
  209. $document = [
  210. 'id' => "pali_session_{$related_id}",
  211. 'resource_id' => $paraInfo['uid'], // Use uid from getPaliData for resource_id
  212. 'resource_type' => 'original_text',
  213. 'title' => [
  214. 'pali' => "{$currChapter} paragraph {$paraInfo['paragraph']}"
  215. ],
  216. 'summary' => [
  217. 'text' => $this->summary ? $this->summaryService->summarize($content['markdown']) : ''
  218. ],
  219. 'content' => [
  220. 'pali' => implode("\n\n", $markdown),
  221. ],
  222. 'bold_single' => implode(" ", $bold_single),
  223. 'bold_multi' => implode(" ", $bold_multi),
  224. 'related_id' => $related_id,
  225. 'category' => 'pali', // Assuming Pali paragraphs are sutta; adjust as needed
  226. 'language' => 'pali',
  227. 'updated_at' => now()->toIso8601String(),
  228. 'granularity' => 'session',
  229. 'path' => $this->getPathTitle(json_decode($paraInfo['path'])),
  230. ];
  231. if ($this->isTest) {
  232. $this->info($document['title']['pali']);
  233. $this->info($document['summary']['text']);
  234. } else {
  235. $this->openSearchService->create($document['id'], $document);
  236. }
  237. return;
  238. }
  239. /**
  240. * Index Pali suttas for a given book (placeholder for future implementation).
  241. *
  242. * @param int $book
  243. * @return int
  244. */
  245. protected function indexPaliSutta($book)
  246. {
  247. $this->warn("Sutta indexing is not yet implemented for book: $book");
  248. Log::warning("Sutta indexing not implemented for book: $book");
  249. return 1;
  250. }
  251. /**
  252. * Index Pali sentences for a given book (placeholder for future implementation).
  253. *
  254. * @param int $book
  255. * @return int
  256. */
  257. protected function indexPaliSentences($book)
  258. {
  259. $this->warn("Sentence indexing is not yet implemented for book: $book");
  260. Log::warning("Sentence indexing not implemented for book: $book");
  261. return 1;
  262. }
  263. private function getPathTitle(array $input)
  264. {
  265. $output = [];
  266. foreach ($input as $key => $node) {
  267. $output[] = $node->title;
  268. }
  269. return implode('/', $output);
  270. }
  271. }