IndexTipitaka.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Services\SearchPaliDataService;
  5. use App\Services\OpenSearchService;
  6. use App\Services\SummaryService;
  7. use App\Services\TagService;
  8. use Illuminate\Support\Facades\Log;
  9. use App\Models\PaliText;
  10. use App\Models\Sentence;
  11. use App\Services\PaliContentService;
  12. use App\Http\Api\ChannelApi;
  13. use App\Models\ProgressChapter;
  /**
   * Artisan command that pushes Tipitaka (Pali canon) data into OpenSearch.
   *
   * Two granularities are implemented:
   *  - "chapter":   one document per chapter heading and channel (see indexChapter()).
   *  - "paragraph": one document per paragraph, plus "session" documents grouping the
   *                 paragraphs between two commentary markers (see indexTipitakaParagraph()).
   * "all" (the default) runs both.
   */
  class IndexTipitaka extends Command
  {
      /**
       * Granularity values accepted by the --granularity option.
       */
      private const GRANULARITIES = ['chapter', 'paragraph', 'all'];

      /**
       * The name and signature of the console command.
       *
       * Example: php artisan opensearch:index-tipitaka 93 --para=6 --granularity=chapter
       *
       * @var string
       */
      protected $signature = 'opensearch:index-tipitaka {book : The book ID to index data for}
          {--test}
          {--para= : index paragraph No. omit to all}
          {--summary=on}
          {--resume}
          {--granularity=all : The granularity to index (chapter, paragraph; omit to index all)}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Index Pali data into OpenSearch for a specified book and optional granularity (all granularities if not specified)';

      /**
       * When true, documents are printed to the console instead of being sent to OpenSearch.
       */
      private $isTest = false;

      /**
       * When true, a summary is generated through SummaryService for paragraph/session documents.
       */
      private $summary = false;

      /**
       * Create a new command instance.
       *
       * The services are stored as promoted protected properties.
       *
       * @return void
       */
      public function __construct(
          protected SearchPaliDataService $searchPaliDataService,
          protected OpenSearchService $openSearchService,
          protected SummaryService $summaryService,
          protected TagService $tagService
      ) {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Book selection: book 0 means "every book from 1 to the highest book ID";
       * with --resume the run continues from the given book to the highest book ID;
       * otherwise only the given book is indexed.
       *
       * @return int 0 when every step succeeded, 1 when any step failed
       */
      public function handle()
      {
          $book = (int) $this->argument('book');
          // An empty --granularity= is treated like the default.
          $granularity = $this->option('granularity') ?: 'all';
          $paragraph = $this->option('para');
          $this->summary = $this->option('summary') === 'on';

          if ($this->option('test')) {
              $this->isTest = true;
              $this->info('test mode');
          }

          // Reject unknown values up front; otherwise the loop below would index
          // nothing and still report success.
          if (!in_array($granularity, self::GRANULARITIES, true)) {
              $message = "Unsupported granularity: {$granularity} (expected one of: "
                  . implode(', ', self::GRANULARITIES) . ')';
              $this->error($message);
              Log::error($message);
              return 1;
          }

          $withChapters = in_array($granularity, ['chapter', 'all'], true);
          $withParagraphs = in_array($granularity, ['paragraph', 'all'], true);

          try {
              // Test OpenSearch connection
              [$connected, $message] = $this->openSearchService->testConnection();
              if (!$connected) {
                  $this->error($message);
                  Log::error($message);
                  return 1;
              }

              $booksId = $this->resolveBookIds($book);
              if ($booksId === []) {
                  $message = "No books to index for book argument: $book";
                  $this->error($message);
                  Log::error($message);
                  return 1;
              }

              // Track overall command status (0 for success, 1 for any failure)
              $overallStatus = 0;
              foreach ($booksId as $bookId) {
                  if ($withChapters) {
                      $overallStatus = max($overallStatus, (int) $this->indexChapter($bookId));
                  }
                  if ($withParagraphs) {
                      $overallStatus = max(
                          $overallStatus,
                          (int) $this->indexTipitakaParagraph($bookId, $paragraph)
                      );
                  }
              }

              return $overallStatus;
          } catch (\Throwable $e) {
              // Throwable (not just Exception) so TypeError and friends are reported
              // as a failed run instead of escaping the command.
              $this->error("Failed to index Pali data: " . $e->getMessage());
              Log::error("Failed to index Pali data for book: $book, granularity: $granularity", ['error' => $e]);
              return 1;
          }
      }

      /**
       * Translate the book argument and the --resume flag into the list of book IDs to index.
       *
       * @param int $book book argument; 0 selects every book
       * @return int[] ascending book IDs; empty when the requested range does not exist
       */
      private function resolveBookIds(int $book): array
      {
          if ($book !== 0 && !$this->option('resume')) {
              return [$book];
          }

          // max() yields null on an empty table; the cast turns that into 0 so the
          // range check below fails cleanly instead of producing a descending range.
          $maxBookId = (int) PaliText::max('book');
          $firstBookId = $book === 0 ? 1 : $book;

          return $firstBookId <= $maxBookId ? range($firstBookId, $maxBookId) : [];
      }

      /**
       * Index Pali paragraphs for a given book.
       *
       * Every paragraph becomes one "paragraph" document. Paragraph contents are also
       * accumulated into a "session"; a session is written each time a paragraph that
       * carries a 'commentary' key is reached while a previous commentary id is active.
       *
       * NOTE(review): the session still open when the loop ends is never written, and
       * the paragraph that opens a session is stored in the previous session. This
       * mirrors the original behaviour; confirm whether it is intended.
       *
       * @param int $book
       * @param int|string|null $paragraph restrict indexing to this paragraph number; falsy = all
       * @return int 0 on success, 1 when the book has no level-1 row
       */
      protected function indexTipitakaParagraph($book, $paragraph = null)
      {
          $this->info("Starting to index paragraphs for book: $book");

          // The level-1 row supplies the uid used to look up the book's tags.
          $bookRoot = PaliText::where('book', $book)->where('level', 1)->first();
          if ($bookRoot === null) {
              $message = "No level 1 row found for book: $book, paragraphs not indexed";
              $this->error($message);
              Log::error($message);
              return 1;
          }
          $category = $this->tagService->getTagsName($bookRoot->uid);

          $query = PaliText::where('book', $book);
          if ($paragraph) {
              $query->where('paragraph', $paragraph);
          }
          // cursor() streams rows instead of loading the whole book into memory.
          $paragraphs = $query->orderBy('paragraph')->cursor();

          $total = 0;
          $currChapterTitle = '';
          $commentaryId = '';
          $currSession = [];

          foreach ($paragraphs as $para) {
              $total++;

              // Rows with level < 8 are headings; remember the latest heading text.
              if ($para->level < 8) {
                  $currChapterTitle = $para->toc;
              }

              $paraContent = $this->searchPaliDataService
                  ->getParaContent($para['book'], $para['paragraph']);

              if (!empty($commentaryId)) {
                  $currSession[] = $paraContent;
              }

              if (isset($paraContent['commentary'])) {
                  if (!empty($commentaryId)) {
                      // Save the session collected under the previous commentary id.
                      $this->indexPaliSession($para->toArray(), $currSession, $currChapterTitle, $commentaryId);
                      $currSession = [];
                  }
                  $commentaryId = $paraContent['commentary'];
              }

              $this->indexParagraph($para->toArray(), $paraContent, $commentaryId, $category);
              $this->info("{$para['book']}-[{$para['paragraph']}]-[{$commentaryId}]");

              // 10 ms pause between paragraphs.
              usleep(10000);
          }

          $this->info("Successfully indexed $total paragraphs for book: $book");
          Log::info("Indexed $total paragraphs for book: $book");

          return 0;
      }

      /**
       * Build and store the OpenSearch document for a single paragraph.
       *
       * @param array $paraInfo    paragraph row as an array (reads book, paragraph, uid, path, level)
       * @param array $paraContent paragraph content (reads markdown, text, words, bold1, bold2, bold3)
       * @param mixed $related_id  NOTE(review): currently unused; the document's related_id
       *                           is the "book-paragraph" id. Confirm whether that is intended.
       * @param array $category    category tags stored on the document
       * @return void
       */
      protected function indexParagraph($paraInfo, $paraContent, $related_id, array $category)
      {
          $paraId = $paraInfo['book'] . '-' . $paraInfo['paragraph'];
          $path = $this->decodePath($paraInfo['path'] ?? null);

          // The title is the title of the deepest path node, when there is one.
          $title = $path === [] ? '' : (end($path)->title ?? '');

          $summaryText = $this->summary
              ? $this->summaryService->summarize($paraContent['markdown'])
              : '';

          $document = [
              'id' => "tipitaka_paragraph_pi_{$paraId}",
              'resource_id' => $paraInfo['uid'], // Use uid from getPaliData for resource_id
              'resource_type' => 'tipitaka',
              'title' => [
                  'pali' => $title,
              ],
              'summary' => [
                  'text' => $summaryText,
              ],
              'content' => [
                  'pali' => $paraContent['text'],
                  'suggest' => $paraContent['words'],
              ],
              'bold_single' => implode(' ', $paraContent['bold1']),
              'bold_multi' => implode(' ', array_merge($paraContent['bold2'], $paraContent['bold3'])),
              'related_id' => $paraId,
              'category' => $category,
              'language' => 'pi',
              'updated_at' => now()->toIso8601String(),
              'granularity' => 'paragraph',
              'path' => $this->getPathTitle($path),
          ];

          // Heading rows (level < 8) also feed the title suggester.
          if ($paraInfo['level'] < 8) {
              $document['title']['suggest'] = $paraContent['words'];
          }

          if ($this->isTest) {
              $this->info($document['title']['pali']);
              $this->info($document['summary']['text']);
              return;
          }

          $this->openSearchService->create($document['id'], $document);
      }

      /**
       * Build and store one "session" document from a group of paragraph contents.
       *
       * @param array  $paraInfo    paragraph row as an array (reads uid, paragraph, path)
       * @param array  $contents    paragraph contents (each reads markdown, bold1, bold2, bold3)
       * @param string $currChapter heading text used in the document title
       * @param mixed  $related_id  commentary id; used for the document id and related_id
       * @return void
       */
      protected function indexPaliSession($paraInfo, $contents, $currChapter, $related_id)
      {
          // Nothing to index, and nothing to summarise.
          if (empty($contents)) {
              return;
          }

          $markdown = [];
          $boldSingle = [];
          $boldMulti = [];
          foreach ($contents as $content) {
              $markdown[] = $content['markdown'];
              $boldSingle = array_merge($boldSingle, $content['bold1']);
              $boldMulti = array_merge($boldMulti, $content['bold2'], $content['bold3']);
          }
          $sessionMarkdown = implode("\n\n", $markdown);

          // Summarise the whole session; previously only the last paragraph was
          // summarised because the loop variable leaked out of the foreach.
          $summaryText = $this->summary
              ? $this->summaryService->summarize($sessionMarkdown)
              : '';

          $document = [
              'id' => "pali_session_{$related_id}",
              'resource_id' => $paraInfo['uid'], // Use uid from getPaliData for resource_id
              'resource_type' => 'original_text',
              'title' => [
                  'pali' => "{$currChapter} paragraph {$paraInfo['paragraph']}",
              ],
              'summary' => [
                  'text' => $summaryText,
              ],
              'content' => [
                  'pali' => $sessionMarkdown,
              ],
              'bold_single' => implode(" ", $boldSingle),
              'bold_multi' => implode(" ", $boldMulti),
              'related_id' => $related_id,
              'category' => 'pali',
              // NOTE(review): paragraph documents use 'pi' here; confirm which code the index expects.
              'language' => 'pali',
              'updated_at' => now()->toIso8601String(),
              'granularity' => 'session',
              'path' => $this->getPathTitle($this->decodePath($paraInfo['path'] ?? null)),
          ];

          if ($this->isTest) {
              $this->info($document['title']['pali']);
              $this->info($document['summary']['text']);
              return;
          }

          $this->openSearchService->create($document['id'], $document);
      }

      /**
       * Index the chapters of a book, one document per chapter and channel.
       *
       * A chapter is every row with level < 8. Its range runs from its own heading
       * paragraph to the next heading's paragraph; the last chapter runs to the last
       * paragraph of the book.
       *
       * NOTE(review): the end paragraph is passed on as-is, so the Sentence lookup
       * (whereBetween) includes the next heading's paragraph. Confirm that is intended.
       *
       * @param int $book
       * @return int always 0
       */
      protected function indexChapter($book)
      {
          $this->info("Starting to index chapters for book: $book");

          $chapters = PaliText::where('book', $book)
              ->where('level', '<', 8)
              ->orderBy('paragraph')
              ->get();
          if ($chapters->isEmpty()) {
              $this->warn("No chapters found for book: $book");
              return 0;
          }

          // Last paragraph of the book, used as the end of the final chapter.
          // max() keeps the book constraint; calling value() on a model instance
          // would start a fresh, unconstrained query.
          $lastParagraph = PaliText::where('book', $book)->max('paragraph');
          $lastIndex = $chapters->count() - 1;

          // Stays empty until a level-1 row provides the book's tags.
          $category = [];

          foreach ($chapters as $index => $chapter) {
              if ((int) $chapter->level === 1) {
                  $category = $this->tagService->getTagsName($chapter->uid);
              }

              $start = $chapter->paragraph;
              $end = $index === $lastIndex
                  ? $lastParagraph
                  : $chapters[$index + 1]->paragraph;

              // Collect every channel that has sentences inside this paragraph range.
              $channels = Sentence::where('book_id', $book)
                  ->whereBetween('paragraph', [$start, $end])
                  ->select('channel_uid')
                  ->groupBy('channel_uid')
                  ->get();
              $this->info("index chapter start={$start} end={$end}");

              foreach ($channels as $channel) {
                  $channelUid = $channel->channel_uid;
                  $channelInfo = ChannelApi::getById($channelUid);
                  if (empty($channelInfo)) {
                      $this->warn("channel {$channelUid} not found, skip");
                      continue;
                  }

                  $this->info('channel =' . $channelInfo['name']);
                  if ($channelInfo['type'] === 'wbw') {
                      $this->info('wbw channel skip');
                      continue;
                  }

                  $paragraphsData = app(PaliContentService::class)->paragraphs(
                      $book,
                      $start,
                      $end,
                      [$channelUid],
                      ['mode' => 'read', 'format' => 'html', 'original' => true]
                  );

                  $rendered = $this->renderChapter(
                      $paragraphsData,
                      $start,
                      $chapter->level,
                      $channelInfo['type']
                  );

                  $this->chapterSave([
                      'book' => $book,
                      'para' => $start,
                      'channel' => $channelUid,
                      'display' => $rendered['display'],
                      'content' => $rendered['content'],
                      'title' => strip_tags($rendered['title']),
                      'cat' => $category,
                  ]);
              }
          }

          return 0;
      }

      /**
       * Turn the paragraph data of one chapter and channel into HTML and a title.
       *
       * @param iterable $paragraphsData paragraphs; each reads 'para' and 'children', and every
       *                                 child may carry 'translation' and 'origin' lists
       * @param mixed    $start          paragraph number of the chapter heading
       * @param mixed    $chapterLevel   heading level used for the <hN> tag of the heading paragraph
       * @param string   $channelType    'original' selects the original text as content,
       *                                 anything else selects the translation
       * @return array{display: string, content: string, title: string}
       */
      private function renderChapter($paragraphsData, $start, $chapterLevel, $channelType): array
      {
          $display = [];
          $content = [];
          $title = '';

          foreach ($paragraphsData as $paragraph) {
              $translation = [];
              $original = [];

              foreach ($paragraph['children'] as $sent) {
                  if (isset($sent['translation'])) {
                      foreach ($sent['translation'] as $tran) {
                          $curr = $tran['html'] ?? $tran['content'];
                          $translation[] = "<span class='sentence'>{$curr}</span>";
                          // A non-empty translation in the heading paragraph is the title.
                          if ($tran['para'] === $start && !empty($curr)) {
                              $title = $curr;
                          }
                      }
                  }

                  // All three conditions must hold: set, an array, and non-empty.
                  if (is_array($sent['origin'] ?? null) && isset($sent['origin'][0])) {
                      $ori = $sent['origin'][0];
                      $curr = $ori['html'] ?? $ori['content'];
                      $original[] = "<span class='sentence origin'>{$curr}</span>";
                      // The original text is only a fallback title.
                      if (empty($title) && $ori['para'] === $start && !empty($curr)) {
                          $title = $curr;
                      }
                  }
              }

              // Only the heading paragraph is rendered as a heading.
              $level = $paragraph['para'] === $start ? $chapterLevel : 0;
              $strOriginal = implode('', $original);
              $strTranslation = implode('', $translation);

              if ($level > 0) {
                  $display[] = "<div><h{$level}>{$strOriginal}</h{$level}><h{$level}>{$strTranslation}</h{$level}></div>";
              } else {
                  $display[] = "<div><p>{$strOriginal}</p><p>{$strTranslation}</p></div>";
              }

              $content[] = $channelType === 'original' ? $strOriginal : $strTranslation;
          }

          return [
              'display' => implode('', $display),
              'content' => implode('', $content),
              'title' => $title,
          ];
      }

      /**
       * Build and store the OpenSearch document for one chapter in one channel.
       *
       * @param array $param reads book, para, channel, display, content, title and cat
       * @return void
       */
      protected function chapterSave(array $param)
      {
          $progress = ProgressChapter::where('book', $param['book'])
              ->where('para', $param['para'])
              ->where('channel_id', $param['channel'])
              ->first();
          $channel = ChannelApi::getById($param['channel']);
          $lang = (string) ($channel['lang'] ?? '');

          $chapterKey = "{$param['book']}-{$param['para']}";
          $document = [
              'id' => "tipitaka_chapter_{$chapterKey}_{$param['channel']}",
              // Fall back to a synthetic id when no progress row exists.
              'resource_id' => $progress ? $progress->uid : "{$chapterKey}_{$param['channel']}",
              'resource_type' => 'tipitaka',
              'title' => [],
              'summary' => [
                  'text' => '',
              ],
              'content' => [],
              'related_id' => $chapterKey,
              'category' => $param['cat'],
              'language' => $channel['lang'] ?? null,
              'updated_at' => now()->toIso8601String(),
              'granularity' => 'chapter',
          ];

          // TODO: extend the language detection so content lands in the matching
          // text.pali or text.zh field.
          $plainText = strip_tags($param['content']);
          $title = strip_tags($param['title']);
          $langKey = str_contains($lang, 'zh') ? 'zh' : 'pali';
          $document['content']['text'][$langKey] = $plainText;
          $document['title']['text'][$langKey] = $title;

          // HTML used for display.
          $document['content']['display'] = $param['display'];

          if ($this->isTest) {
              $this->info($param['content']);
              return;
          }

          $this->openSearchService->create($document['id'], $document);
          $this->info("create index {$document['id']} size=" . strlen($param['content']));
      }

      /**
       * Index Pali sentences for a given book (placeholder for future implementation).
       *
       * @param int $book
       * @return int always 1, because sentence indexing is not implemented
       */
      protected function indexPaliSentences($book)
      {
          $this->warn("Sentence indexing is not yet implemented for book: $book");
          Log::warning("Sentence indexing not implemented for book: $book");

          return 1;
      }

      /**
       * Decode a stored path into a list of nodes.
       *
       * @param mixed $rawPath JSON string (or an already decoded list)
       * @return array decoded nodes; empty when the value is missing or not a JSON array
       */
      private function decodePath($rawPath): array
      {
          if (is_string($rawPath)) {
              $rawPath = json_decode($rawPath);
          }

          return is_array($rawPath) ? $rawPath : [];
      }

      /**
       * Join the titles of the path nodes with "/".
       *
       * @param array|null $input path nodes exposing a `title` property; null is treated as empty
       * @return string
       */
      private function getPathTitle(?array $input): string
      {
          $titles = [];
          foreach ($input ?? [] as $node) {
              $titles[] = $node->title;
          }

          return implode('/', $titles);
      }
  }