2
0

IndexTipitaka.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Services\SearchPaliDataService;
  5. use App\Services\OpenSearchService;
  6. use App\Services\SummaryService;
  7. use App\Services\TagService;
  8. use Illuminate\Support\Facades\Log;
  9. use App\Models\PaliText;
  10. use App\Models\Sentence;
  11. use App\Services\PaliContentService;
  12. use App\Http\Api\ChannelApi;
  13. use App\Models\ProgressChapter;
  14. class IndexTipitaka extends Command
  15. {
  /**
   * The name and signature of the console command.
   *
   * Example: php artisan opensearch:index-tipitaka 93 --para=6 --granularity=chapter
   *
   * The option descriptions mirror what handle() actually does: only the
   * "chapter" and "paragraph" granularities (or "all") are processed.
   *
   * @var string
   */
  protected $signature = 'opensearch:index-tipitaka
    {book : The book ID to index data for (0 indexes every book)}
    {--para= : Paragraph number to index; omit to index all paragraphs}
    {--channel= : Channel id to index; omit to index all channels}
    {--test : Print the generated documents instead of writing them to OpenSearch}
    {--summary=on : Use "on" to generate summaries, any other value skips them}
    {--resume : Continue from the given book up to the highest book id}
    {--granularity=all : The granularity to index (chapter or paragraph; omit to index all)}';

  /**
   * The console command description.
   *
   * @var string
   */
  protected $description = 'Index Pali data into OpenSearch for a specified book and optional granularity (all granularities if not specified)';

  /** When true, documents are printed to the console and nothing is sent to OpenSearch. */
  private bool $isTest = false;

  /** When true, a summary is requested from the summary service for paragraph/session documents. */
  private bool $summary = false;
  /**
   * Build the command together with the services it delegates to.
   *
   * Every argument is promoted to a protected property of the same name.
   *
   * @return void
   */
  public function __construct(
    protected SearchPaliDataService $searchPaliDataService,
    protected OpenSearchService $openSearchService,
    protected SummaryService $summaryService,
    protected TagService $tagService,
  ) {
    parent::__construct();
  }
  /**
   * Execute the console command.
   *
   * Validates the requested granularity, checks the OpenSearch connection,
   * resolves the list of books to process and runs the chapter and/or
   * paragraph indexer for each book.
   *
   * Book selection:
   *  - book 0            => every book from 1 to the highest book id
   *  - --resume          => from the given book to the highest book id
   *  - otherwise         => only the given book
   *
   * @return int 0 when every indexer reported success, 1 on any failure
   */
  public function handle()
  {
    $this->line('index tipitaka start');
    $book = (int) $this->argument('book');
    $paragraph = $this->option('para');
    $channel = $this->option('channel');
    if ($channel) {
      $this->line('channel=' . $channel);
    }

    // An empty value is treated like the default, matching the log message below.
    $granularity = $this->option('granularity') ?: 'all';
    $supported = ['all', 'chapter', 'paragraph'];
    if (!in_array($granularity, $supported, true)) {
      // Previously an unknown value silently indexed nothing and exited with 0.
      $this->error("Unsupported granularity: {$granularity} (expected one of: " . implode(', ', $supported) . ')');
      return 1;
    }

    $this->summary = $this->option('summary') === 'on';
    if ($this->option('test')) {
      $this->isTest = true;
      $this->info('test mode');
    }

    try {
      // Test OpenSearch connection
      [$connected, $message] = $this->openSearchService->testConnection();
      if (!$connected) {
        $this->error($message);
        Log::error($message);
        return 1;
      }

      if ($book === 0 || $this->option('resume')) {
        $maxBookId = (int) PaliText::max('book');
        $firstBookId = $book === 0 ? 1 : $book;
        // range() counts downwards when start > end, so guard against an
        // empty table or a start book beyond the last one.
        $booksId = $firstBookId <= $maxBookId ? range($firstBookId, $maxBookId) : [];
      } else {
        $booksId = [$book];
      }

      if ($booksId === []) {
        $this->warn('No books to index');
      }

      $indexChapters = $granularity === 'chapter' || $granularity === 'all';
      $indexParagraphs = $granularity === 'paragraph' || $granularity === 'all';

      $overallStatus = 0; // 0 for success, 1 as soon as any indexer reports a failure
      foreach ($booksId as $bookId) {
        if ($indexChapters) {
          $overallStatus = max($overallStatus, (int) $this->indexChapter($bookId, $channel));
        }
        if ($indexParagraphs) {
          $overallStatus = max($overallStatus, (int) $this->indexTipitakaParagraph($bookId, $paragraph));
        }
      }

      return $overallStatus;
    } catch (\Throwable $e) {
      // Throwable (not just Exception) so that TypeError/Error raised inside an
      // indexer is reported and turned into a failing exit code as well.
      $this->error("Failed to index Pali data: " . $e->getMessage());
      Log::error("Failed to index Pali data for book: $book, granularity: " . ($granularity ?: 'all'), ['error' => $e]);
      return 1;
    }
  }
  /**
   * Index the paragraphs of a book, or a single paragraph when one is given.
   *
   * One document is written per paragraph. In addition, paragraphs are grouped
   * into "session" documents: a paragraph whose content carries a
   * `commentary` id opens a new session, and every following paragraph is
   * added to it until the next `commentary` id appears or the book ends.
   *
   * @param int $book Book id.
   * @param int|string|null $paragraph Paragraph number to restrict indexing to; a falsy value indexes the whole book.
   * @return int 0 on success, 1 when the book has no level 1 record.
   */
  protected function indexTipitakaParagraph($book, $paragraph = null)
  {
    $this->info("Starting to index paragraphs for book: $book");

    // The level 1 record supplies the uid used to look up the book's tags.
    $bookRoot = PaliText::where('book', $book)->where('level', 1)->first();
    if ($bookRoot === null) {
      $this->error("No level 1 record found for book: $book");
      Log::error("No level 1 record found for book: $book");
      return 1;
    }
    $category = $this->tagService->getTagsName($bookRoot->uid);

    $query = PaliText::where('book', $book);
    if ($paragraph) {
      $query->where('paragraph', $paragraph);
    }
    $paragraphs = $query->orderBy('paragraph')->cursor();

    $total = 0;
    $currChapterTitle = '';
    $commentaryId = '';
    $currSession = [];
    $sessionInfo = null;       // record of the paragraph that opened the running session
    $sessionChapterTitle = ''; // chapter title in effect when the session was opened

    foreach ($paragraphs as $para) {
      $total++;
      if ($para->level < 8) {
        $currChapterTitle = $para->toc;
      }

      $paraContent = $this->searchPaliDataService
        ->getParaContent($para['book'], $para['paragraph']);

      if (isset($paraContent['commentary'])) {
        // A new commentary id closes the running session before this
        // paragraph is added, so the paragraph lands in its own session.
        if (!empty($commentaryId) && $currSession !== []) {
          $this->indexPaliSession($sessionInfo, $currSession, $sessionChapterTitle, $commentaryId);
        }
        $commentaryId = $paraContent['commentary'];
        $currSession = [];
        $sessionInfo = $para->toArray();
        $sessionChapterTitle = $currChapterTitle;
      }

      if (!empty($commentaryId)) {
        $currSession[] = $paraContent;
      }

      $this->indexParagraph($para->toArray(), $paraContent, $commentaryId, $category);
      $this->info("{$para['book']}-[{$para['paragraph']}]-[{$commentaryId}]");
    }

    // Flush the session that was still open when the book ended.
    if (!empty($commentaryId) && $currSession !== []) {
      $this->indexPaliSession($sessionInfo, $currSession, $sessionChapterTitle, $commentaryId);
    }

    $this->info("Successfully indexed $total paragraphs for book: $book");
    Log::info("Indexed $total paragraphs for book: $book");
    return 0;
  }
  /**
   * Build and store the OpenSearch document for a single Pali paragraph.
   *
   * In test mode the title and summary are printed instead of being stored.
   *
   * @param array $paraInfo Paragraph record; reads `book`, `paragraph`, `uid`, `path` (JSON string) and `level`.
   * @param array $paraContent Parsed content; reads `markdown`, `text`, `words`, `bold1`, `bold2` and `bold3`.
   * @param mixed $related_id Currently unused: the document's `related_id` is the "book-paragraph" id.
   * @param array $category Value stored in the document's `category` field.
   * @return void
   */
  protected function indexParagraph($paraInfo, $paraContent, $related_id, array $category)
  {
    $paraId = $paraInfo['book'] . '-' . $paraInfo['paragraph'];

    // json_decode() yields null for an empty or invalid path; normalise to an
    // empty list so the title lookup and getPathTitle() never receive null.
    $rawPath = $paraInfo['path'] ?? null;
    $decodedPath = is_string($rawPath) ? json_decode($rawPath) : null;
    $path = is_array($decodedPath) ? $decodedPath : [];

    // The last path node names the paragraph's closest heading.
    $title = $path !== [] ? (end($path)->title ?? '') : '';

    $document = [
      'id' => "tipitaka_paragraph_pi_{$paraId}",
      'resource_id' => $paraInfo['uid'],
      'resource_type' => 'tipitaka',
      'title' => [
        'text' => ['pali' => $title],
      ],
      'summary' => [
        'text' => $this->summary ? $this->summaryService->summarize($paraContent['markdown']) : '',
      ],
      'content' => [
        'text' => ['pali' => $paraContent['text']],
        'suggest' => ['pali' => $paraContent['words']],
      ],
      'bold_single' => implode(' ', $paraContent['bold1']),
      'bold_multi' => implode(' ', array_merge($paraContent['bold2'], $paraContent['bold3'])),
      'related_id' => $paraId,
      'category' => $category,
      'language' => 'pi',
      'updated_at' => now()->toIso8601String(),
      'granularity' => 'paragraph',
      'path' => $this->getPathTitle($path),
    ];

    // Heading paragraphs (level below 8) also feed the title suggester.
    if ($paraInfo['level'] < 8) {
      $document['title']['suggest']['pali'] = $paraContent['words'];
    }

    if ($this->isTest) {
      $this->info($document['title']['text']['pali']);
      $this->info($document['summary']['text']);
      return;
    }

    $this->openSearchService->create($document['id'], $document);
  }
  /**
   * Build and store the OpenSearch document for a "session": a run of
   * paragraphs that share one commentary id.
   *
   * In test mode the title and summary are printed instead of being stored.
   *
   * @param array $paraInfo Paragraph record describing the session; reads `uid`, `paragraph` and `path` (JSON string).
   * @param array $contents Parsed contents of the session's paragraphs; each reads `markdown`, `bold1`, `bold2` and `bold3`.
   * @param string $currChapter Chapter title used in the session title.
   * @param mixed $related_id Commentary id; used for the document id and `related_id`.
   * @return void
   */
  protected function indexPaliSession($paraInfo, $contents, $currChapter, $related_id)
  {
    if (empty($contents)) {
      // Nothing to index for an empty session.
      return;
    }

    $markdown = [];
    $bold_single = [];
    $bold_multi = [];
    foreach ($contents as $content) {
      $markdown[] = $content['markdown'];
      $bold_single = array_merge($bold_single, $content['bold1']);
      $bold_multi = array_merge($bold_multi, $content['bold2'], $content['bold3']);
    }
    $sessionMarkdown = implode("\n\n", $markdown);

    // json_decode() yields null for an empty or invalid path.
    $rawPath = $paraInfo['path'] ?? null;
    $decodedPath = is_string($rawPath) ? json_decode($rawPath) : null;
    $path = is_array($decodedPath) ? $decodedPath : [];

    // NOTE(review): `category` and `language` use 'pali' here while paragraph
    // documents use a tag list and 'pi' - confirm which the index expects.
    $document = [
      'id' => "pali_session_{$related_id}",
      'resource_id' => $paraInfo['uid'],
      'resource_type' => 'original_text',
      // Same shape as the paragraph documents (no extra list wrapper), so the
      // test-mode output below can read the title.
      'title' => [
        'text' => ['pali' => "{$currChapter} paragraph {$paraInfo['paragraph']}"],
      ],
      'summary' => [
        // Summarise the whole session rather than only its last paragraph.
        'text' => $this->summary ? $this->summaryService->summarize($sessionMarkdown) : '',
      ],
      'content' => [
        'text' => ['pali' => $sessionMarkdown],
      ],
      'bold_single' => implode(" ", $bold_single),
      'bold_multi' => implode(" ", $bold_multi),
      'related_id' => $related_id,
      'category' => 'pali',
      'language' => 'pali',
      'updated_at' => now()->toIso8601String(),
      'granularity' => 'session',
      'path' => $this->getPathTitle($path),
    ];

    if ($this->isTest) {
      $this->info($document['title']['text']['pali']);
      $this->info($document['summary']['text']);
      return;
    }

    $this->openSearchService->create($document['id'], $document);
  }
  /**
   * Index the chapters of a book, one document per chapter and channel.
   *
   * A chapter is a record with level below 8. It spans from its own paragraph
   * up to the paragraph before the next such record, or up to the last
   * paragraph of the book for the final chapter. For every channel that has
   * sentences in that span (word-by-word channels are skipped) the sentences
   * are rendered to HTML and handed to chapterSave().
   *
   * @param int $book Book id.
   * @param ?string $channelId Restrict indexing to this channel; null/empty indexes every channel found.
   * @return int Always 0.
   */
  protected function indexChapter($book, $channelId = null)
  {
    $this->info("Starting to index chapters for book: $book");

    $chapters = PaliText::where('book', $book)
      ->where('level', '<', 8)
      ->orderBy('paragraph')->get();
    $lastIndex = count($chapters) - 1;

    // Tags come from the level 1 record and apply to the chapters after it.
    $category = null;

    foreach ($chapters as $key => $chapter) {
      if ($chapter->level === 1) {
        $category = $this->tagService->getTagsName($chapter->uid);
      }

      // Chapter span: from this heading to just before the next heading.
      $start = $chapter->paragraph;
      if ($key === $lastIndex) {
        // max() is scoped to the book. Calling value() on the model returned
        // by first() would start a fresh, unscoped query instead.
        $end = (int) PaliText::where('book', $book)->max('paragraph');
      } else {
        $end = $chapters[$key + 1]->paragraph - 1;
      }

      // Collect every channel that has sentences inside the span.
      $channelQuery = Sentence::where('book_id', $book)
        ->whereBetween('paragraph', [$start, $end]);
      if ($channelId) {
        $channelQuery = $channelQuery->where('channel_uid', $channelId);
      }
      $channels = $channelQuery->select('channel_uid')
        ->groupBy('channel_uid')->get();

      $this->info("index chapter start={$start} end={$end}");

      foreach ($channels as $channel) {
        $channelInfo = ChannelApi::getById($channel->channel_uid);
        if (!$channelInfo) {
          Log::error('invalid channel', ['id' => $channel->channel_uid]);
          continue;
        }
        $this->info('channel =' . $channelInfo['name']);
        if ($channelInfo['type'] === 'wbw') {
          $this->info('wbw channel skip');
          continue;
        }

        $paragraphsData = app(PaliContentService::class)->paragraphs(
          $book,
          $start,
          $end,
          [$channel->channel_uid],
          ['mode' => 'read', 'format' => 'html', 'original' => false]
        );

        $isOriginal = $channelInfo['type'] === 'original';
        $area = $isOriginal ? 'original' : 'translation';

        // Render the HTML shown for this chapter.
        $display = [];
        $title = '';
        foreach ($paragraphsData as $paragraph) {
          $translation = [];
          $original = [];
          foreach ($paragraph['children'] as $sent) {
            $sid = "{$sent['book']}-{$sent['para']}-{$sent['wordStart']}-{$sent['wordEnd']}";

            if (isset($sent['translation'])) {
              foreach ($sent['translation'] as $tran) {
                if ($tran['channel']['id'] === $channel->channel_uid) {
                  $html = $tran['html'] ?? $tran['content'];
                  $translation[] = "<div class='sentence' data-sid='{$sid}'>{$html}</div>";
                  // Text of the chapter's first paragraph becomes the title.
                  if ($tran['para'] === $start && !empty($html)) {
                    $title = $html;
                  }
                }
              }
            }

            // All conditions must hold (&&): with || a missing `origin` still
            // reached count()/foreach and failed.
            if (isset($sent['origin']) && is_array($sent['origin'])) {
              foreach ($sent['origin'] as $origin) {
                if ($origin['channel']['id'] === $channel->channel_uid) {
                  $html = $origin['html'] ?? $origin['content'];
                  $original[] = "<div class='sentence origin' data-sid='{$sid}'>{$html}</div>";
                  // Only used as title when no translation supplied one.
                  if (empty($title) && $origin['para'] === $start && !empty($html)) {
                    $title = $html;
                  }
                }
              }
            }
          }

          // Only the chapter's first paragraph is rendered as a heading.
          $level = $paragraph['para'] === $start ? $chapter->level : 0;
          $htmlContent = $isOriginal ? implode('', $original) : implode('', $translation);

          if ($level > 0) {
            $display[] = "<div class='{$area}' data-para='{$paragraph['para']}'><h{$level}>{$htmlContent}</h{$level}></div>";
          } else {
            $display[] = "<div class='{$area}' data-para='{$paragraph['para']}'><div class='para-block'>{$htmlContent}</div></div>";
          }
        }

        $this->chapterSave([
          'book' => $book,
          'para' => $start,
          'level' => $chapter->level,
          'channel' => $channel->channel_uid,
          'content' => implode('', $display),
          'title' => strip_tags($title),
          'cat' => $category,
        ]);
      }
    }

    return 0;
  }
  /**
   * Build and store the OpenSearch document for one chapter in one channel.
   *
   * The plain text and title go into the `zh` field when the channel language
   * contains "zh", otherwise into the `pali` field; the HTML is kept in
   * `content.display`. In test mode the HTML is printed instead of stored.
   *
   * @param array $param Reads `book`, `para`, `level`, `channel` (channel id),
   *                     `content` (HTML), `title` and `cat` (category).
   * @return void
   */
  protected function chapterSave(array $param)
  {
    $channel = ChannelApi::getById($param['channel']);
    if (!$channel) {
      // Without channel data the document language cannot be determined.
      Log::error('invalid channel', ['id' => $param['channel']]);
      return;
    }
    $lang = (string) ($channel['lang'] ?? '');

    $progress = ProgressChapter::where('book', $param['book'])
      ->where('para', $param['para'])
      ->where('channel_id', $param['channel'])
      ->first();

    $chapterKey = "{$param['book']}-{$param['para']}";
    $document = [
      'id' => "tipitaka_chapter_{$chapterKey}_{$param['channel']}",
      // Fall back to a synthetic id when no progress record exists.
      'resource_id' => $progress ? $progress->uid : "{$chapterKey}_{$param['channel']}",
      'resource_type' => 'tipitaka',
      'title' => [],
      'summary' => [
        'text' => '',
      ],
      'content' => [],
      'related_id' => $chapterKey,
      'category' => $param['cat'],
      'language' => $channel['lang'] ?? null,
      'updated_at' => now()->toIso8601String(),
      'granularity' => $param['level'] === 1 ? 'book' : 'chapter',
    ];

    // TODO: extend the language detection so content is placed in the
    // matching text.pali or text.zh field.
    $textField = str_contains($lang, 'zh') ? 'zh' : 'pali';
    $document['content']['text'][$textField] = strip_tags($param['content']);
    $document['title']['text'][$textField] = strip_tags($param['title']);
    $document['content']['display'] = $param['content']; // HTML used for display

    if ($this->isTest) {
      $this->info($param['content']);
      return;
    }

    $this->openSearchService->create($document['id'], $document);
    $this->info("create index {$document['id']} size=" . strlen($param['content']));
  }
  /**
   * Placeholder for sentence-level indexing, which is not implemented yet.
   *
   * Emits a console warning and a log warning, then reports failure.
   *
   * @param int $book Book id named in the warnings.
   * @return int Always 1.
   */
  protected function indexPaliSentences($book)
  {
    $this->warn("Sentence indexing is not yet implemented for book: {$book}");
    Log::warning("Sentence indexing not implemented for book: {$book}");

    return 1;
  }
  /**
   * Join the titles of a decoded path into a single "a/b/c" string.
   *
   * Callers pass the result of json_decode(), which is null for an empty or
   * invalid path, so null and empty input yield an empty string.
   *
   * @param array|null $input Path nodes; each node is an object (or array) with a `title` entry.
   * @return string Titles joined with "/", or '' when there are no nodes.
   */
  private function getPathTitle(?array $input): string
  {
    if (empty($input)) {
      return '';
    }

    $titles = array_map(
      static fn ($node) => is_array($node) ? ($node['title'] ?? '') : ($node->title ?? ''),
      $input
    );

    return implode('/', $titles);
  }
  433. }