SearchPaliDataService.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. <?php
  2. namespace App\Services;
  3. use App\Models\BookTitle;
  4. use App\Models\WbwTemplate;
  5. use App\Models\PaliText;
  6. use App\Models\PaliSentence;
  7. class SearchPaliDataService
  8. {
  9. /**
  10. * Retrieve paginated Pali data for search.
  11. *
  12. * @param int $book
  13. * @param int $start
  14. * @param int $pageSize
  15. * @return array
  16. */
  17. public function getPaliData($book, $start = 1, $pageSize = null)
  18. {
  19. $maxParagraph = WbwTemplate::where('book', $book)->max('paragraph');
  20. $output = [];
  21. $pageSize = $pageSize === null ? $maxParagraph : $pageSize;
  22. // Calculate end paragraph for pagination
  23. $endOfPara = min($start + $pageSize, $maxParagraph + 1);
  24. for ($iPara = $start; $iPara < $endOfPara; $iPara++) {
  25. $content = $this->getParaContent($book, $iPara);
  26. // Retrieve book ID
  27. $pcd_book = BookTitle::where('book', $book)
  28. ->where('paragraph', '<=', $iPara)
  29. ->orderBy('paragraph', 'desc')
  30. ->first();
  31. $pcd_book_id = $pcd_book ? $pcd_book->sn : BookTitle::where('book', $book)
  32. ->orderBy('paragraph')
  33. ->value('sn');
  34. $output[] = [
  35. 'uid' => PaliText::where('book', $book)->where('paragraph', $iPara)->value('uid'),
  36. 'book' => $book,
  37. 'paragraph' => $iPara,
  38. 'bold1' => implode(' ', $content['bold1']),
  39. 'bold2' => implode(' ', $content['bold2']),
  40. 'bold3' => implode(' ', $content['bold3']),
  41. 'content' => $content['markdown'],
  42. 'markdown' => $content['markdown'],
  43. 'text' => $content['text'],
  44. 'pcd_book_id' => $pcd_book_id
  45. ];
  46. }
  47. return ['rows' => $output, 'count' => $maxParagraph];
  48. }
  49. /**
  50. * Generate content string for a given book and paragraph.
  51. *
  52. * @param int $book
  53. * @param int $para
  54. * @return string
  55. */
  56. private function getContent($book, $para)
  57. {
  58. $words = WbwTemplate::where('book', $book)
  59. ->where('paragraph', $para)
  60. ->where('type', '<>', '.ctl.')
  61. ->orderBy('wid')
  62. ->get();
  63. $content = '';
  64. foreach ($words as $word) {
  65. if ($word->style === 'bld') {
  66. if (strpos($word->word, '{') === false) {
  67. $content .= "**{$word->word}** ";
  68. } else {
  69. $content .= str_replace(['{', '}'], ['**', '** '], $word->word);
  70. }
  71. } elseif ($word->style === 'note') {
  72. $content .= " _{$word->word}_ ";
  73. } else {
  74. $content .= $word->word . ' ';
  75. }
  76. }
  77. return trim($content);
  78. }
  79. /**
  80. * Generate paragraph sentence list for a given book and paragraph.
  81. *
  82. * @param int $book
  83. * @param int $para
  84. * @return array $sentences
  85. */
  86. public function getParaContent($book, $para)
  87. {
  88. $sentences = PaliSentence::where('book', $book)
  89. ->where('paragraph', $para)
  90. ->orderBy('word_begin')
  91. ->get();
  92. if (!$sentences) {
  93. return null;
  94. }
  95. $markdown = [];
  96. $text = [];
  97. $wordList = [];
  98. foreach ($sentences as $key => $sentence) {
  99. $content = $this->getSentenceContent($book, $para, $sentence->word_begin, $sentence->word_end);
  100. $markdown[] = $content['markdown'];
  101. $text[] = $content['text'];
  102. $wordList = array_merge($wordList, $content['words']);
  103. }
  104. // Retrieve bold words
  105. $words = WbwTemplate::where('book', $book)
  106. ->where('paragraph', $para)
  107. ->orderBy('wid')
  108. ->get();
  109. $bold1 = [];
  110. $bold2 = [];
  111. $bold3 = [];
  112. $currBold = [];
  113. foreach ($words as $word) {
  114. if ($word->type === '.ctl.') {
  115. //检测义注段落号
  116. if (preg_match('/^para\d+_[a-zA-Z].*$/', $word->real)) {
  117. $commentary = $word->real;
  118. }
  119. } else {
  120. if ($word->style === 'bld') {
  121. $currBold[] = $word->real;
  122. } else {
  123. $countBold = count($currBold);
  124. if ($countBold === 1) {
  125. $bold1[] = $currBold[0];
  126. } elseif ($countBold === 2) {
  127. $bold2 = array_merge($bold2, $currBold);
  128. } elseif ($countBold > 0) {
  129. $bold3 = array_merge($bold3, $currBold);
  130. }
  131. $currBold = [];
  132. }
  133. }
  134. }
  135. $data = [
  136. 'markdown' => implode("\n", $markdown),
  137. 'text' => implode(" ", $text),
  138. 'words' => $wordList,
  139. 'bold1' => $bold1,
  140. 'bold2' => $bold2,
  141. 'bold3' => $bold3,
  142. ];
  143. if (isset($commentary)) {
  144. $data['commentary'] = $commentary;
  145. }
  146. return $data;
  147. }
  148. /**
  149. * Generate paragraph sentence list for a given book and paragraph.
  150. *
  151. * @param int $book
  152. * @param int $para
  153. * @return array $sentence
  154. */
  155. public function getSentenceContent($book, $para, $start, $end)
  156. {
  157. $words = WbwTemplate::where('book', $book)
  158. ->where('paragraph', $para)
  159. ->where('type', '<>', '.ctl.')
  160. ->whereBetween('wid', [$start, $end])
  161. ->orderBy('wid')
  162. ->get();
  163. $arrText = [];
  164. $markdown = '';
  165. $wordList = [];
  166. foreach ($words as $word) {
  167. $arrText[] = str_replace(['{', '}'], ['', ''], $word->word);
  168. $wordList[] = $word->real;
  169. if ($word->style === 'bld') {
  170. if (strpos($word->word, '{') === false) {
  171. $markdown .= "**{$word->word}** ";
  172. } else {
  173. $markdown .= str_replace(['{', '}'], ['**', '**'], $word->word) . ' ';
  174. }
  175. } elseif ($word->style === 'note') {
  176. $markdown .= " ~~{$word->word}~~ ";
  177. } else {
  178. $markdown .= $word->word . ' ';
  179. }
  180. }
  181. //去掉多于的空格
  182. $markdown = $this->removeSpace($markdown);
  183. //合并连续的黑体
  184. $markdown = str_replace(['~~ ~~', '** **'], [' ', ' '], $markdown);
  185. $text = $this->removeSpace(implode(' ', $arrText));
  186. return [
  187. 'markdown' => $this->abbrReplace(trim($markdown)),
  188. 'text' => $this->abbrReplace($text),
  189. 'words' => $wordList,
  190. ];
  191. }
  192. private function removeSpace(string $input)
  193. {
  194. return str_replace(
  195. [' ti', ' ,', ' .', ' ?', ' ;', '[ ', ' ]', '( ', ' )', '‘ ‘ ', ' ’ ’'],
  196. ['ti', ',', '.', '?', ';', '[', ']', '(', ')', '‘‘', '’’'],
  197. $input
  198. );
  199. }
  200. private function abbrReplace($input)
  201. {
  202. $abbr = ['sī .', 'syā .', 'kaṃ .', 'pī .'];
  203. $abbrTo = ['sī.', 'syā.', 'kaṃ.', 'pī.'];
  204. return str_replace($abbr, $abbrTo, $input);
  205. }
  206. }