PaliContentService.php 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. <?php
  2. // api-v8/app/Services/PaliContentService.php
  3. namespace App\Services;
  4. use App\Models\Sentence;
  5. use App\Models\Channel;
  6. use App\Models\PaliText;
  7. use App\Models\WbwBlock;
  8. use App\Models\Wbw;
  9. use App\Models\Discussion;
  10. use App\Models\PaliSentence;
  11. use App\Models\SentSimIndex;
  12. use Illuminate\Support\Str;
  13. use Illuminate\Support\Facades\Cache;
  14. use App\Http\Api\MdRender;
  15. use App\Http\Api\SuggestionApi;
  16. use App\Http\Api\ChannelApi;
  17. use App\Http\Api\UserApi;
  18. use Illuminate\Support\Facades\Log;
  19. use Illuminate\Support\Arr;
  20. use App\Http\Api\StudioApi;
  21. class PaliContentService
  22. {
  /**
   * Default skeleton of an article payload.
   *
   * NOTE(review): no method of this class reads or writes this property;
   * presumably it is consumed by subclasses — confirm before removing.
   *
   * @var array
   */
  protected $result = [
      'uid' => '',
      'title' => '',
      'path' => [],
      'sub_title' => '',
      'summary' => '',
      'content' => '',
      'content_type' => 'html',
      'toc' => [],
      'status' => 30,
      'lang' => '',
      'created_at' => '',
      'updated_at' => '',
  ];

  /**
   * Channel ids used as word-by-word sources; makeContentObj() only reads
   * element 0. Nothing in this class assigns it.
   *
   * @var array
   */
  protected $wbwChannels = [];

  /**
   * Columns to select when querying sentence rows.
   *
   * @var array
   */
  protected $selectCol = [
      'uid',
      'book_id',
      'paragraph',
      'word_start',
      'word_end',
      'channel_uid',
      'content',
      'content_type',
      'editor_uid',
      'acceptor_uid',
      'pr_edit_at',
      'fork_at',
      'create_time',
      'modify_time',
      'created_at',
      'updated_at',
  ];

  /**
   * Uuid of the current reader, forwarded to _sentCanReadCount(); null is
   * treated there as a guest.
   *
   * @var string|null
   */
  protected $userUuid = null;

  /**
   * Value passed to MdRender as its 'debug' option.
   *
   * @var array
   */
  protected $debug = [];
  /**
   * Count, per channel type, the channels that hold non-empty content for one
   * sentence and that the given user may read, plus the sentence's `count`
   * value from SentSimIndex.
   *
   * @param mixed       $book     book id of the sentence
   * @param mixed       $para     paragraph number
   * @param mixed       $start    first word number of the sentence
   * @param mixed       $end      last word number of the sentence
   * @param string|null $userUuid reader's uuid; any falsy value means guest
   * @return array keys: simNum, tranNum, nissayaNum, commNum, originNum
   */
  public static function _sentCanReadCount($book, $para, $start, $end, $userUuid = null)
  {
      // The readable-channel list is cached per user; all guests share one entry.
      $keyCanRead = '/channel/can-read/' . ($userUuid ? $userUuid : 'guest');
      $channelCanRead = Cache::remember(
          $keyCanRead,
          config('mint.cache.expire'),
          function () use ($userUuid) {
              return ChannelApi::getCanReadByUser($userUuid);
          }
      );

      // Distinct readable channels with a non-empty sentence at this position.
      $channels = Sentence::where('book_id', $book)
          ->where('paragraph', $para)
          ->where('word_start', $start)
          ->where('word_end', $end)
          ->where('strlen', '<>', 0)
          ->whereIn('channel_uid', $channelCanRead)
          ->select('channel_uid')
          ->groupBy('channel_uid')
          ->get();

      $channelList = [];
      foreach ($channels as $channel) {
          if (Str::isUuid($channel->channel_uid)) {
              $channelList[] = $channel->channel_uid;
          }
      }

      // Every counter starts at 0 so the result shape never depends on the data.
      $output = [
          'simNum' => 0,
          'tranNum' => 0,
          'nissayaNum' => 0,
          'commNum' => 0,
          'originNum' => 0,
      ];

      $simId = PaliSentence::where('book', $book)
          ->where('paragraph', $para)
          ->where('word_begin', $start)
          ->where('word_end', $end)
          ->value('id');
      if ($simId) {
          // value() yields null when no index row exists; report 0, not null.
          $output['simNum'] = SentSimIndex::where('sent_id', $simId)->value('count') ?? 0;
      }

      $counterByType = [
          'translation' => 'tranNum',
          'nissaya' => 'nissayaNum',
          'commentary' => 'commNum',
          'original' => 'originNum',
      ];
      $channelInfo = Channel::whereIn('uid', $channelList)->select('type')->get();
      foreach ($channelInfo as $channel) {
          $type = (string) $channel->type;
          // Channel types outside the four above are not counted.
          if (isset($counterByType[$type])) {
              $output[$counterByType[$type]]++;
          }
      }

      return $output;
  }
  /**
   * Create an empty sentence node; for books with id below 1000 also attach
   * the paragraph's path and the per-type channel counters.
   *
   * @param mixed $book       book id
   * @param mixed $para       paragraph number
   * @param mixed $word_start first word number of the sentence
   * @param mixed $word_end   last word number of the sentence
   * @return array sentence node with empty origin / translation / commentaries lists
   */
  private function newSent($book, $para, $word_start, $word_end)
  {
      $sent = [
          'id' => "{$book}-{$para}-{$word_start}-{$word_end}",
          'book' => $book,
          'para' => $para,
          'wordStart' => $word_start,
          'wordEnd' => $word_end,
          'origin' => [],
          'translation' => [],
          'commentaries' => [],
      ];

      if ($book < 1000) {
          // Per-type channel counts for this sentence.
          $channelCount = self::_sentCanReadCount($book, $para, $word_start, $word_end, $this->userUuid);

          // A missing PaliText row or an undecodable path yields an empty
          // path instead of a "foreach on null" error.
          $rawPath = PaliText::where('book', $book)->where('paragraph', $para)->value('path');
          $decoded = is_string($rawPath) ? json_decode($rawPath, true) : null;
          $path = is_array($decoded) ? $decoded : [];

          $sent['path'] = [];
          foreach ($path as $node) {
              // Each path node also carries its title under 'paliTitle'.
              $node['paliTitle'] = $node['title'];
              $sent['path'][] = $node;
          }

          $sent['tranNum'] = $channelCount['tranNum'];
          $sent['nissayaNum'] = $channelCount['nissayaNum'];
          $sent['commNum'] = $channelCount['commNum'];
          $sent['originNum'] = $channelCount['originNum'];
          $sent['simNum'] = $channelCount['simNum'];
      }

      return $sent;
  }
  /**
   * Build paragraph-level article content from sentence rows.
   *
   * Rows are grouped by "book-paragraph". Every distinct sentence of a
   * paragraph becomes one node (see newSent()) that receives one entry per
   * channel in $indexChannel, whether or not a row exists for that channel.
   *
   * @param iterable $record       sentence rows exposing book_id, paragraph,
   *                               word_start, word_end, channel_uid and the
   *                               content / audit columns read below; a `sid`
   *                               property is set on every row as a side effect
   * @param string   $mode         read | edit | wbw
   * @param array    $indexChannel channel id => channel info (name, type, uid,
   *                               lang and optionally studio)
   * @param string   $format       output format handed to MdRender
   * @return array list of paragraph nodes with keys book, para, channels,
   *               sentences, mode, children
   */
  public function makeContentObj($record, $mode, $indexChannel, $format = 'react')
  {
      // Group rows by paragraph and tag each with "<sentence id>_<channel uid>".
      $paraIndex = [];
      foreach ($record as $value) {
          $currSentId = "{$value->book_id}-{$value->paragraph}-{$value->word_start}-{$value->word_end}";
          $value->sid = "{$currSentId}_{$value->channel_uid}";
          $paraIndex["{$value->book_id}-{$value->paragraph}"][] = $value;
      }

      $channelsId = [];
      foreach ($indexChannel as $channelId => $info) {
          $channelsId[] = $channelId;
      }
      // NOTE(review): the last channel id is dropped from the list exposed as
      // 'channels' and used for commentary rendering. Presumably callers
      // append the original-text channel last — confirm.
      array_pop($channelsId);

      // For every sentence, look up its row in each channel and fill the node.
      $paragraphs = [];
      foreach ($paraIndex as $currParaId => $sentData) {
          $arrParaId = explode('-', $currParaId);

          $sentIndex = [];
          $rowBySid = [];
          foreach ($sentData as $sent) {
              $currSentId = "{$sent->book_id}-{$sent->paragraph}-{$sent->word_start}-{$sent->word_end}";
              $sentIndex[$currSentId] = [$sent->book_id, $sent->paragraph, $sent->word_start, $sent->word_end];
              // Keep the first row per sid so lookups below are O(1) while
              // still returning the same row a linear "first match" would.
              if (!isset($rowBySid[$sent->sid])) {
                  $rowBySid[$sent->sid] = $sent;
              }
          }

          $paraProps = [
              'book' => $arrParaId[0],
              'para' => $arrParaId[1],
              'channels' => $channelsId,
              'sentences' => array_values($sentIndex),
              'mode' => $mode,
              'children' => [],
          ];

          // Build the list of sentences inside the paragraph.
          foreach ($sentIndex as $ids => $arrSentId) {
              $sentNode = $this->newSent($arrSentId[0], $arrSentId[1], $arrSentId[2], $arrSentId[3]);

              foreach ($indexChannel as $channelId => $info) {
                  $newSent = [
                      'content' => '',
                      'html' => '',
                      'book' => $arrSentId[0],
                      'para' => $arrSentId[1],
                      'wordStart' => $arrSentId[2],
                      'wordEnd' => $arrSentId[3],
                      'channel' => [
                          'name' => $info->name,
                          'type' => $info->type,
                          'id' => $info->uid,
                          'lang' => $info->lang,
                      ],
                      'studio' => $info->studio ?? null,
                      'updateAt' => '',
                      'suggestionCount' => SuggestionApi::getCountBySent(
                          $arrSentId[0],
                          $arrSentId[1],
                          $arrSentId[2],
                          $arrSentId[3],
                          $channelId
                      ),
                  ];

                  $row = $rowBySid["{$ids}_{$channelId}"] ?? null;
                  if ($row) {
                      $newSent['id'] = $row->uid;
                      $newSent['content'] = $row->content;
                      $newSent['contentType'] = $row->content_type;
                      $newSent['editor'] = UserApi::getByUuid($row->editor_uid);
                      $newSent['forkAt'] = $row->fork_at;
                      // TODO: migrate the data. The legacy API did not maintain
                      // updated_at, so legacy rows disagree with modify_time;
                      // modify_time is therefore the value that is reported.
                      $newSent['updateAt'] = $this->formatModifyTime($row->modify_time);
                      $newSent['createdAt'] = $row->created_at;
                      if ($mode !== 'read' && !empty($row->acceptor_uid)) {
                          $newSent['acceptor'] = UserApi::getByUuid($row->acceptor_uid);
                          $newSent['prEditAt'] = $row->pr_edit_at;
                      }

                      switch ($info->type) {
                          case 'wbw':
                          case 'original':
                              $newSent = $this->fillOriginal($newSent, $row, $arrSentId, $indexChannel, $mode, $format);
                              break;
                          case 'nissaya':
                              $newSent['html'] = $this->renderNissaya(
                                  $row,
                                  "/sent/{$channelId}/{$ids}/{$format}",
                                  $mode,
                                  $format
                              );
                              break;
                          case 'commentary':
                              // Commentaries are converted against the whole channel list.
                              $newSent['html'] = $this->convertMarkdown($row, $channelsId, $mode, $format);
                              break;
                          default:
                              $newSent['html'] = $this->convertMarkdown($row, [$row->channel_uid], $mode, $format);
                              break;
                      }
                  } else {
                      Log::warning('no sentence record');
                  }

                  switch ($info->type) {
                      case 'wbw':
                      case 'original':
                          $sentNode['origin'][] = $newSent;
                          break;
                      case 'commentary':
                          $sentNode['commentaries'][] = $newSent;
                          break;
                      default:
                          $sentNode['translation'][] = $newSent;
                          break;
                  }
              }

              $paraProps['children'][] = $sentNode;
          }

          $paragraphs[] = $paraProps;
      }

      return $paragraphs;
  }

  /**
   * Format an epoch-milliseconds value as "Y-m-d H:i:s.mmm UTC".
   *
   * gmdate() keeps the value in UTC regardless of the default timezone, and
   * the millisecond part is zero-padded so 5 ms reads ".005", not ".5".
   *
   * @param mixed $modifyTime epoch time in milliseconds
   * @return string
   */
  private function formatModifyTime($modifyTime)
  {
      $millis = (int) $modifyTime;

      return gmdate('Y-m-d H:i:s.', intdiv($millis, 1000))
          . str_pad((string) ($millis % 1000), 3, '0', STR_PAD_LEFT)
          . ' UTC';
  }

  /**
   * Fill content/html of an entry whose channel type is 'wbw' or 'original'.
   *
   * Read mode always renders html. Otherwise a row with content type 'json'
   * is relabelled as a wbw channel and, when $this->wbwChannels[0] is set,
   * replaced by that channel's word-by-word data if any is found; every
   * other content type is rendered to html.
   *
   * @param array  $newSent      entry built so far
   * @param object $row          sentence row
   * @param array  $arrSentId    [book, paragraph, word_start, word_end]
   * @param array  $indexChannel channel id => channel info
   * @param string $mode         read | edit | wbw
   * @param string $format       output format handed to MdRender
   * @return array the updated entry
   */
  private function fillOriginal(array $newSent, $row, array $arrSentId, $indexChannel, $mode, $format)
  {
      if ($mode === 'read') {
          $newSent['content'] = '';
          $newSent['html'] = $this->renderStatic($row, 'translation', $mode, $format);
          return $newSent;
      }

      if ($row->content_type !== 'json') {
          $newSent['content'] = $row->content;
          $newSent['html'] = $this->renderStatic($row, 'translation', $mode, $format);
          return $newSent;
      }

      $newSent['channel']['type'] = 'wbw';
      if (!isset($this->wbwChannels[0])) {
          return $newSent;
      }

      $wbwChannelId = $this->wbwChannels[0];
      $wbwChannel = $indexChannel[$wbwChannelId];
      $newSent['channel']['name'] = $wbwChannel->name;
      $newSent['channel']['lang'] = $wbwChannel->lang;
      $newSent['channel']['id'] = $wbwChannelId;

      // Try to load word-by-word data; when found it replaces the row content.
      $wbwData = $this->getWbw($arrSentId[0], $arrSentId[1], $arrSentId[2], $arrSentId[3], $wbwChannelId);
      if ($wbwData) {
          $newSent['content'] = $wbwData;
          $newSent['contentType'] = 'json';
          $newSent['html'] = '';
          $newSent['studio'] = $wbwChannel->studio;
      }

      return $newSent;
  }

  /**
   * Render a nissaya row through the cache; rows that are not markdown give null.
   *
   * NOTE(review): the cache key does not include $mode although the rendering
   * does, so html cached in one mode is served in another — confirm intent.
   *
   * @param object $row      sentence row
   * @param string $cacheKey cache key for the rendered html
   * @param string $mode     read | edit | wbw
   * @param string $format   output format handed to MdRender
   * @return mixed rendered html or null
   */
  private function renderNissaya($row, $cacheKey, $mode, $format)
  {
      return Cache::remember(
          $cacheKey,
          config('mint.cache.expire'),
          function () use ($row, $mode, $format) {
              if ($row->content_type === 'markdown') {
                  return $this->renderStatic($row, 'nissaya', $mode, $format);
              }
              return null;
          }
      );
  }

  /**
   * Render a row with the static MdRender::render() entry point.
   *
   * @param object $row         sentence row
   * @param string $channelType channel type argument for the renderer
   * @param string $mode        read | edit | wbw
   * @param string $format      output format
   * @return mixed rendered html
   */
  private function renderStatic($row, $channelType, $mode, $format)
  {
      return MdRender::render(
          $row->content,
          [$row->channel_uid],
          null,
          $mode,
          $channelType,
          $row->content_type,
          $format
      );
  }

  /**
   * Convert a row with an MdRender instance configured as a 'translation' channel.
   *
   * @param object $row      sentence row
   * @param array  $channels channel ids passed to MdRender::convert()
   * @param string $mode     read | edit | wbw
   * @param string $format   output format
   * @return mixed converted html
   */
  private function convertMarkdown($row, array $channels, $mode, $format)
  {
      $mdRender = new MdRender([
          'debug' => $this->debug,
          'format' => $format,
          'mode' => $mode,
          'channelType' => 'translation',
          'contentType' => $row->content_type,
      ]);

      return $mdRender->convert($row->content, $channels);
  }
  /**
   * Load the word-by-word data of one sentence from a channel and return it
   * as a JSON string.
   *
   * Used outside read mode, where the original text is shown as word-by-word
   * data instead of plain html.
   *
   * @param mixed $book    book id
   * @param mixed $para    paragraph number
   * @param mixed $start   first word id (inclusive)
   * @param mixed $end     last word id (inclusive)
   * @param mixed $channel channel uid owning the word-by-word block
   * @return string|false JSON list of words, or false when the channel has no
   *                      block for the paragraph or no word could be read
   */
  public function getWbw($book, $para, $start, $end, $channel)
  {
      $wbwBlock = WbwBlock::where('channel_uid', $channel)
          ->where('book_id', $book)
          ->where('paragraph', $para)
          ->select('uid')
          ->first();
      if (!$wbwBlock) {
          return false;
      }

      $wbwRows = Wbw::where('block_uid', $wbwBlock->uid)
          ->whereBetween('wid', [$start, $end])
          ->select(['book_id', 'paragraph', 'wid', 'data', 'uid', 'editor_id', 'created_at', 'updated_at'])
          ->orderBy('wid')
          ->get();

      // Output key => XML child element, for fields that are always emitted.
      $coreFields = [
          'word' => 'pali',
          'real' => 'real',
          'meaning' => 'mean',
          'type' => 'type',
          'grammar' => 'gramma',
          'case' => 'case',
          'parent' => 'parent',
          'style' => 'style',
          'factors' => 'org',
          'factorMeaning' => 'om',
      ];
      // Output key => [XML child element, status used when the element has
      // no status attribute], for fields emitted only when the element exists.
      $optionalFields = [
          'parent2' => ['parent2', 0],
          'grammar2' => ['pg', 0],
          'relation' => ['rela', 7],
          'bookMarkText' => ['bmt', 7],
          'bookMarkColor' => ['bmc', 7],
          'note' => ['note', 7],
      ];

      $wbwContent = [];
      foreach ($wbwRows as $wbwrow) {
          // Replace the html entity and the unclosed tag the XML parser rejects.
          $markup = str_replace(['&nbsp;', '<br>'], ' ', $wbwrow->data);

          try {
              $xmlWord = simplexml_load_string('<root>' . $markup . '</root>');
          } catch (\Exception $e) {
              Log::error('corpus', ['error' => $e]);
              continue;
          }
          // Without an error-to-exception handler a parse failure is reported
          // by a false return value rather than an exception.
          if ($xmlWord === false) {
              Log::error('corpus', ['error' => 'malformed wbw xml', 'uid' => $wbwrow->uid]);
              continue;
          }

          $wordsList = $xmlWord->xpath('//word') ?: [];
          if (count($wordsList) === 0) {
              continue;
          }

          // Depends on the row only, so it is queried once instead of per word.
          $hasComment = Discussion::where('res_id', $wbwrow->uid)->exists();

          foreach ($wordsList as $word) {
              // NOTE(review): earlier code also derived a normalised case
              // string ('#' -> '$', '.' removed, '$' trimmed) but never used
              // it; the raw value is what is emitted. Confirm which is wanted.
              $wbwId = explode('-', $word->id->__toString());

              $entry = [
                  'uid' => $wbwrow->uid,
                  'book' => $wbwrow->book_id,
                  'para' => $wbwrow->paragraph,
                  'sn' => array_slice($wbwId, 2),
              ];

              foreach ($coreFields as $key => $tag) {
                  // 'style' never takes its status from the XML attribute.
                  $hasStatus = $key !== 'style' && isset($word->{$tag}['status']);
                  $entry[$key] = [
                      'value' => $word->{$tag}->__toString(),
                      'status' => $hasStatus ? (int) $word->{$tag}['status'] : 0,
                  ];
              }

              // A float when <cf> exists, otherwise the empty string.
              $entry['confidence'] = isset($word->cf)
                  ? (float) $word->cf->__toString()
                  : $word->cf->__toString();
              $entry['created_at'] = $wbwrow->created_at;
              $entry['updated_at'] = $wbwrow->updated_at;
              $entry['hasComment'] = $hasComment;

              foreach ($optionalFields as $key => $spec) {
                  [$tag, $defaultStatus] = $spec;
                  if (!isset($word->{$tag})) {
                      continue;
                  }
                  $entry[$key] = [
                      'value' => $word->{$tag}->__toString(),
                      'status' => isset($word->{$tag}['status'])
                          ? (int) $word->{$tag}['status']
                          : $defaultStatus,
                  ];
              }

              if (isset($word->attachments)) {
                  $entry['attachments'] = json_decode($word->attachments->__toString());
              }

              $wbwContent[] = $entry;
          }
      }

      if (count($wbwContent) === 0) {
          return false;
      }

      return \json_encode($wbwContent, JSON_UNESCAPED_UNICODE);
  }
  /**
   * Build the channel index: channel id => channel record with its studio
   * attached, in the order of $channels.
   *
   * All channels are fetched with a single query; ids without a record are
   * logged and skipped.
   *
   * @param array       $channels channel uids
   * @param string|null $type     when given, keep only channels of this type
   * @return array channel id => channel record (uid, type, name, lang,
   *               owner_uid, studio)
   */
  public function getChannelIndex($channels, $type = null)
  {
      $channelsByUid = Channel::whereIn('uid', $channels)
          ->select(['uid', 'type', 'name', 'lang', 'owner_uid'])
          ->get()
          ->keyBy('uid');

      $indexChannel = [];
      foreach ($channels as $channelId) {
          // The database may return a uid in a different letter case than the
          // caller supplied (presumably lower case for uuid-typed columns —
          // confirm), so fall back to a lower-cased lookup.
          $channelInfo = $channelsByUid->get($channelId)
              ?? $channelsByUid->get(strtolower((string) $channelId));
          if (!$channelInfo) {
              Log::error('no channel id' . $channelId);
              continue;
          }
          if ($type !== null && $channelInfo->type !== $type) {
              continue;
          }
          $channelInfo->studio = StudioApi::getById($channelInfo->owner_uid);
          $indexChannel[$channelId] = $channelInfo;
      }

      return $indexChannel;
  }
  545. }