| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567 |
- <?php
- // api-v8/app/Services/OpenSearchService.php
- namespace App\Services;
- use App\Models\Sentence;
- use App\Models\Channel;
- use App\Models\PaliText;
- use App\Models\WbwBlock;
- use App\Models\Wbw;
- use App\Models\Discussion;
- use App\Models\PaliSentence;
- use App\Models\SentSimIndex;
- use Illuminate\Support\Str;
- use Illuminate\Support\Facades\Cache;
- use App\Http\Api\MdRender;
- use App\Http\Api\SuggestionApi;
- use App\Http\Api\ChannelApi;
- use App\Http\Api\UserApi;
- use Illuminate\Support\Facades\Log;
- use Illuminate\Support\Arr;
- use App\Http\Api\StudioApi;
- class PaliContentService
- {
/**
 * Default values for a rendered-content payload.
 *
 * No method of this class shown here reads or writes it.
 *
 * @var array
 */
protected $result = [
    'uid' => '',
    'title' => '',
    'path' => [],
    'sub_title' => '',
    'summary' => '',
    'content' => '',
    'content_type' => 'html',
    'toc' => [],
    'status' => 30,
    'lang' => '',
    'created_at' => '',
    'updated_at' => '',
];

/**
 * Channel ids used as word-by-word sources.
 *
 * makeContentObj() consults element 0 when an original-text sentence holds
 * JSON content outside read mode. Nothing in this class populates the list.
 *
 * @var array
 */
protected $wbwChannels = [];

/**
 * Columns to select when querying sentence rows.
 *
 * @var string[]
 */
protected $selectCol = [
    'uid',
    'book_id',
    'paragraph',
    'word_start',
    'word_end',
    'channel_uid',
    'content',
    'content_type',
    'editor_uid',
    'acceptor_uid',
    'pr_edit_at',
    'fork_at',
    'create_time',
    'modify_time',
    'created_at',
    'updated_at',
];

/**
 * Uuid of the current user, forwarded to _sentCanReadCount(); a falsy value
 * selects the 'guest' cache entry there.
 *
 * @var string|null
 */
protected $userUuid = null;

/**
 * Value passed as the 'debug' option to MdRender.
 *
 * @var array
 */
protected $debug = [];
/**
 * Count the readable channels, grouped by channel type, that hold a
 * non-empty version of one sentence, plus the sentence's similar-sentence
 * count.
 *
 * @param mixed       $book     Book id of the sentence.
 * @param mixed       $para     Paragraph number of the sentence.
 * @param mixed       $start    First word index of the sentence.
 * @param mixed       $end      Last word index of the sentence.
 * @param string|null $userUuid Current user; a falsy value means guest.
 *
 * @return array Keys: simNum, tranNum, nissayaNum, commNum, originNum.
 */
public static function _sentCanReadCount($book, $para, $start, $end, $userUuid = null)
{
    // The list of readable channels is cached per user ('guest' when anonymous).
    $keyCanRead = '/channel/can-read/' . ($userUuid ? $userUuid : 'guest');
    $channelCanRead = Cache::remember(
        $keyCanRead,
        config('mint.cache.expire'),
        function () use ($userUuid) {
            return ChannelApi::getCanReadByUser($userUuid);
        }
    );

    // Readable channels that contain a non-empty row for exactly this sentence.
    $channels = Sentence::where('book_id', $book)
        ->where('paragraph', $para)
        ->where('word_start', $start)
        ->where('word_end', $end)
        ->where('strlen', '<>', 0)
        ->whereIn('channel_uid', $channelCanRead)
        ->select('channel_uid')
        ->groupBy('channel_uid')
        ->get();

    $channelList = [];
    foreach ($channels as $channel) {
        if (Str::isUuid($channel->channel_uid)) {
            $channelList[] = $channel->channel_uid;
        }
    }

    // Initialise every counter up front so the result always has the same
    // keys, in the same order, regardless of which branches run below.
    $output = [
        'simNum' => 0,
        'tranNum' => 0,
        'nissayaNum' => 0,
        'commNum' => 0,
        'originNum' => 0,
    ];

    $simId = PaliSentence::where('book', $book)
        ->where('paragraph', $para)
        ->where('word_begin', $start)
        ->where('word_end', $end)
        ->value('id');
    if ($simId) {
        // value() yields null when no index row exists; report 0 in that case.
        $output['simNum'] = SentSimIndex::where('sent_id', $simId)->value('count') ?? 0;
    }

    // Channel type => counter it increments. Other types are not counted.
    $counterByType = [
        'translation' => 'tranNum',
        'nissaya' => 'nissayaNum',
        'commentary' => 'commNum',
        'original' => 'originNum',
    ];
    $channelInfo = Channel::whereIn('uid', $channelList)->select('type')->get();
    foreach ($channelInfo as $channel) {
        $type = $channel->type;
        if (is_string($type) && isset($counterByType[$type])) {
            $output[$counterByType[$type]]++;
        }
    }

    return $output;
}
/**
 * Create an empty sentence node and, for books with an id below 1000,
 * attach its table-of-contents path and per-type channel counts.
 *
 * @param mixed $book       Book id.
 * @param mixed $para       Paragraph number.
 * @param mixed $word_start First word index of the sentence.
 * @param mixed $word_end   Last word index of the sentence.
 *
 * @return array Sentence node with empty origin/translation/commentaries lists.
 */
private function newSent($book, $para, $word_start, $word_end)
{
    $sentId = "{$book}-{$para}-{$word_start}-{$word_end}";
    $sent = [
        'id' => $sentId,
        'book' => $book,
        'para' => $para,
        'wordStart' => $word_start,
        'wordEnd' => $word_end,
        'origin' => [],
        'translation' => [],
        'commentaries' => [],
    ];

    // NOTE(review): presumably ids below 1000 are the books that have
    // PaliText rows; confirm against the data model.
    if (!($book < 1000)) {
        return $sent;
    }

    // Build the list of channel counts for this sentence.
    $channelCount = self::_sentCanReadCount($book, $para, $word_start, $word_end, $this->userUuid);

    // The path column may be missing (no PaliText row) or hold invalid
    // JSON; in both cases fall back to an empty path instead of iterating
    // over null.
    $rawPath = PaliText::where('book', $book)->where('paragraph', $para)->value('path');
    if (is_string($rawPath)) {
        $path = json_decode($rawPath, true);
    } else {
        $path = $rawPath;
    }

    $sent['path'] = [];
    if (is_array($path)) {
        foreach ($path as $node) {
            // Each path node also carries its title under the key 'paliTitle'.
            $node['paliTitle'] = $node['title'];
            $sent['path'][] = $node;
        }
    }

    $sent['tranNum'] = $channelCount['tranNum'];
    $sent['nissayaNum'] = $channelCount['nissayaNum'];
    $sent['commNum'] = $channelCount['commNum'];
    $sent['originNum'] = $channelCount['originNum'];
    $sent['simNum'] = $channelCount['simNum'];

    return $sent;
}
/**
 * Build paragraph-level content nodes from sentence rows.
 *
 * Rows are grouped by "book-paragraph". Every distinct sentence of a
 * paragraph becomes one node (see newSent()), and for every channel in
 * $indexChannel one entry is appended to that node's origin, commentaries
 * or translation list. A channel without a row for the sentence still gets
 * an entry with empty content, and a warning is logged.
 *
 * Side effect: a `sid` property ("<sentence id>_<channel uid>") is set on
 * every element of $record.
 *
 * @param iterable $record       Sentence rows exposing book_id, paragraph,
 *                               word_start, word_end, channel_uid and the
 *                               content/metadata columns read below.
 * @param string   $mode         read | edit | wbw
 * @param array    $indexChannel Channel objects keyed by channel uid, each
 *                               with name, type, uid, lang and optionally
 *                               studio.
 * @param string   $format       Output format forwarded to MdRender.
 *
 * @return array List of paragraph nodes (book, para, channels, sentences,
 *               mode, children).
 */
public function makeContentObj($record, $mode, $indexChannel, $format = 'react')
{
    // Group the rows by paragraph and tag each row with its sentence/channel id.
    $paraIndex = [];
    foreach ($record as $value) {
        $currSentId = "{$value->book_id}-{$value->paragraph}-{$value->word_start}-{$value->word_end}";
        $value->sid = "{$currSentId}_{$value->channel_uid}";
        $paraIndex["{$value->book_id}-{$value->paragraph}"][] = $value;
    }

    $channelsId = [];
    foreach ($indexChannel as $channelId => $info) {
        $channelsId[] = $channelId;
    }
    // The last channel id is deliberately left out of the list.
    // NOTE(review): presumably the caller appends the original-text channel
    // last; confirm before changing.
    array_pop($channelsId);

    // For every paragraph, look up each sentence in every channel and fill it in.
    $paragraphs = [];
    foreach ($paraIndex as $currParaId => $sentData) {
        $arrParaId = explode('-', $currParaId);

        // Distinct sentences of the paragraph, and the first row seen for
        // each sentence/channel pair (same first-match rule as a linear scan).
        $sentIndex = [];
        $rowBySid = [];
        foreach ($sentData as $sent) {
            $currSentId = "{$sent->book_id}-{$sent->paragraph}-{$sent->word_start}-{$sent->word_end}";
            $sentIndex[$currSentId] = [$sent->book_id, $sent->paragraph, $sent->word_start, $sent->word_end];
            if (!isset($rowBySid[$sent->sid])) {
                $rowBySid[$sent->sid] = $sent;
            }
        }

        $paraProps = [
            'book' => $arrParaId[0],
            'para' => $arrParaId[1],
            'channels' => $channelsId,
            'sentences' => array_values($sentIndex),
            'mode' => $mode,
            'children' => [],
        ];

        // Build the sentence list of the paragraph.
        foreach ($sentIndex as $ids => $arrSentId) {
            list($book, $para, $wordStart, $wordEnd) = $arrSentId;
            $sentNode = $this->newSent($book, $para, $wordStart, $wordEnd);

            foreach ($indexChannel as $channelId => $info) {
                $sid = "{$ids}_{$channelId}";
                $newSent = [
                    'content' => '',
                    'html' => '',
                    'book' => $book,
                    'para' => $para,
                    'wordStart' => $wordStart,
                    'wordEnd' => $wordEnd,
                    'channel' => [
                        'name' => $info->name,
                        'type' => $info->type,
                        'id' => $info->uid,
                        'lang' => $info->lang,
                    ],
                    'studio' => isset($info->studio) ? $info->studio : null,
                    'updateAt' => '',
                    'suggestionCount' => SuggestionApi::getCountBySent($book, $para, $wordStart, $wordEnd, $channelId),
                ];

                $row = isset($rowBySid[$sid]) ? $rowBySid[$sid] : null;
                if ($row) {
                    $newSent['id'] = $row->uid;
                    $newSent['content'] = $row->content;
                    $newSent['contentType'] = $row->content_type;
                    $newSent['editor'] = UserApi::getByUuid($row->editor_uid);
                    $newSent['forkAt'] = $row->fork_at;
                    /*
                     * TODO: migrate the data.
                     * The legacy API never refreshed updated_at, so for old
                     * rows updated_at is later than modify_time. Until that
                     * is fixed the timestamp is derived from modify_time.
                     */
                    $newSent['updateAt'] = self::formatModifyTime($row->modify_time);
                    $newSent['createdAt'] = $row->created_at;
                    if ($mode !== 'read' && !empty($row->acceptor_uid)) {
                        $newSent['acceptor'] = UserApi::getByUuid($row->acceptor_uid);
                        $newSent['prEditAt'] = $row->pr_edit_at;
                    }

                    switch ($info->type) {
                        case 'wbw':
                        case 'original':
                            // Read mode shows the original text as rendered
                            // html only. In the other modes JSON content is
                            // replaced by word-by-word data when available.
                            if ($mode === 'read') {
                                $newSent['content'] = '';
                                $newSent['html'] = self::renderSentence($row, $mode, $format, 'translation');
                            } elseif ($row->content_type === 'json') {
                                $newSent['channel']['type'] = 'wbw';
                                if (isset($this->wbwChannels[0])) {
                                    $wbwChannelId = $this->wbwChannels[0];
                                    $wbwChannel = $indexChannel[$wbwChannelId];
                                    $newSent['channel']['name'] = $wbwChannel->name;
                                    $newSent['channel']['lang'] = $wbwChannel->lang;
                                    $newSent['channel']['id'] = $wbwChannelId;
                                    // Look for word-by-word data in that
                                    // channel; when found it replaces the
                                    // sentence content.
                                    $wbwData = $this->getWbw($book, $para, $wordStart, $wordEnd, $wbwChannelId);
                                    if ($wbwData) {
                                        $newSent['content'] = $wbwData;
                                        $newSent['contentType'] = 'json';
                                        $newSent['html'] = '';
                                        $newSent['studio'] = $wbwChannel->studio;
                                    }
                                }
                            } else {
                                $newSent['content'] = $row->content;
                                $newSent['html'] = self::renderSentence($row, $mode, $format, 'translation');
                            }
                            break;
                        case 'nissaya':
                            // NOTE(review): the cache key does not include
                            // $mode although the render call receives it;
                            // confirm that nissaya output is mode-independent.
                            $newSent['html'] = Cache::remember(
                                "/sent/{$channelId}/{$ids}/{$format}",
                                config('mint.cache.expire'),
                                function () use ($row, $mode, $format) {
                                    if ($row->content_type === 'markdown') {
                                        return self::renderSentence($row, $mode, $format, 'nissaya');
                                    }
                                    return null;
                                }
                            );
                            break;
                        case 'commentary':
                            // Commentaries are rendered against the channel
                            // list of the paragraph.
                            $newSent['html'] = $this->convertSentence($row, $mode, $format, $channelsId);
                            break;
                        default:
                            $newSent['html'] = $this->convertSentence($row, $mode, $format, [$row->channel_uid]);
                            break;
                    }
                } else {
                    Log::warning('no sentence record');
                }

                switch ($info->type) {
                    case 'wbw':
                    case 'original':
                        $sentNode['origin'][] = $newSent;
                        break;
                    case 'commentary':
                        $sentNode['commentaries'][] = $newSent;
                        break;
                    default:
                        $sentNode['translation'][] = $newSent;
                        break;
                }
            }
            $paraProps['children'][] = $sentNode;
        }
        $paragraphs[] = $paraProps;
    }

    return $paragraphs;
}

/**
 * Format a millisecond Unix timestamp as "Y-m-d H:i:s.mmm UTC".
 *
 * Uses gmdate() so the value really is UTC whatever the default timezone
 * is, and zero-pads the milliseconds (5 ms is ".005", not ".5").
 *
 * @param mixed $modifyTime Milliseconds since the Unix epoch.
 *
 * @return string
 */
private static function formatModifyTime($modifyTime)
{
    $milliseconds = (int) $modifyTime;

    return gmdate('Y-m-d H:i:s.', intdiv($milliseconds, 1000))
        . sprintf('%03d', $milliseconds % 1000)
        . ' UTC';
}

/**
 * Render one sentence row through the static MdRender::render() entry point.
 *
 * @param object $row         Sentence row (content, channel_uid, content_type).
 * @param string $mode        Display mode.
 * @param string $format      Output format.
 * @param string $channelType Channel type handed to the renderer.
 *
 * @return mixed Whatever MdRender::render() returns.
 */
private static function renderSentence($row, $mode, $format, $channelType)
{
    return MdRender::render(
        $row->content,
        [$row->channel_uid],
        null,
        $mode,
        $channelType,
        $row->content_type,
        $format
    );
}

/**
 * Render one sentence row through an MdRender instance configured with the
 * 'translation' channel type.
 *
 * @param object $row      Sentence row (content, content_type).
 * @param string $mode     Display mode.
 * @param string $format   Output format.
 * @param array  $channels Channel ids passed to MdRender::convert().
 *
 * @return mixed Whatever MdRender::convert() returns.
 */
private function convertSentence($row, $mode, $format, $channels)
{
    $mdRender = new MdRender([
        'debug' => $this->debug,
        'format' => $format,
        'mode' => $mode,
        'channelType' => 'translation',
        'contentType' => $row->content_type,
    ]);

    return $mdRender->convert($row->content, $channels);
}
/**
 * Load the word-by-word data of one sentence from a channel and return it
 * as a JSON string.
 *
 * Used outside read mode, where the original text is shown as word-by-word
 * data.
 *
 * @param mixed  $book    Book id.
 * @param mixed  $para    Paragraph number.
 * @param mixed  $start   First word id (inclusive).
 * @param mixed  $end     Last word id (inclusive).
 * @param string $channel Channel uid that owns the word-by-word block.
 *
 * @return string|false JSON array of word objects, or false when the channel
 *                      has no block for the paragraph or no word could be read.
 */
public function getWbw($book, $para, $start, $end, $channel)
{
    // Locate the word-by-word block of this paragraph in the channel.
    $wbwBlock = WbwBlock::where('channel_uid', $channel)
        ->where('book_id', $book)
        ->where('paragraph', $para)
        ->select('uid')
        ->first();
    if (!$wbwBlock) {
        return false;
    }

    // Fetch the rows of the block that fall inside the sentence.
    $wbwRows = Wbw::where('block_uid', $wbwBlock->uid)
        ->whereBetween('wid', [$start, $end])
        ->select(['book_id', 'paragraph', 'wid', 'data', 'uid', 'editor_id', 'created_at', 'updated_at'])
        ->orderBy('wid')
        ->get();

    $wbwContent = [];
    foreach ($wbwRows as $wbwRow) {
        $xmlWord = $this->parseWbwXml($wbwRow->data);
        if ($xmlWord === null) {
            continue;
        }
        $wordsList = $xmlWord->xpath('//word');
        if (empty($wordsList)) {
            continue;
        }
        // The discussion flag depends only on the row, so query it once per
        // row instead of once per word.
        $hasComment = Discussion::where('res_id', $wbwRow->uid)->exists();
        foreach ($wordsList as $word) {
            $wbwContent[] = $this->buildWbwWord($wbwRow, $word, $hasComment);
        }
    }

    if (count($wbwContent) === 0) {
        return false;
    }

    return \json_encode($wbwContent, JSON_UNESCAPED_UNICODE);
}

/**
 * Parse the XML fragment stored in a word-by-word row.
 *
 * simplexml_load_string() reports malformed XML by returning false, not by
 * throwing, so the result is checked explicitly. libxml errors are
 * collected internally and the previous libxml setting is restored.
 *
 * @param mixed $data Raw value of the row's data column.
 *
 * @return \SimpleXMLElement|null Parsed <root> element, or null on failure
 *                                (the failure is logged).
 */
private function parseWbwXml($data)
{
    // Non-breaking spaces and <br> are turned into plain spaces before
    // parsing: "&nbsp;" is not a predefined XML entity and an unclosed
    // <br> is not well-formed XML.
    // NOTE(review): the original replaced a single space-like character;
    // handling both the entity and U+00A0 is assumed to be the intent.
    $normalized = str_replace(['&nbsp;', "\u{00A0}", '<br>'], ' ', (string) $data);
    $xmlString = '<root>' . $normalized . '</root>';

    $previous = libxml_use_internal_errors(true);
    try {
        $xml = simplexml_load_string($xmlString);
        if ($xml === false) {
            $messages = [];
            foreach (libxml_get_errors() as $error) {
                $messages[] = trim($error->message);
            }
            Log::error('corpus', ['error' => $messages]);
            return null;
        }
        return $xml;
    } catch (\Exception $e) {
        Log::error('corpus', ['error' => $e]);
        return null;
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }
}

/**
 * Convert one <word> element into the array structure that is JSON-encoded
 * by getWbw().
 *
 * @param object            $wbwRow     Row the word belongs to (uid, book_id,
 *                                      paragraph, created_at, updated_at).
 * @param \SimpleXMLElement $word       The <word> element.
 * @param bool              $hasComment Whether a discussion exists for the row.
 *
 * @return array
 */
private function buildWbwWord($wbwRow, $word, $hasComment)
{
    $wbwId = explode('-', (string) $word->id);
    $entry = [
        'uid' => $wbwRow->uid,
        'book' => $wbwRow->book_id,
        'para' => $wbwRow->paragraph,
        // Everything after the first two '-'-separated parts of the word id.
        'sn' => array_slice($wbwId, 2),
    ];

    // Always-present fields: output key => XML tag. The status defaults to
    // 0 and is taken from the tag's status attribute when there is one.
    // 'style' never reads the attribute.
    // NOTE(review): 'case' is emitted as stored. Earlier code computed a
    // normalised form ('#' -> '$', '.' removed, '$$' collapsed, '$' trimmed)
    // but never used it; confirm which form consumers expect.
    $baseFields = [
        'word' => 'pali',
        'real' => 'real',
        'meaning' => 'mean',
        'type' => 'type',
        'grammar' => 'gramma',
        'case' => 'case',
        'parent' => 'parent',
        'style' => 'style',
        'factors' => 'org',
        'factorMeaning' => 'om',
    ];
    foreach ($baseFields as $key => $tag) {
        $node = $word->{$tag};
        $status = 0;
        if ($key !== 'style' && isset($node['status'])) {
            $status = (int) $node['status'];
        }
        $entry[$key] = ['value' => (string) $node, 'status' => $status];
    }

    $entry['confidence'] = (string) $word->cf;
    $entry['created_at'] = $wbwRow->created_at;
    $entry['updated_at'] = $wbwRow->updated_at;
    $entry['hasComment'] = $hasComment;

    // Fields emitted only when the tag exists:
    // output key => [XML tag, status used when the attribute is missing].
    $optionalFields = [
        'parent2' => ['parent2', 0],
        'grammar2' => ['pg', 0],
        'relation' => ['rela', 7],
        'bookMarkText' => ['bmt', 7],
        'bookMarkColor' => ['bmc', 7],
        'note' => ['note', 7],
    ];
    foreach ($optionalFields as $key => $spec) {
        $tag = $spec[0];
        $defaultStatus = $spec[1];
        if (!isset($word->{$tag})) {
            continue;
        }
        $node = $word->{$tag};
        $entry[$key] = [
            'value' => (string) $node,
            'status' => isset($node['status']) ? (int) $node['status'] : $defaultStatus,
        ];
    }

    // When the tag exists the confidence is a float; otherwise it stays the
    // empty string assigned above.
    if (isset($word->cf)) {
        $entry['confidence'] = (float) (string) $word->cf;
    }
    if (isset($word->attachments)) {
        $entry['attachments'] = json_decode((string) $word->attachments);
    }

    return $entry;
}
/**
 * Build the channel index: channel objects keyed by channel uid, each with
 * a `studio` property describing its owner.
 *
 * The result follows the order of $channels. Ids without a channel row are
 * logged and skipped; when $type is given, channels of any other type are
 * skipped too.
 *
 * @param array       $channels Channel uids.
 * @param string|null $type     Keep only channels of this type; null keeps all.
 *
 * @return array Channel objects keyed by the ids given in $channels.
 */
public function getChannelIndex($channels, $type = null)
{
    // Load all requested channels with a single query and index them by
    // uid, instead of querying again for every id in the loop.
    $rows = Channel::whereIn('uid', $channels)
        ->select(['uid', 'type', 'name', 'lang', 'owner_uid'])
        ->get();
    $channelByUid = [];
    foreach ($rows as $row) {
        // Lower-cased so a lookup cannot miss only because of letter case.
        $channelByUid[strtolower((string) $row->uid)] = $row;
    }

    $indexChannel = [];
    foreach ($channels as $channelId) {
        $lookupKey = strtolower((string) $channelId);
        if (!isset($channelByUid[$lookupKey])) {
            Log::error('no channel id' . $channelId);
            continue;
        }
        $channelInfo = $channelByUid[$lookupKey];
        if ($type !== null && $channelInfo->type !== $type) {
            continue;
        }
        $channelInfo->studio = StudioApi::getById($channelInfo->owner_uid);
        $indexChannel[$channelId] = $channelInfo;
    }

    return $indexChannel;
}
- }
|