MdRender.php 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680
  1. <?php
  2. namespace App\Http\Api;
  3. use Illuminate\Support\Str;
  4. use mustache\mustache;
  5. use App\Models\DhammaTerm;
  6. use App\Models\PaliText;
  7. use App\Models\Channel;
  8. use App\Http\Controllers\CorpusController;
  9. use Illuminate\Support\Facades\Cache;
  10. use App\Tools\RedisClusters;
  11. use Illuminate\Support\Facades\Log;
  12. use App\Tools\Markdown;
  // Upper bound on the number of simultaneously open "{{" that tplSplit()
  // tracks; when the count reaches this value tplSplit() reports error 2.
  \define('STACK_DEEP', 8);
  14. class MdRender{
  /**
   * Rendering options. Every key can be overridden through the constructor.
   *
   * mode: text rendering mode, "read" (reading) or "edit" (editing).
   */
  protected $options = [
      'mode'        => 'read',
      'channelType' => 'translation',
      'contentType' => "markdown",
      'format'      => 'react',
      'debug'       => [],
      'studioId'    => null,
      'lang'        => 'zh-Hans',
      'footnote'    => false,
      'paragraph'   => false,
  ];
  /**
   * Overlay caller-supplied options on top of the defaults, key by key.
   *
   * @param array $options option name => value; names that are not part of
   *                       the defaults are stored as well
   */
  public function __construct($options=[])
  {
      foreach ($options as $name => $setting) {
          $this->options[$name] = $setting;
      }
  }
  /**
   * Duplicate paragraphs that consist solely of sentence templates, so the
   * original text and the translation can be read paragraph by paragraph.
   *
   * Does nothing unless the "paragraph" option is truthy. Paragraphs are
   * separated by a blank line ("\n\n"). A paragraph that contains at least
   * one {{sent|id=...}} template and nothing else but whitespace is emitted
   * twice: once with "|text=origin" and once with "|text=translation"
   * inserted before every "}}". All other paragraphs pass through unchanged.
   *
   * @param string $input wiki text whose sentence templates are already in
   *                      {{sent|id=...}} form
   * @return string the text with pure-template paragraphs duplicated
   */
  private function preprocessingForParagraph($input){
      if(!$this->options['paragraph']){
          return $input;
      }
      $pattern = "/\{\{sent\|id=([0-9].+?)\}\}/";
      $output = [];
      foreach (explode("\n\n",$input) as $paragraph) {
          $residue = preg_replace($pattern,'',$paragraph,-1,$templateCount);
          // Requiring a real match (and comparing against '' instead of using
          // empty()) keeps blank paragraphs and a literal "0" from being
          // mistaken for a pure-template paragraph. A null residue means the
          // regex engine failed; such a paragraph is passed through as is.
          $isPureTemplates = is_string($residue)
              && $templateCount > 0
              && trim($residue) === '';
          if($isPureTemplates){
              $output[] = str_replace('}}','|text=origin}}',$paragraph);
              $output[] = str_replace('}}','|text=translation}}',$paragraph);
          }else{
              $output[] = $paragraph;
          }
      }
      return implode("\n\n",$output);
  }
  /**
   * Cut a string into three parts around its first complete {{...}}
   * template: the text before it, the template itself, and the text after.
   *
   * Nested templates are followed with a stack of "{{" offsets so that the
   * "}}" closing the outermost template is the one that ends the match.
   *
   * @param string $tpl text to scan
   * @return array ['data' => [before, template, after], 'error' => code]
   *               0: success, or no "{{" present (everything is "before")
   *               1: the template is never closed
   *               2: the number of open templates reached STACK_DEEP
   *               For codes 1 and 2 the untouched input is returned as
   *               "before" and the other two parts are empty strings.
   */
  private function tplSplit($tpl){
      $start = strpos($tpl,'{{');
      if($start === FALSE){
          // no template at all
          return ['data'=>[$tpl,'',''],'error'=>0];
      }

      // $cursor is the offset of the "{{" or "}}" consumed most recently;
      // $rest is everything behind that two-character token.
      $cursor = $start;
      $openers = [$cursor];
      $rest = substr($tpl,$cursor+2);
      while (!empty($rest) && count($openers)>0 && count($openers)<STACK_DEEP) {
          $nextOpen = strpos($rest,"{{");
          $nextClose = strpos($rest,"}}");
          if($nextClose === FALSE){
              // nothing left that could close the open template(s)
              break;
          }
          if($nextOpen !== FALSE && $nextOpen < $nextClose){
              // another template starts before the next "}}": go one level deeper
              $cursor += 2 + $nextOpen;
              $openers[] = $cursor;
          }else{
              // the next token is a "}}": it closes the innermost open template
              $cursor += 2 + $nextClose;
              array_pop($openers);
          }
          $rest = substr($tpl,$cursor+2);
      }

      $stillOpen = count($openers);
      if($stillOpen > 0){
          return [
              'data'=>[$tpl,'',''],
              'error'=>($stillOpen === STACK_DEEP ? 2 : 1),
          ];
      }
      return [
          'data'=>[
              substr($tpl,0,$start),
              substr($tpl,$start,$cursor-$start+2),
              substr($tpl,$cursor+2),
          ],
          'error'=>0,
      ];
  }
  /**
   * Render every {{...}} template contained in wiki-flavoured markdown.
   *
   * The text is consumed front to back with tplSplit(). Each template is
   * rewritten into
   *   <MdTpl class="tpl" name="..."><span class="param" ...>...</span></MdTpl>
   * markup and handed to xml2tpl(); the text between templates is copied
   * unchanged.
   *
   * @param string $wiki      markdown containing {{name|param|key=value}} templates
   * @param array  $channelId channel ids forwarded to xml2tpl()
   * @return string the text with all templates replaced by xml2tpl() output
   */
  private function wiki2xml(string $wiki,$channelId=[]):string{
      $remain = $wiki;
      $buffer = [];
      do {
          $arrWiki = $this->tplSplit($remain);
          $buffer[] = $arrWiki['data'][0];
          $tpl = $arrWiki['data'][1];
          if((string)$tpl !== ''){
              /**
               * Extract the template name and its parameters
               */
              $tpl = str_replace("|\n","|",$tpl);
              $pattern = "/\{\{(.+?)\|/";
              $replacement = '<MdTpl class="tpl" name="$1"><param>';
              $tpl = preg_replace($pattern,$replacement,$tpl);
              $tpl = str_replace("}}","</param></MdTpl>",$tpl);
              $tpl = str_replace("|","</param><param>",$tpl);
              /**
               * key=value parameters: move the key into a name attribute
               */
              $pattern = "/<param>([a-z]+?)=/";
              $replacement = '<param name="$1">';
              $tpl = preg_replace($pattern,$replacement,$tpl);
              // <param> is not an HTML element; use <span class="param"> instead
              $tpl = str_replace('<param','<span class="param"',$tpl);
              $tpl = str_replace('</param>','</span>',$tpl);
              $tpl = $this->xml2tpl($tpl,$channelId);
              $buffer[] = $tpl;
          }
          $remain = $arrWiki['data'][2];
          // Compare against '' rather than using empty(): a remainder of "0"
          // is real text and must still be copied to the output.
      } while ((string)$remain !== '');
      return implode('' , $buffer);
  }
  /**
   * Find the <MdTpl> element that has a <param name="id"> child whose text
   * equals $id.
   *
   * NOTE(review): wiki2xml() rewrites <param> into <span class="param">, so
   * markup coming from there would not match the "param" element name
   * checked here. Confirm which input this method is meant for.
   *
   * @param string $xml XML document containing <MdTpl> elements
   * @param string $id  id value to look for
   * @return string XML of the first matching <MdTpl>, or "<div></div>" when
   *                the input cannot be parsed or nothing matches
   */
  private function xmlQueryId(string $xml, string $id):string{
      $notFound = "<div></div>";
      try{
          $dom = simplexml_load_string($xml);
      }catch(\Exception $e){
          // only reached when an error handler turns libxml warnings into exceptions
          Log::error($e);
          return $notFound;
      }
      if($dom === false){
          // simplexml_load_string() signals malformed input by returning
          // false; calling xpath() on that would be a fatal error
          return $notFound;
      }
      $tpl_list = $dom->xpath('//MdTpl');
      if(!is_array($tpl_list)){
          return $notFound;
      }
      foreach ($tpl_list as $tpl) {
          foreach ($tpl->children() as $param) {
              if($param->getName() !== "param"){
                  continue;
              }
              if((string)$param['name'] === "id" && (string)$param === $id){
                  return $tpl->asXML();
              }
          }
      }
      return $notFound;
  }
  /**
   * Collect the sentence ids referenced by the <MdTpl name="sent"> elements
   * of an XML document.
   *
   * For every such element the text of its first non-empty <param> child is
   * taken.
   *
   * @param string $xml XML document containing <MdTpl> elements
   * @return array list of parameter texts in document order; empty when the
   *               input cannot be parsed or holds no sentence template
   */
  public static function take_sentence(string $xml):array{
      $output = [];
      try{
          $dom = simplexml_load_string($xml);
      }catch(\Exception $e){
          // only reached when an error handler turns libxml warnings into exceptions
          Log::error($e);
          return $output;
      }
      if($dom === false){
          // malformed input: simplexml_load_string() returns false instead of throwing
          return $output;
      }
      $tpl_list = $dom->xpath('//MdTpl');
      if(!is_array($tpl_list)){
          return $output;
      }
      foreach ($tpl_list as $tpl) {
          if((string)$tpl['name'] !== "sent"){
              continue;
          }
          foreach ($tpl->children() as $param) {
              if($param->getName() !== "param"){
                  continue;
              }
              $sent = (string)$param;
              if(!empty($sent)){
                  // only the first non-empty parameter counts
                  $output[] = $sent;
                  break;
              }
          }
      }
      return $output;
  }
  /**
   * Turn one <MdTpl> fragment into the output for the configured format.
   *
   * The fragment is parsed as HTML with DOMDocument (MdTpl is renamed to dfn
   * for the parse). For every template element the parameters are collected
   * from its <span> children and passed to TemplateRender together with the
   * channels and the current options.
   *
   * @param string $xml       <MdTpl name=".."><span class="param" ..>..</span></MdTpl> markup
   * @param array  $channelId channel ids; each is looked up by its "uid" column
   * @return string "react": the fragment with "props"/"tpl" attributes and
   *                the rendered child element added;
   *                "unity": "{{tpl|props}}" of the last rendered template,
   *                or '' when there is none;
   *                any other format: the last render result when it is not
   *                an array, otherwise ''
   */
  private function xml2tpl(string $xml, $channelId=[]):string{
      try{
          $doc = new \DOMDocument();
          $xml = str_replace('MdTpl','dfn',$xml);
          // NOTE(review): the 'HTML-ENTITIES' pseudo encoding is deprecated as of PHP 8.2
          $xml = mb_convert_encoding($xml, 'HTML-ENTITIES', "UTF-8");
          $ok = $doc->loadHTML($xml,LIBXML_NOERROR | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
      }catch(\Exception $e){
          Log::error($e);
          Log::error($xml);
          return "<span>xml解析错误{$e}</span>";
      }
      if(!$ok){
          return "<span>xml解析错误</span>";
      }

      // Stays null when the fragment contains no template element, so the
      // format switch below never reads an undefined variable.
      $tplProps = null;
      // Loaded on first use and then shared: the channel list does not
      // depend on the template being rendered.
      // TODO check that every entry of $channelId is a uuid
      $channelInfo = null;

      $tpl_list = $doc->getElementsByTagName('dfn');
      foreach ($tpl_list as $tpl) {
          $tpl_name = $tpl->getAttribute('name');

          /**
           * Collect the parameters: named ones under their name, positional
           * ones under their 1-based position among all <span> children.
           */
          $params = [];
          $param_id = 0;
          for ($child = $tpl->firstChild; $child; $child = $child->nextSibling) {
              if($child->nodeName !== "span"){
                  continue;
              }
              $param_id++;
              $named = false;
              foreach($child->attributes as $pa_value){
                  if($pa_value->nodeName === "name"){
                      $params[$pa_value->nodeValue] = $child->nodeValue;
                      $named = true;
                  }
              }
              if($named){
                  continue;
              }
              foreach ($child->childNodes as $param_child) {
                  // only direct text nodes; the last one wins
                  if($param_child->nodeType === XML_TEXT_NODE){
                      $params[$param_id] = $param_child->nodeValue;
                  }
              }
          }

          if($channelInfo === null){
              $channelInfo = [];
              foreach ($channelId as $id) {
                  $channelInfo[] = Channel::where('uid',$id)->first();
              }
          }

          $tplRender = new TemplateRender($params,
                                  $channelInfo,
                                  $this->options['mode'],
                                  $this->options['format'],
                                  $this->options['studioId'],
                                  $this->options['debug'],
                                  $this->options['lang'],
                              );
          $tplProps = $tplRender->render($tpl_name);

          if($this->options['format']==='react' && $tplProps){
              // attach the render result to the template element
              $propsAttr = $doc->createAttribute("props");
              $propsAttr->nodeValue = $tplProps['props'];
              $tpl->appendChild($propsAttr);
              $attTpl = $doc->createAttribute("tpl");
              $attTpl->nodeValue = $tplProps['tpl'];
              $tpl->appendChild($attTpl);
              $htmlElement = $doc->createElement($tplProps['tag']);
              $htmlElement->nodeValue=$tplProps['html'];
              $tpl->appendChild($htmlElement);
          }
      }

      switch ($this->options['format']) {
          case 'react':
              $html = $doc->saveHTML();
              $html = str_replace(['<dfn','</dfn>'],['<MdTpl','</MdTpl>'],$html);
              return trim($html);
          case 'unity':
              if($tplProps){
                  return "{{"."{$tplProps['tpl']}|{$tplProps['props']}"."}}";
              }
              return '';
          default: /** html, tex, text, simple, markdown */
              if(!isset($tplProps)){
                  Log::error('tplProps undefine');
                  return '';
              }
              return is_array($tplProps) ? '' : $tplProps;
      }
  }
  /**
   * Convert the template shorthands used in markdown into standard wiki
   * templates.
   *
   * Steps, in this order:
   *  1. bytes that are not valid UTF-8 are dropped
   *  2. nissaya channels only: JSON word lists or "word=meaning" lines
   *     become {{nissaya|word|meaning}}
   *  3. ```mermaid fenced blocks become {{mermaid|<base64 of the JSON-encoded lines>}}
   *  4. [[term]] becomes {{term|term}}
   *  5. {{1-2-3-4}} (a template starting with a digit) becomes {{sent|id=1-2-3-4}}
   *  6. plain ``` fenced blocks become multi-line {{note|...}} templates
   *  7. `inline code` becomes {{note|inline code}}
   *
   * @param string $markdown raw markdown
   * @return string markdown in which all shorthands are {{...}} templates
   */
  private function markdown2wiki(string $markdown): string{
      $clean = iconv('UTF-8','UTF-8//IGNORE',$markdown);
      if($clean !== false){
          $markdown = $clean;
      }
      // when iconv() fails the input is used unchanged instead of false

      if($this->options['channelType']==='nissaya'){
          if($this->options['contentType'] === "json"){
              $markdown = $this->nissayaJsonToWiki($markdown);
          }else if($this->options['contentType'] === "markdown"){
              $markdown = $this->nissayaLinesToWiki($markdown);
          }
      }

      if(strpos($markdown,"```mermaid") !== false){
          $markdown = $this->mermaidToWiki($markdown);
      }

      // preg_replace() returns null when the regex engine fails; the text is
      // then left as it was rather than violating the string return type.

      # terms
      $markdown = preg_replace("/\[\[(.+?)\]\]/",'{{term|$1}}',$markdown) ?? $markdown;
      # sentence templates
      $markdown = preg_replace("/\{\{([0-9].+?)\}\}/",'{{sent|id=$1}}',$markdown) ?? $markdown;

      if(strpos($markdown,"```\n") !== false){
          $markdown = $this->fencedNoteToWiki($markdown);
      }

      # single-line notes
      $markdown = preg_replace("/`(.+?)`/",'{{note|$1}}',$markdown) ?? $markdown;
      return $markdown;
  }

  /**
   * Build {{nissaya|word|meaning}} templates from a JSON word list.
   * Only entries whose "sn" array has exactly one element (first level)
   * are output; entries without a usable "sn" are skipped.
   */
  private function nissayaJsonToWiki(string $json): string{
      $words = json_decode($json);
      if(!is_array($words)){
          Log::error('json data is not array',['data'=>$json]);
          return '';
      }
      $templates = [];
      foreach ($words as $word) {
          if(!isset($word->sn) || !is_array($word->sn) || count($word->sn) !== 1){
              continue;
          }
          $pali = isset($word->word->value) ? $word->word->value : '';
          $meaning = isset($word->meaning->value) ? $word->meaning->value : '';
          $templates[] = "{{nissaya|".$pali."|".$meaning."}}";
      }
      return implode('',$templates);
  }

  /**
   * Turn every "word=meaning" line into {{nissaya|word|meaning}}; the split
   * happens at the first "=". Lines without "=" are kept as they are.
   */
  private function nissayaLinesToWiki(string $markdown): string{
      $newLines = [];
      foreach (explode("\n",$markdown) as $line) {
          if(strstr($line,'=') === FALSE){
              $newLines[] = $line;
              continue;
          }
          $parts = explode('=',$line,2);
          $newLines[] = '{{nissaya|'.$parts[0].'|'.$parts[1].'}}';
      }
      return implode("\n",$newLines);
  }

  /**
   * Replace each ```mermaid ... ``` block by one {{mermaid|...}} line that
   * carries the block's lines JSON-encoded and then base64-encoded.
   * A block that is still open at the end of the text is emitted as well
   * instead of being dropped.
   */
  private function mermaidToWiki(string $markdown): string{
      $newLines = [];
      $diagram = null; // null: outside a mermaid block, array: lines collected so far
      foreach (explode("\n",$markdown) as $line) {
          if($line === "```mermaid"){
              $diagram = [];
              continue;
          }
          if($diagram === null){
              $newLines[] = $line;
          }else if($line === "```"){
              $newLines[] = "{{mermaid|".base64_encode(\json_encode($diagram))."}}";
              $diagram = null;
          }else{
              $diagram[] = $line;
          }
      }
      if($diagram !== null){
          $newLines[] = "{{mermaid|".base64_encode(\json_encode($diagram))."}}";
      }
      return implode("\n",$newLines);
  }

  /**
   * Replace the opening and closing line of each plain ``` fenced block by
   * "{{note|" and "}}". A block left open is closed at the end of the text.
   */
  private function fencedNoteToWiki(string $markdown): string{
      $newLines = [];
      $noteOpen = false;
      foreach (explode("\n",$markdown) as $line) {
          if($line !== "```"){
              $newLines[] = $line;
              continue;
          }
          $newLines[] = $noteOpen ? "}}" : "{{note|";
          $noteOpen = !$noteOpen;
      }
      if($noteOpen){
          $newLines[] = "}}";
      }
      return implode("\n",$newLines);
  }
  /**
   * Render markdown (with templates already expanded) to HTML.
   *
   * "MdTpl" is written in lower case while the markdown renderer and
   * fixHtml() run and restored afterwards; <param> tags become <span>.
   * For the "react" format the HTML is additionally passed through
   * fixHtml(). Every attribute-less <h1>..<h6> opening tag receives a
   * freshly generated uuid as its id.
   *
   * @param string $markdown markdown text
   * @return string HTML
   */
  private function markdownToHtml($markdown){
      $source = str_replace('MdTpl','mdtpl',$markdown);
      $source = str_replace(['<param','</param>'],['<span','</span>'],$source);

      $html = Markdown::render($source);
      if($this->options['format']==='react'){
          $html = $this->fixHtml($html);
      }
      $html = str_replace('<hr>','<hr />',$html);

      // give every H1-H6 its own uuid, one heading level after the other
      foreach (range(1,6) as $level) {
          $pieces = explode("<h{$level}>",$html);
          if(count($pieces) === 1){
              // this heading level does not occur
              continue;
          }
          $html = array_shift($pieces);
          foreach ($pieces as $piece) {
              $html .= "<h{$level} id='".Str::uuid()."'>".$piece;
          }
      }

      return str_replace('mdtpl','MdTpl',$html);
  }
  /**
   * Normalise an HTML fragment by round-tripping it through DOMDocument.
   *
   * The fragment is wrapped in a <span>, parsed and serialised again; the
   * wrapping <span> is part of the result.
   *
   * Side effect: libxml's internal error collection is switched on and left
   * on. The errors collected while parsing are discarded here so that the
   * buffer does not keep growing with every call.
   *
   * @param string $html HTML fragment, UTF-8
   * @return string the re-serialised fragment, UTF-8
   */
  private function fixHtml($html) {
      $doc = new \DOMDocument();
      libxml_use_internal_errors(true);
      // NOTE(review): the 'HTML-ENTITIES' pseudo encoding is deprecated as of PHP 8.2
      $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
      $doc->loadHTML('<span>'.$html.'</span>',LIBXML_NOERROR | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
      libxml_clear_errors();
      $fixed = $doc->saveHTML();
      $fixed = mb_convert_encoding($fixed, "UTF-8", 'HTML-ENTITIES');
      return $fixed;
  }
  /**
   * Reset the global nesting counter that convert() maintains in
   * $GLOBALS["MdRenderStack"].
   */
  public static function init(){
      $GLOBALS['MdRenderStack'] = 0;
  }
  /**
   * Convert markdown to the configured output format, limiting how deeply
   * conversions may nest.
   *
   * The nesting depth is tracked in $GLOBALS["MdRenderStack"]. Only the two
   * outermost levels are converted; from the third level on the markdown is
   * returned untouched.
   *
   * @param string      $markdown  markdown text
   * @param array       $channelId channel ids passed on to the templates
   * @param string|null $queryId   passed on to _convert()
   * @return string converted text, or $markdown itself when nested too deep
   */
  public function convert($markdown,$channelId=[],$queryId=null){
      if(isset($GLOBALS["MdRenderStack"]) && is_numeric($GLOBALS["MdRenderStack"])){
          $GLOBALS["MdRenderStack"]++;
      }else{
          $GLOBALS["MdRenderStack"] = 1;
      }
      try{
          if($GLOBALS["MdRenderStack"]<3){
              return $this->_convert($markdown,$channelId,$queryId);
          }
          return $markdown;
      }finally{
          // Leave the level even when _convert() throws; otherwise the
          // counter stays raised and later calls stop converting.
          $GLOBALS["MdRenderStack"]--;
      }
  }
  /**
   * The conversion pipeline behind convert():
   * markdown2wiki() -> preprocessingForParagraph() -> wiki2xml() ->
   * markdownToHtml(), followed by format specific post-processing.
   *
   * Footnotes are read from $GLOBALS['note'] (entries with 'sn' and
   * 'content') when the "footnote" option is on, and the global is unset
   * once they have been written out. Presumably it is filled while the
   * templates are rendered; confirm against TemplateRender.
   *
   * @param string      $markdown  markdown text
   * @param array       $channelId channel ids passed on to the templates
   * @param string|null $queryId   currently without effect, see note below
   * @return string output for the configured format; '' for an unknown format
   */
  private function _convert($markdown,$channelId=[],$queryId=null){
      // "0" is real text; every other empty value short-circuits
      if(empty($markdown) && $markdown !== '0'){
          switch ($this->options['format']) {
              case 'react':
                  return "<span></span>";
              default:
                  return "";
          }
      }

      $wiki = $this->markdown2wiki($markdown);
      $wiki = $this->preprocessingForParagraph($wiki);
      $markdownWithTpl = $this->wiki2xml($wiki,$channelId);

      // NOTE(review): this used to call xmlQueryId($markdownWithTpl, $queryId)
      // when $queryId was given, but the result was overwritten by the next
      // statement and never used, so the call has been dropped. If filtering
      // by id is wanted it still has to be implemented.
      $html = $this->markdownToHtml($markdownWithTpl);

      // post-processing
      $output = '';
      switch ($this->options['format']) {
          case 'react':
              // wrap <li><div>..</div></li> in a toggle component
              $html = str_replace("<div/>","<div></div>",$html);
              $pattern = '/<li><div>(.+?)<\/div><\/li>/';
              $replacement = '<li><MdTpl name="toggle" tpl="toggle" props=""><div>$1</div></MdTpl></li>';
              $output = preg_replace($pattern,$replacement,$html);
              break;
          case 'text':
          case 'simple':
          case 'tex':
              $html = strip_tags($html);
              $output = htmlspecialchars_decode($html,ENT_QUOTES);
              break;
          case 'unity':
              // keep bold/italic as <b>/<i> by shielding them from strip_tags()
              $html = str_replace(['<strong>','</strong>','<em>','</em>'],['[%b%]','[%/b%]','[%i%]','[%/i%]'],$html);
              $html = strip_tags($html);
              $html = str_replace(['[%b%]','[%/b%]','[%i%]','[%/i%]'],['<b>','</b>','<i>','</i>'],$html);
              $output = htmlspecialchars_decode($html,ENT_QUOTES);
              break;
          case 'html':
              $output = htmlspecialchars_decode($html,ENT_QUOTES);
              // footnotes
              if($this->options['footnote'] && isset($GLOBALS['note']) && count($GLOBALS['note'])>0){
                  $output .= '<div><h1>endnote</h1>';
                  foreach ($GLOBALS['note'] as $footnote) {
                      $output .= '<p><a name="footnote-'.$footnote['sn'].'">['.$footnote['sn'].']</a> '.$footnote['content'].'</p>';
                  }
                  $output .= '</div>';
                  unset($GLOBALS['note']);
              }
              // prefix image sources with the application url
              $output = str_replace('<img src="','<img src="'.config('app.url'),$output);
              break;
          case 'markdown':
              // footnotes
              $footnotes = [];
              if($this->options['footnote'] && isset($GLOBALS['note']) && count($GLOBALS['note'])>0){
                  foreach ($GLOBALS['note'] as $footnote) {
                      $footnotes[] = '[^'.$footnote['sn'].']: ' . $footnote['content'];
                  }
                  unset($GLOBALS['note']);
              }
              $output = $markdownWithTpl . "\n\n" . implode("\n\n",$footnotes);
              break;
      }
      return $output;
  }
  /**
   * One-shot helper: create a renderer with the given settings and convert
   * the markdown with it.
   *
   * @param string      $markdown    markdown text
   * @param string[]    $channelId   channel ids
   * @param string|null $queryId     passed on to convert()
   * @param string      $mode        "mode" option
   * @param string      $channelType "channelType" option
   * @param string      $contentType "contentType" option
   * @param string      $format      "format" option
   * @return string the converted text
   */
  public static function render($markdown,$channelId,$queryId=null,$mode='read',$channelType='translation',$contentType="markdown",$format='react'){
      $settings = [
          'mode'        => $mode,
          'channelType' => $channelType,
          'contentType' => $contentType,
          'format'      => $format,
      ];
      return (new self($settings))->convert($markdown,$channelId,$queryId);
  }
  643. }