MdRender.php 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. <?php
  2. namespace App\Http\Api;
  3. use Illuminate\Support\Str;
  4. use mustache\mustache;
  5. use App\Models\DhammaTerm;
  6. use App\Models\PaliText;
  7. use App\Models\Channel;
  8. use App\Http\Controllers\CorpusController;
  9. use Illuminate\Support\Facades\Cache;
  10. use App\Tools\RedisClusters;
  11. use Illuminate\Support\Facades\Log;
  12. use App\Tools\Markdown;
  // Maximum nesting depth of {{...}} templates that MdRender::tplSplit() will follow.
  define('STACK_DEEP', 8);
  /**
   * Converts wiki-flavoured markdown into the output format selected in the
   * options ('react', 'html', 'text', 'simple', 'tex' or 'unity').
   *
   * Pipeline (see _convert()): markdown -> wiki templates ({{name|arg|...}})
   * -> <MdTpl> markup rendered through TemplateRender -> HTML produced by
   * Markdown::render() -> per-format post-processing.
   */
  class MdRender
  {
      /**
       * Rendering options.
       *
       * - mode: text rendering mode, 'read' (reading) or 'edit' (editing)
       * - channelType: 'nissaya' enables the nissaya pre-processing in markdown2wiki()
       * - contentType: 'markdown' or 'json' (only inspected for nissaya channels)
       * - format: output format, see _convert()
       * - debug, studioId, lang: forwarded unchanged to TemplateRender
       */
      protected $options = [
          'mode' => 'read',
          'channelType' => 'translation',
          'contentType' => "markdown",
          'format' => 'react',
          'debug' => [],
          'studioId' => null,
          'lang' => 'zh-Hans',
      ];

      /**
       * @param array $options Overrides for the default options; keys not
       *                       listed in $options are stored as well.
       */
      public function __construct($options = [])
      {
          foreach ($options as $key => $value) {
              $this->options[$key] = $value;
          }
      }

      /**
       * Splits a string around its first (possibly nested) {{...}} template
       * into three parts: text before the template, the template itself and
       * the text after it.
       *
       * @param string $tpl
       * @return array ['data' => [before, template, after], 'error' => int]
       *               error 0: success, or no template present (data is [$tpl, '', ''])
       *               error 1: the template is never closed (data is [$tpl, '', ''])
       *               error 2: nesting reached STACK_DEEP (data is [$tpl, '', ''])
       */
      private function tplSplit($tpl)
      {
          $start = strpos($tpl, '{{');
          if ($start === false) {
              // No template at all.
              return ['data' => [$tpl, '', ''], 'error' => 0];
          }

          // $depth counts the currently open "{{"; $cursor is the offset just
          // past the most recently consumed delimiter.
          $depth = 1;
          $cursor = $start + 2;
          while ($depth > 0 && $depth < STACK_DEEP) {
              $close = strpos($tpl, '}}', $cursor);
              if ($close === false) {
                  // No closing delimiter left: the template is unterminated.
                  break;
              }
              $open = strpos($tpl, '{{', $cursor);
              if ($open !== false && $open < $close) {
                  // A nested template opens before the next close.
                  $depth++;
                  $cursor = $open + 2;
              } else {
                  $depth--;
                  $cursor = $close + 2;
              }
          }

          if ($depth > 0) {
              return [
                  'data' => [$tpl, '', ''],
                  'error' => $depth === STACK_DEEP ? 2 : 1,
              ];
          }

          return [
              'data' => [
                  substr($tpl, 0, $start),
                  substr($tpl, $start, $cursor - $start),
                  // Cast so the tail is '' (never false) when the template ends the string.
                  (string) substr($tpl, $cursor),
              ],
              'error' => 0,
          ];
      }

      /**
       * Replaces every top-level {{template}} in the wiki text with the output
       * of xml2tpl(); text outside templates is passed through unchanged.
       *
       * @param string $wiki
       * @param array  $channelId Channel uids forwarded to xml2tpl()
       */
      private function wiki2xml(string $wiki, $channelId = []): string
      {
          $remain = $wiki;
          $buffer = [];
          do {
              $arrWiki = $this->tplSplit($remain);
              $buffer[] = $arrWiki['data'][0];
              $tpl = $arrWiki['data'][1];
              if ($tpl !== '') {
                  // Turn the template into <MdTpl name="..."><param>...</param></MdTpl>.
                  $tpl = str_replace("|\n", "|", $tpl);
                  $pattern = "/\{\{(.+?)\|/";
                  $replacement = '<MdTpl class="tpl" name="$1"><param>';
                  $tpl = preg_replace($pattern, $replacement, $tpl);
                  $tpl = str_replace("}}", "</param></MdTpl>", $tpl);
                  $tpl = str_replace("|", "</param><param>", $tpl);
                  // Named arguments: <param>key=value becomes <param name="key">value.
                  $pattern = "/<param>([a-z]+?)=/";
                  $replacement = '<param name="$1">';
                  $tpl = preg_replace($pattern, $replacement, $tpl);
                  // Parameters are carried as <span class="param"> from here on.
                  $tpl = str_replace('<param', '<span class="param"', $tpl);
                  $tpl = str_replace('</param>', '</span>', $tpl);
                  $buffer[] = $this->xml2tpl($tpl, $channelId);
              }
              $remain = $arrWiki['data'][2];
              // Compare against '' rather than empty(): a trailing "0" is real text.
          } while ($remain !== '');

          return implode('', $buffer);
      }

      /**
       * Parses an XML string, returning null instead of false when it is malformed.
       *
       * simplexml_load_string() reports malformed input by returning false; the
       * catch covers setups where an error handler turns the parser warning
       * into an exception.
       *
       * @return \SimpleXMLElement|null
       */
      private static function loadXml(string $xml)
      {
          try {
              $dom = simplexml_load_string($xml);
          } catch (\Exception $e) {
              Log::error($e);
              return null;
          }
          // Strict comparison: an empty but valid element is falsy.
          return $dom === false ? null : $dom;
      }

      /**
       * Returns the XML of the first <MdTpl> that has a <param name="id">
       * child whose text equals $id, or "<div></div>" when there is none or
       * the input cannot be parsed.
       *
       * NOTE(review): wiki2xml() rewrites <param> to <span class="param">
       * before this method is called from _convert(), so this lookup may
       * never match there - confirm the intended input.
       */
      private function xmlQueryId(string $xml, string $id): string
      {
          $dom = self::loadXml($xml);
          if ($dom === null) {
              return "<div></div>";
          }
          $tplList = $dom->xpath('//MdTpl');
          if (!is_array($tplList)) {
              return "<div></div>";
          }
          foreach ($tplList as $tpl) {
              foreach ($tpl->children() as $param) {
                  if (
                      $param->getName() === "param"
                      && (string) $param['name'] === "id"
                      && (string) $param === $id
                  ) {
                      return $tpl->asXML();
                  }
              }
          }
          return "<div></div>";
      }

      /**
       * Collects, for every <MdTpl name="sent"> in the XML, the text of its
       * first non-empty <param> child.
       *
       * @return array List of strings; empty when the XML cannot be parsed.
       */
      public static function take_sentence(string $xml): array
      {
          $output = [];
          $dom = self::loadXml($xml);
          if ($dom === null) {
              return $output;
          }
          $tplList = $dom->xpath('//MdTpl');
          if (!is_array($tplList)) {
              return $output;
          }
          foreach ($tplList as $tpl) {
              if ((string) $tpl['name'] !== "sent") {
                  continue;
              }
              foreach ($tpl->children() as $param) {
                  if ($param->getName() !== "param") {
                      continue;
                  }
                  $sent = (string) $param;
                  if (!empty($sent)) {
                      // Only the first non-empty parameter is taken.
                      $output[] = $sent;
                      break;
                  }
              }
          }
          return $output;
      }

      /**
       * Parses the <MdTpl> markup produced by wiki2xml(), extracts each
       * template's parameters and renders it through TemplateRender.
       *
       * For the 'react' format the rendered props, tpl name and inner HTML are
       * attached to every template element and the serialised markup is
       * returned. For every other format only the render result of the last
       * template element in the fragment is returned.
       *
       * @param string $xml
       * @param array  $channelId Channel uids; each is resolved to a Channel model
       */
      private function xml2tpl(string $xml, $channelId = []): string
      {
          try {
              $doc = new \DOMDocument();
              // Parsed with the HTML parser, so borrow a tag name it accepts.
              $xml = str_replace('MdTpl', 'dfn', $xml);
              $xml = mb_convert_encoding($xml, 'HTML-ENTITIES', "UTF-8");
              $ok = $doc->loadHTML($xml, LIBXML_NOERROR | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
          } catch (\Exception $e) {
              Log::error($e);
              Log::error($xml);
              return "<span>xml解析错误{$e}</span>";
          }
          if (!$ok) {
              return "<span>xml解析错误</span>";
          }

          // Stays null when the fragment contains no template element.
          $tplProps = null;
          // Resolved on first use; the lookups do not depend on the template.
          $channelInfo = null;

          $tpl_list = $doc->getElementsByTagName('dfn');
          foreach ($tpl_list as $tpl) {
              $tpl_name = $tpl->getAttribute('name');

              // Collect the parameters: named ones by name, the others by
              // their 1-based position among all parameters.
              $params = [];
              $param_id = 0;
              for ($child = $tpl->firstChild; $child; $child = $child->nextSibling) {
                  if ($child->nodeName !== "span") {
                      continue;
                  }
                  $param_id++;
                  if ($child->hasAttribute('name')) {
                      $params[$child->getAttribute('name')] = $child->nodeValue;
                      continue;
                  }
                  foreach ($child->childNodes as $param_child) {
                      if ($param_child->nodeType === XML_TEXT_NODE) {
                          $params["{$param_id}"] = $param_child->nodeValue;
                      }
                  }
              }

              //TODO check that every entry of $channelId is a uuid
              if ($channelInfo === null) {
                  $channelInfo = [];
                  foreach ($channelId as $id) {
                      $channelInfo[] = Channel::where('uid', $id)->first();
                  }
              }

              $tplRender = new TemplateRender(
                  $params,
                  $channelInfo,
                  $this->options['mode'],
                  $this->options['format'],
                  $this->options['studioId'],
                  $this->options['debug'],
                  $this->options['lang']
              );
              $tplProps = $tplRender->render($tpl_name);

              if ($this->options['format'] === 'react' && $tplProps) {
                  $attProps = $doc->createAttribute("props");
                  $attProps->nodeValue = $tplProps['props'];
                  $tpl->appendChild($attProps);
                  $attTpl = $doc->createAttribute("tpl");
                  $attTpl->nodeValue = $tplProps['tpl'];
                  $tpl->appendChild($attTpl);
                  $htmlElement = $doc->createElement($tplProps['tag']);
                  $htmlElement->nodeValue = $tplProps['html'];
                  $tpl->appendChild($htmlElement);
              }
          }

          $html = $doc->saveHTML();
          $html = str_replace(['<dfn', '</dfn>'], ['<MdTpl', '</MdTpl>'], $html);

          switch ($this->options['format']) {
              case 'react':
                  return trim($html);
              case 'unity':
                  if ($tplProps) {
                      return "{{" . "{$tplProps['tpl']}|{$tplProps['props']}" . "}}";
                  }
                  return '';
              case 'html':
              case 'text':
              case 'simple':
              case 'tex':
                  if (!isset($tplProps)) {
                      Log::error('tplProps undefine');
                      return '';
                  }
                  // Only a non-array render result is returned for these formats.
                  return is_array($tplProps) ? '' : $tplProps;
              default:
                  return '';
          }
      }

      /**
       * preg_replace() that returns the subject unchanged when PCRE reports
       * an error (preg_replace() yields null in that case).
       */
      private function replacePattern(string $pattern, string $replacement, string $subject): string
      {
          $result = preg_replace($pattern, $replacement, $subject);
          return $result === null ? $subject : $result;
      }

      /**
       * Builds {{nissaya|word|meaning}} templates from a JSON word list.
       * Only entries whose `sn` array has exactly one element (first level)
       * are emitted; entries without a usable `sn` are skipped.
       */
      private function nissayaJsonToWiki(string $markdown): string
      {
          $json = json_decode($markdown);
          $nissayaWord = [];
          if (is_array($json)) {
              foreach ($json as $word) {
                  if (!isset($word->sn) || !is_array($word->sn) || count($word->sn) !== 1) {
                      continue;
                  }
                  $str = "{{nissaya|";
                  if (isset($word->word->value)) {
                      $str .= $word->word->value;
                  }
                  $str .= "|";
                  if (isset($word->meaning->value)) {
                      $str .= $word->meaning->value;
                  }
                  $str .= "}}";
                  $nissayaWord[] = $str;
              }
          } else {
              Log::error('json data is not array', ['data' => $markdown]);
          }
          return implode('', $nissayaWord);
      }

      /**
       * Turns every "word=meaning" line into {{nissaya|word|meaning}};
       * lines without "=" are kept as they are.
       */
      private function nissayaLinesToWiki(string $markdown): string
      {
          $newLines = [];
          foreach (explode("\n", $markdown) as $line) {
              if (strpos($line, '=') === false) {
                  $newLines[] = $line;
                  continue;
              }
              // Split on the first "=" only; the meaning may contain "=" itself.
              $parts = explode('=', $line, 2);
              $newLines[] = '{{nissaya|' . $parts[0] . '|' . $parts[1] . '}}';
          }
          return implode("\n", $newLines);
      }

      /**
       * Replaces each ```mermaid ... ``` block by a {{mermaid|...}} template
       * whose argument is the base64 of the JSON-encoded list of block lines.
       */
      private function mermaidToWiki(string $markdown): string
      {
          $newLines = [];
          $mermaidBegin = false;
          $mermaidString = [];
          foreach (explode("\n", $markdown) as $line) {
              if ($line === "```mermaid") {
                  $mermaidBegin = true;
                  $mermaidString = [];
                  continue;
              }
              if (!$mermaidBegin) {
                  $newLines[] = $line;
              } elseif ($line === "```") {
                  $newLines[] = "{{mermaid|" . base64_encode(\json_encode($mermaidString)) . "}}";
                  $mermaidBegin = false;
              } else {
                  $mermaidString[] = $line;
              }
          }
          return implode("\n", $newLines);
      }

      /**
       * Replaces ``` fence lines by "{{note|" (opening) and "}}" (closing);
       * a fence left open at the end of the text is closed.
       */
      private function fencedNotesToWiki(string $markdown): string
      {
          $newLines = [];
          $noteBegin = false;
          foreach (explode("\n", $markdown) as $line) {
              if ($line === "```") {
                  $newLines[] = $noteBegin ? "}}" : "{{note|";
                  $noteBegin = !$noteBegin;
              } else {
                  $newLines[] = $line;
              }
          }
          if ($noteBegin) {
              $newLines[] = "}}";
          }
          return implode("\n", $newLines);
      }

      /**
       * Rewrites markdown shorthand into {{template}} syntax:
       * nissaya lines/JSON (nissaya channels only), mermaid blocks,
       * [[term]] -> {{term|term}}, {{1-2-3-4}} -> {{sent|1-2-3-4}},
       * fenced blocks and `inline code` -> {{note|...}}.
       */
      private function markdown2wiki(string $markdown): string
      {
          // Drop invalid UTF-8 bytes. If iconv fails, fall back to mbstring
          // instead of losing the whole text.
          $clean = iconv('UTF-8', 'UTF-8//IGNORE', $markdown);
          $markdown = $clean === false
              ? mb_convert_encoding($markdown, 'UTF-8', 'UTF-8')
              : $clean;

          if ($this->options['channelType'] === 'nissaya') {
              if ($this->options['contentType'] === "json") {
                  $markdown = $this->nissayaJsonToWiki($markdown);
              } elseif ($this->options['contentType'] === "markdown") {
                  $markdown = $this->nissayaLinesToWiki($markdown);
              }
          }

          if (strpos($markdown, "```mermaid") !== false) {
              $markdown = $this->mermaidToWiki($markdown);
          }

          // Terms: [[x]] -> {{term|x}}
          $markdown = $this->replacePattern("/\[\[(.+?)\]\]/", '{{term|$1}}', $markdown);
          // Sentence templates: a template starting with a digit -> {{sent|...}}
          $markdown = $this->replacePattern("/\{\{([0-9].+?)\}\}/", '{{sent|$1}}', $markdown);

          // Multi-line notes.
          if (strpos($markdown, "```\n") !== false) {
              $markdown = $this->fencedNotesToWiki($markdown);
          }

          // Single-line notes: `x` -> {{note|x}}
          return $this->replacePattern("/`(.+?)`/", '{{note|$1}}', $markdown);
      }

      /**
       * Runs the text through Markdown::render() and gives every attribute-less
       * <h1>..<h6> tag a fresh uuid as its id.
       */
      private function markdownToHtml($markdown)
      {
          // Lower-cased while the markdown renderer runs; restored before returning.
          $markdown = str_replace('MdTpl', 'mdtpl', $markdown);
          $markdown = str_replace(['<param', '</param>'], ['<span', '</span>'], $markdown);
          $html = Markdown::render($markdown);
          if ($this->options['format'] === 'react') {
              $html = $this->fixHtml($html);
          }
          $html = str_replace('<hr>', '<hr />', $html);

          $withIds = preg_replace_callback(
              '/<h([1-6])>/',
              static function (array $match): string {
                  return "<h{$match[1]} id='" . Str::uuid() . "'>";
              },
              $html
          );
          if ($withIds !== null) {
              $html = $withIds;
          }

          return str_replace('mdtpl', 'MdTpl', $html);
      }

      /**
       * Round-trips the HTML through DOMDocument inside a wrapping <span>.
       *
       * NOTE(review): libxml's internal error mode is switched on and left on
       * for the rest of the process, exactly as before; other parsing in this
       * class may depend on that, so it is not restored here. The error buffer
       * is cleared so it cannot accumulate between calls.
       */
      private function fixHtml($html)
      {
          $doc = new \DOMDocument();
          libxml_use_internal_errors(true);
          $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
          $doc->loadHTML('<span>' . $html . '</span>', LIBXML_NOERROR | LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
          $fixed = $doc->saveHTML();
          libxml_clear_errors();
          return mb_convert_encoding($fixed, "UTF-8", 'HTML-ENTITIES');
      }

      /**
       * Resets the global nesting counter used by convert().
       */
      public static function init()
      {
          $GLOBALS["MdRenderStack"] = 0;
      }

      /**
       * Converts markdown to the configured format, guarding against runaway
       * nesting: once the global counter reaches 3 the markdown is returned
       * unconverted.
       *
       * @param string      $markdown
       * @param array       $channelId Channel uids
       * @param string|null $queryId   When set, the markup is narrowed with xmlQueryId()
       */
      public function convert($markdown, $channelId = [], $queryId = null)
      {
          if (isset($GLOBALS["MdRenderStack"]) && is_numeric($GLOBALS["MdRenderStack"])) {
              $GLOBALS["MdRenderStack"]++;
          } else {
              $GLOBALS["MdRenderStack"] = 1;
          }
          try {
              if ($GLOBALS["MdRenderStack"] < 3) {
                  return $this->_convert($markdown, $channelId, $queryId);
              }
              return $markdown;
          } finally {
              // Always unwind, otherwise one failed render would leave the
              // counter raised for every later call in this process.
              $GLOBALS["MdRenderStack"]--;
          }
      }

      /**
       * The conversion pipeline behind convert(); see the class comment.
       * Formats other than react/text/simple/tex/unity/html yield ''.
       */
      private function _convert($markdown, $channelId = [], $queryId = null)
      {
          // "0" is real content even though empty() treats it as empty.
          if (empty($markdown) && $markdown !== '0') {
              return $this->options['format'] === 'react' ? "<span></span>" : "";
          }

          $wiki = $this->markdown2wiki($markdown);
          $html = $this->wiki2xml($wiki, $channelId);
          if (!is_null($queryId)) {
              $html = $this->xmlQueryId($html, $queryId);
          }
          $html = $this->markdownToHtml($html);

          // Post-processing
          switch ($this->options['format']) {
              case 'react':
                  // Wrap <li><div>..</div></li> in a toggle (expandable) component.
                  $html = str_replace("<div/>", "<div></div>", $html);
                  $pattern = '/<li><div>(.+?)<\/div><\/li>/';
                  $replacement = '<li><MdTpl name="toggle" tpl="toggle" props=""><div>$1</div></MdTpl></li>';
                  $toggled = preg_replace($pattern, $replacement, $html);
                  return $toggled === null ? $html : $toggled;
              case 'text':
              case 'simple':
              case 'tex':
                  return htmlspecialchars_decode(strip_tags($html), ENT_QUOTES);
              case 'unity':
                  // Protect bold/italic from strip_tags(), then emit them as <b>/<i>.
                  $html = str_replace(['<strong>', '</strong>', '<em>', '</em>'], ['[%b%]', '[%/b%]', '[%i%]', '[%/i%]'], $html);
                  $html = strip_tags($html);
                  $html = str_replace(['[%b%]', '[%/b%]', '[%i%]', '[%/i%]'], ['<b>', '</b>', '<i>', '</i>'], $html);
                  return htmlspecialchars_decode($html, ENT_QUOTES);
              case 'html':
                  return htmlspecialchars_decode($html, ENT_QUOTES);
              default:
                  return '';
          }
      }

      /**
       * Convenience wrapper: builds a renderer from the given settings and
       * converts the markdown.
       *
       * @param string      $markdown
       * @param string[]    $channelId
       * @param string|null $queryId
       */
      public static function render($markdown, $channelId, $queryId = null, $mode = 'read', $channelType = 'translation', $contentType = "markdown", $format = 'react')
      {
          $mdRender = new self([
              'mode' => $mode,
              'channelType' => $channelType,
              'contentType' => $contentType,
              'format' => $format,
          ]);
          return $mdRender->convert($markdown, $channelId, $queryId);
      }
  }