MarkdownHelper.php 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. <?php
  2. namespace App\Helpers;
  3. use Illuminate\Support\Str;
  4. class MarkdownHelper
  5. {
  6. /**
  7. * 将 Markdown 字符串按段落、标题、表格、列表、代码块等规则拆分成数组
  8. *
  9. * @param string $markdown
  10. * @return array
  11. */
  12. public static function splitByParagraphs(string $markdown): array
  13. {
  14. // 保护代码块内容,防止内部换行被拆分
  15. $codeBlocks = [];
  16. $markdown = preg_replace_callback('/```(.*?)```/s', function ($matches) use (&$codeBlocks) {
  17. $placeholder = '%%CODE_BLOCK_' . count($codeBlocks) . '%%';
  18. $codeBlocks[$placeholder] = $matches[0];
  19. return $placeholder;
  20. }, $markdown);
  21. // 按两个及以上换行符拆分
  22. $parts = preg_split('/\n\s*\n/', $markdown);
  23. $result = [];
  24. foreach ($parts as $part) {
  25. $part = trim($part);
  26. if ($part === '') {
  27. continue;
  28. }
  29. // 恢复代码块
  30. foreach ($codeBlocks as $placeholder => $codeBlock) {
  31. if (strpos($part, $placeholder) !== false) {
  32. $part = str_replace($placeholder, $codeBlock, $part);
  33. }
  34. }
  35. // 进一步按标题、表格、列表拆分(这些通常不会被两个换行分隔)
  36. $subParts = self::splitBySpecialBlocks($part);
  37. foreach ($subParts as $subPart) {
  38. $subPart = trim($subPart);
  39. if ($subPart !== '') {
  40. $result[] = $subPart;
  41. }
  42. }
  43. }
  44. return $result;
  45. }
  46. /**
  47. * 按标题、表格、列表等特殊块进一步拆分
  48. *
  49. * @param string $text
  50. * @return array
  51. */
  52. protected static function splitBySpecialBlocks(string $text): array
  53. {
  54. // 保护代码块(防止被误拆)
  55. $codeBlocks = [];
  56. $text = preg_replace_callback('/```(.*?)```/s', function ($matches) use (&$codeBlocks) {
  57. $placeholder = '%%CODE_BLOCK_' . count($codeBlocks) . '%%';
  58. $codeBlocks[$placeholder] = $matches[0];
  59. return $placeholder;
  60. }, $text);
  61. // 按标题 (#, ##, 等)
  62. $lines = preg_split('/(^#{1,6}\s+.*$)/m', $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  63. $result = [];
  64. foreach ($lines as $line) {
  65. $line = trim($line);
  66. if ($line === '') {
  67. continue;
  68. }
  69. // 如果包含表格(简单判断:包含 | 且多个行)
  70. if (strpos($line, '|') !== false && substr_count($line, "\n") >= 1) {
  71. $rows = explode("\n", $line);
  72. $table = [];
  73. foreach ($rows as $row) {
  74. if (trim($row) !== '') {
  75. $table[] = $row;
  76. }
  77. }
  78. if (!empty($table)) {
  79. $result[] = implode("\n", $table);
  80. }
  81. continue;
  82. }
  83. // 如果包含列表(无序列表 - 或 *,有序列表 1. 2.)
  84. if (preg_match('/^(\s*[-*+]\s+|\s*\d+\.\s+)/m', $line)) {
  85. // 保留整个列表块(连续列表行)
  86. $result[] = $line;
  87. continue;
  88. }
  89. // 恢复代码块
  90. foreach ($codeBlocks as $placeholder => $codeBlock) {
  91. if (strpos($line, $placeholder) !== false) {
  92. $line = str_replace($placeholder, $codeBlock, $line);
  93. }
  94. }
  95. $result[] = $line;
  96. }
  97. return $result;
  98. }
  99. }