WikiContentParser.php 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. <?php
  2. // app/Support/WikiContentParser.php
  3. namespace App\Helpers;
  /**
   * Adds anchor ids to the h1-h3 headings of an HTML fragment and extracts a
   * table of contents from those headings.
   */
  class WikiContentParser
  {
      /**
       * Matches one h1-h3 element.
       * Groups: 1 = tag name, 2 = raw attribute string, 3 = inner HTML.
       * The lookahead keeps names such as "h1x" or "h1-foo" from being treated as h1.
       */
      private const HEADING_PATTERN = '/<(h[1-3])(?=[\s>])([^>]*)>(.*?)<\/\1\s*>/si';

      /**
       * Matches one attribute at a time (name plus optional double-quoted,
       * single-quoted or unquoted value), so text inside quoted values is
       * consumed and never mistaken for an attribute.
       * Groups: 1 = name, 2 / 3 / 4 = value (double-quoted / single-quoted / unquoted).
       */
      private const ATTRIBUTE_PATTERN =
          '/([^\s"\'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'=<>`]+)))?/';

      /**
       * Injects an id into every h1-h3 heading that lacks one and builds the
       * table of contents.
       *
       * Headings that already carry a non-empty id keep it. Other headings get
       * a slug derived from their text; a numeric suffix ("-1", "-2", ...) is
       * appended until the id collides neither with another generated id nor
       * with an explicit id on any h1-h3 heading in the fragment.
       *
       * TOC levels are normalised so that the shallowest heading found is level 1.
       *
       * If the regex engine fails (e.g. backtrack limit on very large input),
       * the HTML is returned unchanged with an empty TOC instead of null content.
       *
       * @param string $html HTML fragment to process.
       *
       * @return array{content: string, toc: list<array{id: string, text: string, level: int}>}
       */
      public static function parse(string $html): array
      {
          $found = preg_match_all(self::HEADING_PATTERN, $html, $headings, PREG_SET_ORDER);
          if ($found === false || $found === 0) {
              return ['content' => $html, 'toc' => []];
          }

          // Reserve every explicit heading id up front so that generated slugs
          // cannot collide with an explicit id that appears later in the document.
          $usedIds = [];
          foreach ($headings as $heading) {
              $explicit = self::findIdAttribute($heading[2]);
              if ($explicit !== null && $explicit['value'] !== '') {
                  $usedIds[$explicit['value']] = true;
              }
          }

          $toc = [];
          $slugCount = [];

          $content = preg_replace_callback(
              self::HEADING_PATTERN,
              static function (array $matches) use (&$toc, &$slugCount, &$usedIds): string {
                  [, $tag, $attrs, $inner] = $matches;
                  $text = strip_tags($inner);

                  $existing = self::findIdAttribute($attrs);
                  if ($existing !== null && $existing['value'] !== '') {
                      $id = $existing['value'];
                  } else {
                      if ($existing !== null) {
                          // Drop the empty id attribute; appending a second one
                          // would be ignored by browsers.
                          $attrs = substr_replace($attrs, '', $existing['offset'], $existing['length']);
                      }

                      $base = self::slugify($text);
                      $id = $base;
                      $suffix = $slugCount[$base] ?? 0;
                      while (isset($usedIds[$id])) {
                          $suffix++;
                          $id = $base . '-' . $suffix;
                      }
                      $slugCount[$base] = $suffix;
                      $usedIds[$id] = true;

                      $attrs .= ' id="' . $id . '"';
                  }

                  $toc[] = [
                      'id' => $id,
                      'text' => trim($text),
                      'level' => (int) substr($tag, 1),
                  ];

                  return "<{$tag}{$attrs}>{$inner}</{$tag}>";
              },
              $html
          );

          if ($content === null) {
              // PCRE error: keep the page content intact rather than returning null.
              return ['content' => $html, 'toc' => []];
          }

          // Normalise levels: find the smallest level and shift every entry so it becomes 1.
          $minLevel = min(array_column($toc, 'level'));
          foreach ($toc as &$item) {
              $item['level'] = $item['level'] - $minLevel + 1;
          }
          unset($item);

          return ['content' => $content, 'toc' => $toc];
      }

      /**
       * Locates the id attribute inside a raw attribute string.
       *
       * The attribute name is compared case-insensitively and must be exactly
       * "id", so "data-id" and similar names are not matched.
       *
       * @param string $attrs Raw text between the tag name and the closing ">".
       *
       * @return array{value: string, offset: int, length: int}|null Value and byte
       *         position of the attribute within $attrs, or null when there is none.
       */
      private static function findIdAttribute(string $attrs): ?array
      {
          $flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
          if (!preg_match_all(self::ATTRIBUTE_PATTERN, $attrs, $found, $flags)) {
              return null;
          }

          foreach ($found as $attribute) {
              if (strtolower($attribute[1][0]) !== 'id') {
                  continue;
              }

              // Only one of the three value groups can have matched; the others
              // are empty strings or absent.
              $value = ($attribute[2][0] ?? '') . ($attribute[3][0] ?? '') . ($attribute[4][0] ?? '');

              return [
                  'value' => $value,
                  'offset' => $attribute[0][1],
                  'length' => strlen($attribute[0][0]),
              ];
          }

          return null;
      }

      /**
       * Turns heading text into an id-safe slug.
       *
       * Unicode letters (including Chinese) and digits are kept; every other
       * run of characters becomes a single hyphen. Returns "section" when
       * nothing usable remains or the text is not valid UTF-8.
       *
       * @param string $text Plain heading text.
       */
      private static function slugify(string $text): string
      {
          $slug = preg_replace('/[^\p{L}\p{N}]+/u', '-', trim($text));
          if ($slug === null) {
              // preg_replace() returns null on invalid UTF-8 when the /u modifier is used.
              return 'section';
          }

          $slug = trim($slug, '-');
          // Multibyte-aware lower-casing when available; strtolower() works on bytes.
          $slug = function_exists('mb_strtolower') ? mb_strtolower($slug, 'UTF-8') : strtolower($slug);

          // Explicit comparison: "0" is a valid slug but is falsy in PHP.
          return $slug === '' ? 'section' : $slug;
      }
  }