visuddhinanda 2 месяцев назад
Родитель
Сommit
c908d59b4e

+ 122 - 0
api-v12/app/Services/NissayaParser.php

@@ -0,0 +1,122 @@
+<?php
+
+namespace App\Services;
+
+class NissayaParser
+{
+    /**
+     * Parse nissaya text (Pali terms paired with a Burmese gloss) into records.
+     *
+     * Line grammar, applied to each line after trimming:
+     *  - "pali=translation" starts a new record; the split is on the first "=".
+     *  - "=translation" fills in the translation of the current record, but only
+     *    while that translation is still empty; otherwise the line is ignored.
+     *  - A line without "=" is the translation of the current record if that is
+     *    still empty, the Pali text of a new record if no record has been
+     *    started yet, and a note in every other case.
+     *  - A line consisting solely of ``` or `` opens a note block. The block runs
+     *    until a line consisting of the same delimiter and becomes one note.
+     *    A block that is still open at the end of the input is kept as a note
+     *    when it contains any text.
+     *  - Blank lines outside note blocks are skipped.
+     *
+     * Notes are buffered and attached to the current record at the moment the
+     * next record starts or the input ends.
+     *
+     * @param string $content Raw nissaya text; LF and CRLF line endings are accepted.
+     * @return array List of records, each with the keys 'pali' (string),
+     *               'translation' (string) and 'notes' (list of strings).
+     */
+    public function parse(string $content): array
+    {
+        $records = [];
+        $currentRecord = null;
+        $pendingNotes = [];
+        $inCodeBlock = false;
+        $codeBlockContent = '';
+        $codeBlockDelimiter = '';
+
+        foreach (explode("\n", $content) as $rawLine) {
+            // Drop the "\r" left behind by CRLF input so it cannot leak into notes.
+            $line = rtrim($rawLine, "\r");
+            $trimmedLine = trim($line);
+
+            if ($inCodeBlock) {
+                if ($trimmedLine === $codeBlockDelimiter) {
+                    // Closing fence: the collected text becomes one note.
+                    $inCodeBlock = false;
+                    $pendingNotes[] = trim($codeBlockContent);
+                    $codeBlockContent = '';
+                    $codeBlockDelimiter = '';
+                } else {
+                    // Everything else is note text, including a fence of the
+                    // other style, which has no special meaning in here.
+                    $codeBlockContent .= $line . "\n";
+                }
+                continue;
+            }
+
+            if ($trimmedLine === '```' || $trimmedLine === '``') {
+                // Opening fence; remember which style has to close the block.
+                $inCodeBlock = true;
+                $codeBlockDelimiter = $trimmedLine;
+                $codeBlockContent = '';
+                continue;
+            }
+
+            // Compare against '' explicitly: empty() would also discard a line "0".
+            if ($trimmedLine === '') {
+                continue;
+            }
+
+            if (strpos($line, '=') === false) {
+                if ($currentRecord === null) {
+                    // Pali text whose translation follows on a later line.
+                    $currentRecord = [
+                        'pali' => $trimmedLine,
+                        'translation' => '',
+                        'notes' => []
+                    ];
+                } elseif ($currentRecord['translation'] === '') {
+                    // The previous line ended in "=" (or had none); this is its gloss.
+                    $currentRecord['translation'] = $trimmedLine;
+                } else {
+                    $pendingNotes[] = $trimmedLine;
+                }
+                continue;
+            }
+
+            if ($trimmedLine[0] === '=') {
+                // Continuation line: supplies the translation of the open record.
+                if ($currentRecord !== null && $currentRecord['translation'] === '') {
+                    $currentRecord['translation'] = trim(substr($trimmedLine, 1));
+                }
+                continue;
+            }
+
+            // A "pali=translation" line closes the previous record ...
+            if ($currentRecord !== null) {
+                $currentRecord['notes'] = $pendingNotes;
+                $records[] = $currentRecord;
+                $pendingNotes = [];
+            }
+
+            // ... and opens the next one.
+            [$pali, $translation] = explode('=', $line, 2);
+            $currentRecord = [
+                'pali' => trim($pali),
+                'translation' => trim($translation),
+                'notes' => []
+            ];
+        }
+
+        // An unterminated note block would otherwise vanish silently; keep its text.
+        if ($inCodeBlock && trim($codeBlockContent) !== '') {
+            $pendingNotes[] = trim($codeBlockContent);
+        }
+
+        // Flush the record that was still open when the input ended.
+        if ($currentRecord !== null) {
+            $currentRecord['notes'] = $pendingNotes;
+            $records[] = $currentRecord;
+        }
+
+        return $records;
+    }
+
+    /**
+     * Read a file and parse its contents with parse().
+     *
+     * @param string $filePath Path of the file to read.
+     * @return array Records in the format returned by parse().
+     * @throws \InvalidArgumentException If the path is not an existing regular file.
+     * @throws \RuntimeException If the file exists but its contents cannot be read.
+     */
+    public function parseFile(string $filePath): array
+    {
+        // is_file() also rejects directories, which file_exists() would accept.
+        if (!is_file($filePath)) {
+            throw new \InvalidArgumentException("文件不存在: {$filePath}");
+        }
+
+        $content = file_get_contents($filePath);
+        if ($content === false) {
+            throw new \RuntimeException("无法读取文件: {$filePath}");
+        }
+
+        return $this->parse($content);
+    }
+}

+ 738 - 0
api-v12/app/Services/PaliTransliterationService.php

@@ -0,0 +1,738 @@
+<?php
+
+namespace App\Services;
+
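+/**
+ * Converts Pali text between Myanmar, Thai and Roman script.
+ *
+ * In a controller:
+ *
+ *     public function convert(PaliTransliterationService $pali)
+ *     {
+ *         $result = $pali->myanmarToRoman('သမ္မာ');
+ *     }
+ *
+ * Or resolve the service with the app() helper:
+ *
+ *     $pali = app(PaliTransliterationService::class);
+ *     $result = $pali->thaiToRoman('สมฺมา');
+ */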
+class PaliTransliterationService
+{
+    /**
+     * 缅文字母映射表
+     */
+    private const MYANMAR_CHARS = [
+        "ႁႏၵ",
+        "ခ္",
+        "ဃ္",
+        "ဆ္",
+        "ဈ္",
+        "ည္",
+        "ဌ္",
+        "ဎ္",
+        "ထ္",
+        "ဓ္",
+        "ဖ္",
+        "ဘ္",
+        "က္",
+        "ဂ္",
+        "စ္",
+        "ဇ္",
+        "ဉ္",
+        "ဠ္",
+        "ဋ္",
+        "ဍ္",
+        "ဏ္",
+        "တ္",
+        "ဒ္",
+        "န္",
+        "ဟ္",
+        "ပ္",
+        "ဗ္",
+        "မ္",
+        "ယ္",
+        "ရ္",
+        "လ္",
+        "ဝ္",
+        "သ္",
+        "င္",
+        "င်္",
+        "ဿ",
+        "ခ",
+        "ဃ",
+        "ဆ",
+        "ဈ",
+        "စျ",
+        "ည",
+        "ဌ",
+        "ဎ",
+        "ထ",
+        "ဓ",
+        "ဖ",
+        "ဘ",
+        "က",
+        "ဂ",
+        "စ",
+        "ဇ",
+        "ဉ",
+        "ဠ",
+        "ဋ",
+        "ဍ",
+        "ဏ",
+        "တ",
+        "ဒ",
+        "န",
+        "ဟ",
+        "ပ",
+        "ဗ",
+        "မ",
+        "ယ",
+        "ရ",
+        "႐",
+        "လ",
+        "ဝ",
+        "သ",
+        "aျ္",
+        "aွ္",
+        "aြ္",
+        "aြ",
+        "ၱ",
+        "ၳ",
+        "ၵ",
+        "ၶ",
+        "ၬ",
+        "ၭ",
+        "ၠ",
+        "ၡ",
+        "ၢ",
+        "ၣ",
+        "ၸ",
+        "ၹ",
+        "ၺ",
+        "႓",
+        "ၥ",
+        "ၧ",
+        "ၨ",
+        "ၩ",
+        "်",
+        "ျ",
+        "ႅ",
+        "ၼ",
+        "ွ",
+        "ႇ",
+        "ႆ",
+        "ၷ",
+        "ၲ",
+        "႒",
+        "႗",
+        "ၯ",
+        "ၮ",
+        "႑",
+        "kaၤ",
+        "gaၤ",
+        "khaၤ",
+        "ghaၤ",
+        "aှ",
+        "aိံ",
+        "aုံ",
+        "aော",
+        "aေါ",
+        "aအံ",
+        "aဣံ",
+        "aဥံ",
+        "aံ",
+        "aာ",
+        "aါ",
+        "aိ",
+        "aီ",
+        "aု",
+        "aဳ",
+        "aူ",
+        "aေ",
+        "အါ",
+        "အာ",
+        "အ",
+        "ဣ",
+        "ဤ",
+        "ဥ",
+        "ဦ",
+        "ဧ",
+        "ဩ",
+        "ႏ",
+        "ၪ",
+        "a္",
+        "္",
+        "aံ",
+        "ေss",
+        "ေkh",
+        "ေgh",
+        "ေch",
+        "ေjh",
+        "ေññ",
+        "ေṭh",
+        "ေḍh",
+        "ေth",
+        "ေdh",
+        "ေph",
+        "ေbh",
+        "ေk",
+        "ေg",
+        "ေc",
+        "ေj",
+        "ေñ",
+        "ေḷ",
+        "ေṭ",
+        "ေḍ",
+        "ေṇ",
+        "ေt",
+        "ေd",
+        "ေn",
+        "ေh",
+        "ေp",
+        "ေb",
+        "ေm",
+        "ေy",
+        "ေr",
+        "ေl",
+        "ေv",
+        "ေs",
+        "ေy",
+        "ေv",
+        "ေr",
+        "ea",
+        "eā",
+        "၁",
+        "၂",
+        "၃",
+        "၄",
+        "၅",
+        "၆",
+        "၇",
+        "၈",
+        "၉",
+        "၀",
+        "း",
+        "့",
+        "။",
+        "၊"
+    ];
+
+    /**
+     * 罗马巴利字母映射表
+     */
+    private const ROMAN_CHARS = [
+        "ndra",
+        "kh",
+        "gh",
+        "ch",
+        "jh",
+        "ññ",
+        "ṭh",
+        "ḍh",
+        "th",
+        "dh",
+        "ph",
+        "bh",
+        "k",
+        "g",
+        "c",
+        "j",
+        "ñ",
+        "ḷ",
+        "ṭ",
+        "ḍ",
+        "ṇ",
+        "t",
+        "d",
+        "n",
+        "h",
+        "p",
+        "b",
+        "m",
+        "y",
+        "r",
+        "l",
+        "v",
+        "s",
+        "ṅ",
+        "ṅ",
+        "ssa",
+        "kha",
+        "gha",
+        "cha",
+        "jha",
+        "jha",
+        "ñña",
+        "ṭha",
+        "ḍha",
+        "tha",
+        "dha",
+        "pha",
+        "bha",
+        "ka",
+        "ga",
+        "ca",
+        "ja",
+        "ña",
+        "ḷa",
+        "ṭa",
+        "ḍa",
+        "ṇa",
+        "ta",
+        "da",
+        "na",
+        "ha",
+        "pa",
+        "ba",
+        "ma",
+        "ya",
+        "ra",
+        "ra",
+        "la",
+        "va",
+        "sa",
+        "ya",
+        "va",
+        "ra",
+        "ra",
+        "្ta",
+        "្tha",
+        "្da",
+        "្dha",
+        "្ṭa",
+        "្ṭha",
+        "្ka",
+        "្kha",
+        "្ga",
+        "្gha",
+        "្pa",
+        "្pha",
+        "្ba",
+        "្bha",
+        "្ca",
+        "្cha",
+        "្ja",
+        "្jha",
+        "្a",
+        "្ya",
+        "្la",
+        "្ma",
+        "្va",
+        "្ha",
+        "ssa",
+        "na",
+        "ta",
+        "ṭṭha",
+        "ṭṭa",
+        "ḍḍha",
+        "ḍḍa",
+        "ṇḍa",
+        "ṅka",
+        "ṅga",
+        "ṅkha",
+        "ṅgha",
+        "ha",
+        "iṃ",
+        "uṃ",
+        "o",
+        "o",
+        "aṃ",
+        "iṃ",
+        "uṃ",
+        "aṃ",
+        "ā",
+        "ā",
+        "i",
+        "ī",
+        "u",
+        "u",
+        "ū",
+        "e",
+        "ā",
+        "ā",
+        "a",
+        "i",
+        "ī",
+        "u",
+        "ū",
+        "e",
+        "o",
+        "n",
+        "ñ",
+        "",
+        "",
+        "aṃ",
+        "sse",
+        "khe",
+        "ghe",
+        "che",
+        "jhe",
+        "ññe",
+        "ṭhe",
+        "ḍhe",
+        "the",
+        "dhe",
+        "phe",
+        "bhe",
+        "ke",
+        "ge",
+        "ce",
+        "je",
+        "ñe",
+        "ḷe",
+        "ṭe",
+        "ḍe",
+        "ṇe",
+        "te",
+        "de",
+        "ne",
+        "he",
+        "pe",
+        "be",
+        "me",
+        "ye",
+        "re",
+        "le",
+        "ve",
+        "se",
+        "ye",
+        "ve",
+        "re",
+        "e",
+        "o",
+        "1",
+        "2",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9",
+        "0",
+        "\"",
+        "'",
+        ".",
+        ","
+    ];
+
+    /**
+     * 泰文字母映射表
+     */
+    private const THAI_CHARS = [
+        "นฺทฺร",
+        "ขฺ",
+        "ฆฺ",
+        "ฉฺ",
+        "ฌฺ",
+        "ญฺ",
+        "ฐฺ",
+        "ฑฺ",
+        "ถฺ",
+        "ธฺ",
+        "ผฺ",
+        "ภฺ",
+        "กฺ",
+        "คฺ",
+        "จฺ",
+        "ชฺ",
+        "ญฺ",
+        "ฬฺ",
+        "ฏฺ",
+        "ฑฺ",
+        "ณฺ",
+        "ตฺ",
+        "ทฺ",
+        "นฺ",
+        "หฺ",
+        "ปฺ",
+        "พฺ",
+        "มฺ",
+        "ยฺ",
+        "รฺ",
+        "ลฺ",
+        "วฺ",
+        "สฺ",
+        "งฺ",
+        "งฺ",
+        "สฺส",
+        "ข",
+        "ฆ",
+        "ฉ",
+        "ฌ",
+        "ฌ",
+        "ญฺญ",
+        "ฐ",
+        "ฑ",
+        "ถ",
+        "ธ",
+        "ผ",
+        "ภ",
+        "ก",
+        "ค",
+        "จ",
+        "ช",
+        "ญ",
+        "ฬ",
+        "ฏ",
+        "ฑ",
+        "ณ",
+        "ต",
+        "ท",
+        "น",
+        "ห",
+        "ป",
+        "พ",
+        "ม",
+        "ย",
+        "ร",
+        "ร",
+        "ล",
+        "ว",
+        "ส",
+        "ฺย",
+        "ฺว",
+        "ฺร",
+        "ร",
+        "ตฺต",
+        "ตฺถ",
+        "ทฺท",
+        "ทฺธ",
+        "ฏฺฏ",
+        "ฏฺฐ",
+        "กฺก",
+        "ขฺข",
+        "คฺค",
+        "ฆฺฆ",
+        "ปฺป",
+        "ผฺผ",
+        "พฺพ",
+        "ภฺภ",
+        "จฺจ",
+        "ฉฺฉ",
+        "ชฺช",
+        "ฌฺฌ",
+        "ฺ",
+        "ฺย",
+        "ฺล",
+        "ฺม",
+        "ฺว",
+        "ฺห",
+        "สฺส",
+        "น",
+        "ต",
+        "ฏฺฐ",
+        "ฏฺฏ",
+        "ฑฺฒ",
+        "ฑฺฑ",
+        "ณฺฑ",
+        "งฺก",
+        "งฺค",
+        "งฺข",
+        "งฺฆ",
+        "ห",
+        "ิํ",
+        "ุํ",
+        "โอ",
+        "โอ",
+        "อํ",
+        "อิํ",
+        "อุํ",
+        "ํ",
+        "า",
+        "า",
+        "ิ",
+        "ี",
+        "ุ",
+        "ุ",
+        "ู",
+        "เ",
+        "อา",
+        "อา",
+        "อ",
+        "อิ",
+        "อี",
+        "อุ",
+        "อู",
+        "เอ",
+        "โอ",
+        "น",
+        "ญ",
+        "",
+        "ฺ",
+        "ํ",
+        "เสฺส",
+        "เข",
+        "เฆ",
+        "เฉ",
+        "เฌ",
+        "เญฺญ",
+        "เฐ",
+        "เฑ",
+        "เถ",
+        "เธ",
+        "เผ",
+        "เภ",
+        "เก",
+        "เค",
+        "เจ",
+        "เช",
+        "เญ",
+        "เฬ",
+        "เฏ",
+        "เฑ",
+        "เณ",
+        "เต",
+        "เท",
+        "เน",
+        "เห",
+        "เป",
+        "เพ",
+        "เม",
+        "เย",
+        "เร",
+        "เล",
+        "เว",
+        "เส",
+        "เย",
+        "เว",
+        "เร",
+        "เอ",
+        "โอ",
+        "๑",
+        "๒",
+        "๓",
+        "๔",
+        "๕",
+        "๖",
+        "๗",
+        "๘",
+        "๙",
+        "๐",
+        "ํ",
+        "ฺ",
+        "ฯ",
+        "ฯลฯ"
+    ];
+
+    /**
+     * Transliterate Myanmar-script Pali into Roman script.
+     *
+     * Every MYANMAR_CHARS entry is swapped for the ROMAN_CHARS entry at the
+     * same index. str_replace() works through the pairs in table order, so a
+     * later pair also sees the output of the earlier ones.
+     *
+     * @param string $input Text containing Myanmar-script Pali.
+     * @return string The text after all table replacements.
+     */
+    public function myanmarToRoman(string $input): string
+    {
+        $search = self::MYANMAR_CHARS;
+        $replace = self::ROMAN_CHARS;
+
+        return str_replace($search, $replace, $input);
+    }
+
+    /**
+     * Transliterate Roman-script Pali into Myanmar script.
+     *
+     * @param string $input Text containing Roman-script Pali.
+     * @return string The text after all table replacements.
+     */
+    public function romanToMyanmar(string $input): string
+    {
+        return $this->romanToScript($input, self::MYANMAR_CHARS);
+    }
+
+    /**
+     * Transliterate Thai-script Pali into Roman script.
+     *
+     * Every THAI_CHARS entry is swapped for the ROMAN_CHARS entry at the same
+     * index, pair after pair in table order.
+     *
+     * @param string $input Text containing Thai-script Pali.
+     * @return string The text after all table replacements.
+     */
+    public function thaiToRoman(string $input): string
+    {
+        return str_replace(self::THAI_CHARS, self::ROMAN_CHARS, $input);
+    }
+
+    /**
+     * Transliterate Roman-script Pali into Thai script.
+     *
+     * @param string $input Text containing Roman-script Pali.
+     * @return string The text after all table replacements.
+     */
+    public function romanToThai(string $input): string
+    {
+        return $this->romanToScript($input, self::THAI_CHARS);
+    }
+
+    /**
+     * Replace Roman sequences with their counterparts from a script table.
+     *
+     * Shared implementation of romanToMyanmar() and romanToThai().
+     *
+     * @param string $input       Text containing Roman-script Pali.
+     * @param array  $scriptChars Table that is index-aligned with ROMAN_CHARS.
+     * @return string The text after all replacements.
+     */
+    private function romanToScript(string $input, array $scriptChars): string
+    {
+        // ROMAN_CHARS contains duplicates; the first occurrence decides which
+        // script sequence a Roman sequence maps back to.
+        $mapping = [];
+        foreach (self::ROMAN_CHARS as $index => $roman) {
+            if (!isset($mapping[$roman])) {
+                $mapping[$roman] = $scriptChars[$index];
+            }
+        }
+
+        // Longest key first (byte length) so that e.g. "kha" is tried before
+        // "ka"; keys of equal length are ordered with strcmp().
+        uksort($mapping, static function ($a, $b): int {
+            // PHP stores numeric-string keys such as "1" as integers.
+            $a = (string) $a;
+            $b = (string) $b;
+
+            $lenDiff = strlen($b) - strlen($a);
+            if ($lenDiff !== 0) {
+                return $lenDiff;
+            }
+            return strcmp($a, $b);
+        });
+
+        return str_replace(array_keys($mapping), array_values($mapping), $input);
+    }
+
+    /**
+     * Transliterate Myanmar-script Pali into Thai script.
+     *
+     * The text is first converted to Roman script and from there to Thai.
+     *
+     * @param string $input Text containing Myanmar-script Pali.
+     * @return string The result of the two chained conversions.
+     */
+    public function myanmarToThai(string $input): string
+    {
+        return $this->romanToThai($this->myanmarToRoman($input));
+    }
+
+    /**
+     * Transliterate Thai-script Pali into Myanmar script.
+     *
+     * The text is first converted to Roman script and from there to Myanmar.
+     *
+     * @param string $input Text containing Thai-script Pali.
+     * @return string The result of the two chained conversions.
+     */
+    public function thaiToMyanmar(string $input): string
+    {
+        return $this->romanToMyanmar($this->thaiToRoman($input));
+    }
+
+    /**
+     * Detect the script of the input and transliterate it into Roman script.
+     *
+     * Myanmar is checked before Thai: input containing any code point in
+     * U+1000..U+109F goes through myanmarToRoman(), otherwise input containing
+     * any code point in U+0E00..U+0E7F goes through thaiToRoman(). Anything
+     * else is returned unchanged.
+     *
+     * @param string $input Text in Myanmar, Thai or Roman script.
+     * @return string The Roman transliteration, or the input itself when
+     *                neither script was detected.
+     */
+    public function toRoman(string $input): string
+    {
+        $containsMyanmar = (bool) preg_match('/[\x{1000}-\x{109F}]/u', $input);
+        if ($containsMyanmar) {
+            return $this->myanmarToRoman($input);
+        }
+
+        $containsThai = (bool) preg_match('/[\x{0E00}-\x{0E7F}]/u', $input);
+
+        return $containsThai ? $this->thaiToRoman($input) : $input;
+    }
+}

+ 244 - 0
api-v12/tests/Unit/Services/NissayaParserTest.php

@@ -0,0 +1,244 @@
+<?php
+
+namespace Tests\Unit\Services;
+
+use App\Services\NissayaParser;
+use Tests\TestCase;
+
+/**
+ * php artisan test --filter NissayaParserTest
+ */
+class NissayaParserTest extends TestCase
+{
+    private NissayaParser $parser;
+
+    protected function setUp(): void
+    {
+        parent::setUp();
+        $this->parser = new NissayaParser();
+    }
+
+    /**
+     * Standard format: one record per "pali=translation" line.
+     */
+    public function test_parse_standard_format(): void
+    {
+        $content = "pañcamassa=ပဉ္စမဝဂ်၏\npaṭhame=ပထမသုတ်၌";
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(2, $result);
+        $this->assertEquals('pañcamassa', $result[0]['pali']);
+        $this->assertEquals('ပဉ္စမဝဂ်၏', $result[0]['translation']);
+        $this->assertEmpty($result[0]['notes']);
+
+        $this->assertEquals('paṭhame', $result[1]['pali']);
+        $this->assertEquals('ပထမသုတ်၌', $result[1]['translation']);
+    }
+
+    /**
+     * A fenced block after a record becomes a note of that record.
+     */
+    public function test_parse_with_single_note(): void
+    {
+        $content = "uttānāti=ဥတ္တာနာ-ဟူသည်ကား\n```\nထင်ရှားသော\n```\nappaṭicchannā=ဖုံးကွယ်ခြင်းမရှိသော";
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(2, $result);
+        $this->assertEquals('uttānāti', $result[0]['pali']);
+        $this->assertEquals('ဥတ္တာနာ-ဟူသည်ကား', $result[0]['translation']);
+        $this->assertCount(1, $result[0]['notes']);
+        $this->assertEquals('ထင်ရှားသော', $result[0]['notes'][0]);
+    }
+
+    /**
+     * Special case 1: Pali and translation are on separate lines with a note
+     * between them, and the translation line starts with "=".
+     */
+    public function test_parse_separated_pali_and_translation_with_note(): void
+    {
+        $content = "uttānāti\n```\nထင်ရှားသော\n```\n=ဥတ္တာနာ-ဟူသည်ကား";
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(1, $result);
+        $this->assertEquals('uttānāti', $result[0]['pali']);
+        // The "=" line must end up as the translation of the open record.
+        $this->assertEquals('ဥတ္တာနာ-ဟူသည်ကား', $result[0]['translation']);
+        $this->assertContains('ထင်ရှားသော', $result[0]['notes']);
+    }
+
+    /**
+     * Special case 2: the "=" ends the Pali line and the translation follows
+     * after a note.
+     */
+    public function test_parse_with_equal_sign_on_previous_line(): void
+    {
+        $content = "uttānāti=\n```\nထင်ရှားသော\n```\nဥတ္တာနာ-ဟူသည်ကား";
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(1, $result);
+        $this->assertEquals('uttānāti', $result[0]['pali']);
+        $this->assertEquals('ဥတ္တာနာ-ဟူသည်ကား', $result[0]['translation']);
+        $this->assertCount(1, $result[0]['notes']);
+        $this->assertEquals('ထင်ရှားသော', $result[0]['notes'][0]);
+    }
+
+    /**
+     * Several fenced blocks after one record yield several notes, in order.
+     */
+    public function test_parse_with_multiple_notes(): void
+    {
+        $content = "uttānāti=ဥတ္တာနာ-ဟူသည်ကား\n```\nထင်ရှားသော\n```\n```\n第二个注释\n```";
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(1, $result);
+        $this->assertEquals('uttānāti', $result[0]['pali']);
+        $this->assertCount(2, $result[0]['notes']);
+        $this->assertEquals('ထင်ရှားသော', $result[0]['notes'][0]);
+        $this->assertEquals('第二个注释', $result[0]['notes'][1]);
+    }
+
+    /**
+     * Notes may also be fenced with `` instead of ```.
+     */
+    public function test_parse_with_double_backtick_notes(): void
+    {
+        $content = "uttānāti=ဥတ္တာနာ-ဟူသည်ကား\n``\n注释内容\n``";
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(1, $result);
+        $this->assertCount(1, $result[0]['notes']);
+        $this->assertEquals('注释内容', $result[0]['notes'][0]);
+    }
+
+    /**
+     * A longer input mixing plain records, both fence styles and multiple notes.
+     */
+    public function test_parse_complex_mixed_format(): void
+    {
+        $content = <<<TEXT
+pañcamassa=ပဉ္စမဝဂ်၏
+paṭhame=ပထမသုတ်၌
+uttānāti=ဥတ္တာနာ-ဟူသည်ကား
+```
+ထင်ရှားသော
+```
+appaṭicchannā=ဖုံးကွယ်ခြင်းမရှိသော
+``
+另一种注释格式
+``
+dhammā=ဓမ္မာ-ဟူသည်ကား
+```
+第一个注释
+```
+```
+第二个注释
+```
+TEXT;
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(5, $result);
+
+        // Record 1: no notes.
+        $this->assertEquals('pañcamassa', $result[0]['pali']);
+        $this->assertEmpty($result[0]['notes']);
+
+        // Record 2.
+        $this->assertEquals('paṭhame', $result[1]['pali']);
+
+        // Record 3: a single note.
+        $this->assertEquals('uttānāti', $result[2]['pali']);
+        $this->assertCount(1, $result[2]['notes']);
+
+        // Record 4: a note fenced with ``.
+        $this->assertEquals('appaṭicchannā', $result[3]['pali']);
+        $this->assertCount(1, $result[3]['notes']);
+        $this->assertEquals('另一种注释格式', $result[3]['notes'][0]);
+
+        // Record 5: two notes.
+        $this->assertEquals('dhammā', $result[4]['pali']);
+        $this->assertCount(2, $result[4]['notes']);
+        $this->assertEquals('第一个注释', $result[4]['notes'][0]);
+        $this->assertEquals('第二个注释', $result[4]['notes'][1]);
+    }
+
+    /**
+     * Empty input yields no records.
+     */
+    public function test_parse_empty_content(): void
+    {
+        $result = $this->parser->parse('');
+
+        $this->assertEmpty($result);
+    }
+
+    /**
+     * Input consisting only of blank lines yields no records.
+     */
+    public function test_parse_only_blank_lines(): void
+    {
+        $result = $this->parser->parse("\n\n\n");
+
+        $this->assertEmpty($result);
+    }
+
+    /**
+     * parseFile() reads a real temporary file and parses its contents.
+     */
+    public function test_parse_file(): void
+    {
+        // Write the fixture to a temporary file.
+        $testContent = "pañcamassa=ပဉ္စမဝဂ်၏\npaṭhame=ပထမသုတ်၌";
+        $tempFile = tempnam(sys_get_temp_dir(), 'pali_test_');
+        $this->assertNotFalse($tempFile);
+        file_put_contents($tempFile, $testContent);
+
+        try {
+            $result = $this->parser->parseFile($tempFile);
+
+            $this->assertCount(2, $result);
+            $this->assertEquals('pañcamassa', $result[0]['pali']);
+            $this->assertEquals('paṭhame', $result[1]['pali']);
+        } finally {
+            // Remove the temporary file even when an assertion fails.
+            if (file_exists($tempFile)) {
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * parseFile() throws for a path that does not exist.
+     */
+    public function test_parse_file_not_found(): void
+    {
+        $this->expectException(\InvalidArgumentException::class);
+        $this->expectExceptionMessage('文件不存在');
+
+        $this->parser->parseFile('/path/to/nonexistent/file.txt');
+    }
+
+    /**
+     * A fenced block spanning several lines is kept as one note.
+     */
+    public function test_parse_multiline_note_content(): void
+    {
+        $content = <<<TEXT
+uttānāti=ဥတ္တာနာ-ဟူသည်ကား
+```
+第一行注释
+第二行注释
+第三行注释
+```
+TEXT;
+
+        $result = $this->parser->parse($content);
+
+        $this->assertCount(1, $result);
+        $this->assertCount(1, $result[0]['notes']);
+        $this->assertStringContainsString('第一行注释', $result[0]['notes'][0]);
+        $this->assertStringContainsString('第二行注释', $result[0]['notes'][0]);
+        $this->assertStringContainsString('第三行注释', $result[0]['notes'][0]);
+    }
+}

+ 253 - 0
api-v12/tests/Unit/Services/PaliTransliterationServiceTest.php

@@ -0,0 +1,253 @@
+<?php
+
+namespace Tests\Unit\Services;
+
+use App\Services\PaliTransliterationService;
+use PHPUnit\Framework\TestCase;
+
+class PaliTransliterationServiceTest extends TestCase
+{
+    private PaliTransliterationService $service;
+
+    protected function setUp(): void
+    {
+        parent::setUp();
+        $this->service = new PaliTransliterationService();
+    }
+
+    /**
+     * Myanmar to Roman: consonants, combinations, vowel signs, digits, punctuation.
+     */
+    public function test_myanmar_to_roman_conversion(): void
+    {
+        // Basic consonants
+        $this->assertEquals('ka', $this->service->myanmarToRoman('က'));
+        $this->assertEquals('kha', $this->service->myanmarToRoman('ခ'));
+        $this->assertEquals('ga', $this->service->myanmarToRoman('ဂ'));
+
+        // Complex combinations
+        $this->assertEquals('ssa', $this->service->myanmarToRoman('ဿ'));
+        $this->assertEquals('ndra', $this->service->myanmarToRoman('ႁႏၵ'));
+
+        // Vowel signs (the leading Latin "a" is the inherent vowel they replace)
+        $this->assertEquals('ā', $this->service->myanmarToRoman('aာ'));
+        $this->assertEquals('i', $this->service->myanmarToRoman('aိ'));
+        $this->assertEquals('ī', $this->service->myanmarToRoman('aီ'));
+
+        // Digits
+        $this->assertEquals('1', $this->service->myanmarToRoman('၁'));
+        $this->assertEquals('0', $this->service->myanmarToRoman('၀'));
+
+        // Punctuation
+        $this->assertEquals('.', $this->service->myanmarToRoman('။'));
+        $this->assertEquals(',', $this->service->myanmarToRoman('၊'));
+    }
+
+    /**
+     * Roman to Myanmar.
+     */
+    public function test_roman_to_myanmar_conversion(): void
+    {
+        $this->assertEquals('က', $this->service->romanToMyanmar('ka'));
+        $this->assertEquals('ခ', $this->service->romanToMyanmar('kha'));
+        $this->assertEquals('ဿ', $this->service->romanToMyanmar('ssa'));
+        $this->assertEquals('၁', $this->service->romanToMyanmar('1'));
+    }
+
+    /**
+     * Thai to Roman: consonants, combinations, vowels, digits, punctuation.
+     */
+    public function test_thai_to_roman_conversion(): void
+    {
+        // Basic consonants
+        $this->assertEquals('ka', $this->service->thaiToRoman('ก'));
+        $this->assertEquals('kha', $this->service->thaiToRoman('ข'));
+        $this->assertEquals('ga', $this->service->thaiToRoman('ค'));
+
+        // Complex combinations
+        $this->assertEquals('ssa', $this->service->thaiToRoman('สฺส'));
+        $this->assertEquals('ndra', $this->service->thaiToRoman('นฺทฺร'));
+
+        // Vowels
+        $this->assertEquals('ā', $this->service->thaiToRoman('า'));
+        $this->assertEquals('i', $this->service->thaiToRoman('ิ'));
+
+        // Digits
+        $this->assertEquals('1', $this->service->thaiToRoman('๑'));
+        $this->assertEquals('0', $this->service->thaiToRoman('๐'));
+
+        // Punctuation: Thai ฯ becomes a Roman full stop
+        $this->assertEquals('.', $this->service->thaiToRoman('ฯ'));
+    }
+
+    /**
+     * Roman to Thai.
+     */
+    public function test_roman_to_thai_conversion(): void
+    {
+        $this->assertEquals('ก', $this->service->romanToThai('ka'));
+        $this->assertEquals('ข', $this->service->romanToThai('kha'));
+        $this->assertEquals('สฺส', $this->service->romanToThai('ssa'));
+        $this->assertEquals('๑', $this->service->romanToThai('1'));
+    }
+
+    /**
+     * Myanmar to Thai.
+     */
+    public function test_myanmar_to_thai_conversion(): void
+    {
+        $this->assertEquals('ก', $this->service->myanmarToThai('က'));
+        $this->assertEquals('ข', $this->service->myanmarToThai('ခ'));
+        $this->assertEquals('สฺส', $this->service->myanmarToThai('ဿ'));
+    }
+
+    /**
+     * Thai to Myanmar.
+     */
+    public function test_thai_to_myanmar_conversion(): void
+    {
+        $this->assertEquals('က', $this->service->thaiToMyanmar('ก'));
+        $this->assertEquals('ခ', $this->service->thaiToMyanmar('ข'));
+        $this->assertEquals('ဿ', $this->service->thaiToMyanmar('สฺส'));
+    }
+
+    /**
+     * Auto-detection with Myanmar input.
+     */
+    public function test_auto_detect_myanmar_input(): void
+    {
+        $input = 'သမ္မာ'; // Myanmar script
+        $result = $this->service->toRoman($input);
+
+        // Only Roman letters, plain or with diacritics, may remain. The "u"
+        // modifier makes the class match whole UTF-8 characters, not bytes.
+        $this->assertMatchesRegularExpression('/^[a-zA-Zāīūṅñṭḍṇḷṃ]+$/u', $result);
+    }
+
+    /**
+     * Auto-detection with Thai input.
+     */
+    public function test_auto_detect_thai_input(): void
+    {
+        $input = 'สมฺมา'; // Thai script
+        $result = $this->service->toRoman($input);
+
+        // Only Roman letters, plain or with diacritics, may remain. The "u"
+        // modifier makes the class match whole UTF-8 characters, not bytes.
+        $this->assertMatchesRegularExpression('/^[a-zA-Zāīūṅñṭḍṇḷṃ]+$/u', $result);
+    }
+
+    /**
+     * Auto-detection with Roman input, which needs no conversion.
+     */
+    public function test_auto_detect_roman_input(): void
+    {
+        $input = 'sammā';
+        $result = $this->service->toRoman($input);
+
+        // The input must come back unchanged.
+        $this->assertEquals($input, $result);
+    }
+
+    /**
+     * The empty string stays empty in every direction tested here.
+     */
+    public function test_empty_string_conversion(): void
+    {
+        $this->assertEquals('', $this->service->myanmarToRoman(''));
+        $this->assertEquals('', $this->service->thaiToRoman(''));
+        $this->assertEquals('', $this->service->toRoman(''));
+    }
+
+    /**
+     * Whole-word conversion from Myanmar script.
+     */
+    public function test_full_word_myanmar_conversion(): void
+    {
+        // The word "dhamma"
+        $myanmar = 'ဓမ္မ';
+        $expected = 'dhamma';
+
+        $this->assertEquals($expected, $this->service->myanmarToRoman($myanmar));
+    }
+
+    /**
+     * Whole-word conversion from Thai script.
+     */
+    public function test_full_word_thai_conversion(): void
+    {
+        // The word "dhamma"
+        $thai = 'ธมฺม';
+        $expected = 'dhamma';
+
+        $this->assertEquals($expected, $this->service->thaiToRoman($thai));
+    }
+
+    /**
+     * Round trip starting from Myanmar script.
+     *
+     * The mapping tables are not one-to-one, so the text that comes back may
+     * differ from the original. The test therefore checks that converting the
+     * result to Roman again gives the same Roman text.
+     */
+    public function test_myanmar_round_trip_conversion(): void
+    {
+        $original = 'က';
+        $roman = $this->service->myanmarToRoman($original);
+
+        // Conversion to Roman works.
+        $this->assertEquals('ka', $roman);
+
+        // Conversion back yields some Myanmar text (possibly not identical).
+        $backToMyanmar = $this->service->romanToMyanmar($roman);
+        $this->assertNotEmpty($backToMyanmar);
+
+        // Converting that to Roman again is stable.
+        $this->assertEquals($roman, $this->service->myanmarToRoman($backToMyanmar));
+    }
+
+    /**
+     * Round trip starting from Thai script.
+     *
+     * The mapping tables are not one-to-one, so the text that comes back may
+     * differ from the original. The test therefore checks that converting the
+     * result to Roman again gives the same Roman text.
+     */
+    public function test_thai_round_trip_conversion(): void
+    {
+        $original = 'ก';
+        $roman = $this->service->thaiToRoman($original);
+
+        // Conversion to Roman works.
+        $this->assertEquals('ka', $roman);
+
+        // Conversion back yields some Thai text (possibly not identical).
+        $backToThai = $this->service->romanToThai($roman);
+        $this->assertNotEmpty($backToThai);
+
+        // Converting that to Roman again is stable.
+        $this->assertEquals($roman, $this->service->thaiToRoman($backToThai));
+    }
+
+    /**
+     * Mixed content: characters without a mapping pass through untouched.
+     */
+    public function test_mixed_content_with_unmapped_characters(): void
+    {
+        $input = 'က test ခ';
+        $result = $this->service->myanmarToRoman($input);
+
+        // Myanmar is converted, the English word is left alone.
+        $this->assertStringContainsString('test', $result);
+        $this->assertStringContainsString('ka', $result);
+        $this->assertStringContainsString('kha', $result);
+    }
+
+    /**
+     * Special combined characters.
+     */
+    public function test_special_combined_characters(): void
+    {
+        // Nasal combinations
+        $this->assertEquals('ṅka', $this->service->myanmarToRoman('kaၤ'));
+        $this->assertEquals('ṅga', $this->service->myanmarToRoman('gaၤ'));
+
+        // Double consonants
+        $this->assertEquals('ṭṭha', $this->service->myanmarToRoman('႒'));
+        $this->assertEquals('ṭṭa', $this->service->myanmarToRoman('႗'));
+    }
+}