5 maanden geleden · 8ac27b6c39
--- a/api-v8/app/Console/Commands/UpgradeSystemCommentary.php
+++ b/api-v8/app/Console/Commands/UpgradeSystemCommentary.php
@@ -0,0 +1,425 @@
 
				+<?php
			
 
				+
			
 
				+namespace App\Console\Commands;
			
 
				+
			
 
				+use Illuminate\Console\Command;
			
 
				+use Illuminate\Support\Facades\Log;
			
 
				+
			
 
				+use App\Models\RelatedParagraph;
			
 
				+use App\Models\BookTitle;
			
 
				+use App\Models\PaliText;
			
 
				+use App\Models\TagMap;
			
 
				+use App\Models\Tag;
			
 
				+use App\Models\PaliSentence;
			
 
				+
			
 
				+use App\Services\SearchPaliDataService;
			
 
				+use App\Services\OpenAIService;
			
 
				+use App\Services\AIModelService;
			
 
				+use App\Services\SentenceService;
			
 
				+
			
 
				+use App\Helpers\LlmResponseParser;
			
 
				+use App\Http\Api\ChannelApi;
			
 
				+
			
 
				+class UpgradeSystemCommentary extends Command
			
 
				+{
			
 
				+    /**
			
 
				+     * The name and signature of the console command.
			
 
				+     * php artisan upgrade:sys.commentary
			
 
				+     * @var string
			
 
				+     */
			
 
				+    protected $signature = 'upgrade:sys.commentary {--book=} {--para=} {--list} {--model=}';
			
 
				+    protected $prompt = <<<md
			
 
				+    你是一个注释对照阅读助手。
			
 
				+    pali 是巴利原文，jsonl格式， 每条记录是一个句子。包括id 和 content 两个字段
			
 
				+    commentary 是pali的注释，jsonl 格式，每条记录是一个句子。包括id 和 content 两个字段
			
 
				+    commentary里面的内容是对pali内容的注释
			
 
				+    commentary里面的黑体字，说明该句子是注释pali中的对应的巴利文。
			
 
				+    你需要按照顺序将commentary中的句子与pali原文对照,。
			
 
				+    输出格式jsonl
			
 
				+    只输出pali数据
			
 
				+    在pali句子数据里面增加一个字段“commentary” 里面放这个句子对应的commentary句子的id
			
 
				+    不要输出content字段，只输出id,commentary字段
			
 
				+    直接输出jsonl数据，无需解释
			
 
				+
			
 
				+**关键规则：**
			
 
				+1. 根据commentary中的句子的意思找到与pali对应的句子
			
 
				+1. 如果commentary中的某个句子**有黑体字**，它应该放在pali中对应巴利词汇出现的句子之后
			
 
				+2. 如果commentary中的某个句子**没有黑体字**，请将其与**上面最近的有黑体字的commentary句子**合并在一起（保持在同一个引用块内），不要单独成行
			
 
				+3. 有些pali原文句子可能没有对应的注释
			
 
				+4. 请不要遗漏任何commentary中的句子，也不要打乱顺序
			
 
				+5. 同时保持pali的句子数量不变，不要增删
			
 
				+6. 应该将全部commentary中的句子都与pali句子对应，不要有遗漏
			
 
				+7. 对照排版时，请保持原来的字体设置：原来是黑体就是黑体，原来不是黑体就不是黑体。尤其是pali巴利原文，请不要改变字体
			
 
				+
			
 
				+**输出范例**
			
 
				+[
			
 
				+    {
			
 
				+        "id": "165-6-112-136",
			
 
				+        "content": "Yepi te, bho gotama, ahesuṃ atītamaddhānaṃ arahanto sammāsambuddhā tepi bhagavanto etaparamaṃyeva sammā bhikkhusaṅghaṃ paṭipādesuṃ – seyyathāpi etarahi bhotā gotamena sammā bhikkhusaṅgho paṭipādito.",
			
 
				+        "commentary": [
			
 
				+            "131-9-35-63",
			
 
				+            "131-9-64-72",
			
 
				+            "131-9-73-82",
			
 
				+            "131-9-83-95",
			
 
				+            "131-9-96-130"
			
 
				+        ]
			
 
				+    }
			
 
				+]
			
 
				+md;
			
 
				+    /**
			
 
				+     * The console command description.
			
 
				+     *
			
 
				+     * @var string
			
 
				+     */
			
 
				+    protected $description = 'Command description';
			
 
				+    protected $sentenceService;
			
 
				+    protected $modelService;
			
 
				+    protected $openAIService;
			
 
				+    protected $model;
			
 
				+    protected $tokensPerSentence = 0;
			
 
				+    /**
			
 
				+     * Create a new command instance.
			
 
				+     *
			
 
				+     * @return void
			
 
				+     */
			
 
				+    public function __construct(AIModelService $model, SentenceService $sent, OpenAIService $openAI)
			
 
				+    {
			
 
				+        $this->modelService = $model;
			
 
				+        $this->sentenceService = $sent;
			
 
				+        $this->openAIService = $openAI;
			
 
				+        parent::__construct();
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Execute the console command.
			
 
				+     *
			
 
				+     * @return int
			
 
				+     */
			
 
				+    public function handle()
			
 
				+    {
			
 
				+        if ($this->option('list')) {
			
 
				+            $result = RelatedParagraph::whereNotNull('book_name')
			
 
				+                ->groupBy('book_name')
			
 
				+                ->selectRaw('book_name,count(*)')
			
 
				+                ->get();
			
 
				+            foreach ($result as $key => $value) {
			
 
				+                $this->info($value['book_name'] . "[" . $value['count'] . "]");
			
 
				+            }
			
 
				+            return 0;
			
 
				+        }
			
 
				+        if ($this->option('model')) {
			
 
				+            $this->model = $this->modelService->getModelById($this->option('model'));
			
 
				+            $this->info("model:{$this->model['model']}");
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+        $channel = ChannelApi::getChannelByName('_System_commentary_');
			
 
				+
			
 
				+        $books = [];
			
 
				+        if ($this->option('book')) {
			
 
				+            $books[] = ['book_name' => $this->option('book')];
			
 
				+        } else {
			
 
				+            $books = RelatedParagraph::whereNotNull('book_name')
			
 
				+                ->where('cs_para', '>', 0)
			
 
				+                ->groupBy('book_name')
			
 
				+                ->select('book_name')
			
 
				+                ->get()->toArray();
			
 
				+        }
			
 
				+        foreach ($books as $key => $currBook) {
			
 
				+            $paragraphs = [];
			
 
				+            if ($this->option('para')) {
			
 
				+                $paragraphs[] = ['cs_para' => $this->option('para')];
			
 
				+            } else {
			
 
				+                $paragraphs = RelatedParagraph::where('book_name', $currBook['book_name'])
			
 
				+                    ->where('cs_para', '>', 0)
			
 
				+                    ->groupBy('cs_para')
			
 
				+                    ->select('cs_para')
			
 
				+                    ->get()->toArray();
			
 
				+            }
			
 
				+            foreach ($paragraphs as $key => $paragraph) {
			
 
				+                $message = 'ai commentary ' . $currBook['book_name'] . '-' . $paragraph['cs_para'];
			
 
				+                $this->info($message);
			
 
				+                Log::info($message);
			
 
				+                $result = RelatedParagraph::where('book_name', $currBook['book_name'])
			
 
				+                    ->where('cs_para', $paragraph['cs_para'])
			
 
				+                    ->where('book_id', '>', 0)
			
 
				+                    ->orderBy('book_id')
			
 
				+                    ->orderBy('para')
			
 
				+                    ->get();
			
 
				+                $pcdBooks = [];
			
 
				+                $type = [];
			
 
				+                foreach ($result as $rBook) {
			
 
				+                    # 把段落整合成书。有几本书就有几条输出纪录
			
 
				+                    if (!isset($pcdBooks[$rBook->book_id])) {
			
 
				+                        $bookType = $this->getBookType($rBook->book_id);
			
 
				+                        $pcdBooks[$rBook->book_id] = $bookType;
			
 
				+                        if (!isset($type[$bookType])) {
			
 
				+                            $type[$bookType] = [];
			
 
				+                        }
			
 
				+                        $type[$bookType][$rBook->book_id] = [];
			
 
				+                    }
			
 
				+                    $currType = $pcdBooks[$rBook->book_id];
			
 
				+                    $type[$currType][$rBook->book_id][] = ['book' => $rBook->book, 'para' => $rBook->para];
			
 
				+                }
			
 
				+                foreach ($type as $keyType => $info) {
			
 
				+                    Log::debug($keyType);
			
 
				+                    foreach ($info as $bookId => $paragraphs) {
			
 
				+                        Log::debug($bookId);
			
 
				+                        foreach ($paragraphs as  $paragraph) {
			
 
				+                            Log::debug($paragraph['book'] . '-' . $paragraph['para']);
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+
			
 
				+                //处理pali
			
 
				+                if (
			
 
				+                    $this->hasData($type, 'pāḷi') &&
			
 
				+                    $this->hasData($type, 'aṭṭhakathā')
			
 
				+                ) {
			
 
				+                    $paliJson = [];
			
 
				+                    foreach ($type['pāḷi'] as $keyBook => $paragraphs) {
			
 
				+                        foreach ($paragraphs as  $paraData) {
			
 
				+                            $sentData = $this->getParaContent($paraData['book'], $paraData['para']);
			
 
				+                            $paliJson = array_merge($paliJson, $sentData);
			
 
				+                        }
			
 
				+                    }
			
 
				+
			
 
				+                    $attaJson = [];
			
 
				+                    foreach ($type['aṭṭhakathā'] as $keyBook => $paragraphs) {
			
 
				+                        foreach ($paragraphs as  $paraData) {
			
 
				+                            $sentData = $this->getParaContent($paraData['book'], $paraData['para']);
			
 
				+                            $attaJson = array_merge($attaJson, $sentData);
			
 
				+                        }
			
 
				+                    }
			
 
				+
			
 
				+                    //llm 对齐
			
 
				+                    $result = $this->textAlign($paliJson, $attaJson);
			
 
				+                    //写入db
			
 
				+                    $this->save($result, $channel);
			
 
				+                }
			
 
				+
			
 
				+                //处理义注
			
 
				+                if (
			
 
				+                    $this->hasData($type, 'aṭṭhakathā') &&
			
 
				+                    $this->hasData($type, 'ṭīkā')
			
 
				+                ) {
			
 
				+                    $tikaResult = array();
			
 
				+                    foreach ($type['ṭīkā'] as $keyBook => $paragraphs) {
			
 
				+                        $tikaJson = [];
			
 
				+                        foreach ($paragraphs as $key => $paraData) {
			
 
				+                            $sentData = $this->getParaContent($paraData['book'], $paraData['para']);
			
 
				+                            $tikaJson = array_merge($tikaJson, $sentData);
			
 
				+                        }
			
 
				+
			
 
				+                        //llm 对齐
			
 
				+                        $result = $this->textAlign($attaJson, $tikaJson);
			
 
				+                        //将新旧数据合并 如果原来没有，就添加，有，就合并数据
			
 
				+                        foreach ($result as $new) {
			
 
				+                            $found = false;
			
 
				+                            foreach ($tikaResult as $key => $old) {
			
 
				+                                if ($old['id'] === $new['id']) {
			
 
				+                                    $found = true;
			
 
				+                                    if (isset($new['commentary']) && is_array($new['commentary'])) {
			
 
				+                                        $tikaResult[$key]['commentary'] = array_merge($tikaResult[$key]['commentary'], $new['commentary']);
			
 
				+                                    }
			
 
				+                                    break;
			
 
				+                                }
			
 
				+                            }
			
 
				+                            if (!$found) {
			
 
				+                                array_push($tikaResult, $new);
			
 
				+                            }
			
 
				+                        }
			
 
				+                    }
			
 
				+                    //写入db
			
 
				+                    $this->save($tikaResult, $channel);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        return 0;
			
 
				+    }
			
 
				+    private function hasData($typeData, $typeName)
			
 
				+    {
			
 
				+        if (
			
 
				+            !isset($typeData[$typeName]) ||
			
 
				+            $this->getParagraphNumber($typeData[$typeName]) === 0
			
 
				+        ) {
			
 
				+            Log::warning($typeName . ' data is missing');
			
 
				+            return false;
			
 
				+        }
			
 
				+        return true;
			
 
				+    }
			
 
				+    private function getParagraphNumber($type)
			
 
				+    {
			
 
				+        if (!isset($type) || !is_array($type)) {
			
 
				+            return 0;
			
 
				+        }
			
 
				+        $count = 0;
			
 
				+        foreach ($type as $bookId => $paragraphs) {
			
 
				+            $count += count($paragraphs);
			
 
				+        }
			
 
				+        return $count;
			
 
				+    }
			
 
				+    private function getBookType($bookId)
			
 
				+    {
			
 
				+        $bookTitle = BookTitle::where('sn', $bookId)->first();
			
 
				+        $paliTextUuid = PaliText::where('book', $bookTitle->book)->where('paragraph', $bookTitle->paragraph)->value('uid');
			
 
				+        $tagIds = TagMap::where('anchor_id', $paliTextUuid)->select('tag_id')->get();
			
 
				+        $tags = Tag::whereIn('id', $tagIds)->select('name')->get();
			
 
				+        foreach ($tags as $key => $tag) {
			
 
				+            if (in_array($tag->name, ['pāḷi', 'aṭṭhakathā', 'ṭīkā'])) {
			
 
				+                return $tag->name;
			
 
				+            }
			
 
				+        }
			
 
				+        return null;
			
 
				+    }
			
 
				+
			
 
				+    private function getParaContent($book, $para)
			
 
				+    {
			
 
				+        $sentenceService = app(SearchPaliDataService::class);
			
 
				+        $sentences = PaliSentence::where('book', $book)
			
 
				+            ->where('paragraph', $para)
			
 
				+            ->orderBy('word_begin')
			
 
				+            ->get();
			
 
				+        if (!$sentences) {
			
 
				+            return null;
			
 
				+        }
			
 
				+        $json = [];
			
 
				+        foreach ($sentences as $key => $sentence) {
			
 
				+            $content = $sentenceService->getSentenceText($book, $para, $sentence->word_begin, $sentence->word_end);
			
 
				+            $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
			
 
				+            $json[] = ['id' => $id, 'content' => $content['markdown']];
			
 
				+        }
			
 
				+        return $json;
			
 
				+    }
			
 
				+
			
 
				+    private function arrayIndexed(array $input): array
			
 
				+    {
			
 
				+        $output  = [];
			
 
				+        foreach ($input as $key => $value) {
			
 
				+            $value['id'] = $key;
			
 
				+            $output[] = $value;
			
 
				+        }
			
 
				+        return $output;
			
 
				+    }
			
 
				+    private function arrayUnIndexed(array $input, array $original, array $commentary): array
			
 
				+    {
			
 
				+        $output  = [];
			
 
				+        foreach ($input as $key => $value) {
			
 
				+            $value['id'] = $original[$key]['id'];
			
 
				+            if (isset($value['commentary'])) {
			
 
				+                $newCommentary = array_map(function ($n) use ($commentary) {
			
 
				+                    if (isset($commentary[$n])) {
			
 
				+                        return $commentary[$n]['id'];
			
 
				+                    }
			
 
				+                    return '';
			
 
				+                }, $value['commentary']);
			
 
				+                $value['commentary'] = $newCommentary;
			
 
				+            }
			
 
				+            $output[] = $value;
			
 
				+        }
			
 
				+        return $output;
			
 
				+    }
			
 
				+    private function textAlign(array $original, array $commentary)
			
 
				+    {
			
 
				+        if (!$this->model) {
			
 
				+            Log::error('model is invalid');
			
 
				+            return [];
			
 
				+        }
			
 
				+        $originalSn  = $this->arrayIndexed($original);
			
 
				+        $commentarySn  = $this->arrayIndexed($commentary);
			
 
				+
			
 
				+        $originalText = "```jsonl\n" . LlmResponseParser::jsonl_encode($originalSn) . "\n```";
			
 
				+        $commentaryText = "```jsonl\n" . LlmResponseParser::jsonl_encode($commentarySn) . "\n```";
			
 
				+
			
 
				+        Log::debug('ai request', [
			
 
				+            'original' => $originalText,
			
 
				+            'commentary' => $commentaryText
			
 
				+        ]);
			
 
				+
			
 
				+        $totalSentences = count($original) + count($commentary);
			
 
				+        $maxTokens = (int)($this->tokensPerSentence * $totalSentences * 1.5);
			
 
				+        $this->info("requesting…… {$totalSentences} sentences {$this->tokensPerSentence}tokens/sentence set {$maxTokens} max_tokens");
			
 
				+        Log::debug('requesting…… ' . $this->model['model']);
			
 
				+        $startAt = time();
			
 
				+        $response = $this->openAIService->setApiUrl($this->model['url'])
			
 
				+            ->setModel($this->model['model'])
			
 
				+            ->setApiKey($this->model['key'])
			
 
				+            ->setSystemPrompt($this->prompt)
			
 
				+            ->setTemperature(0.0)
			
 
				+            ->setStream(false)
			
 
				+            ->setMaxToken($maxTokens)
			
 
				+            ->send("# pali\n\n{$originalText}\n\n# commentary\n\n{$commentaryText}");
			
 
				+        $completeAt = time();
			
 
				+        $answer = $response['choices'][0]['message']['content'] ?? '[]';
			
 
				+        Log::debug('ai response', ['data' => $answer]);
			
 
				+        $message = ($completeAt - $startAt) . 's';
			
 
				+
			
 
				+        if (isset($response['usage']['completion_tokens'])) {
			
 
				+            Log::debug('usage', $response['usage']);
			
 
				+            $message .= " completion_tokens:" . $response['usage']['completion_tokens'];
			
 
				+            $curr = (int)($response['usage']['completion_tokens'] / $totalSentences);
			
 
				+            if ($curr > $this->tokensPerSentence) {
			
 
				+                $this->tokensPerSentence = $curr;
			
 
				+            }
			
 
				+        }
			
 
				+        $this->info($message);
			
 
				+        $json = [];
			
 
				+        if (is_string($answer)) {
			
 
				+            $json = LlmResponseParser::jsonl($answer);
			
 
				+            $json = $this->arrayUnIndexed($json, $original, $commentary);
			
 
				+            Log::debug(json_encode($json, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));
			
 
				+        }
			
 
				+        if (count($json) === 0) {
			
 
				+            Log::error("jsonl is empty");
			
 
				+        }
			
 
				+
			
 
				+        return $json;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+
			
 
				+    private function save($json, $channel)
			
 
				+    {
			
 
				+        if (!is_array($json)) {
			
 
				+            Log::warning('llm return null');
			
 
				+            return false;
			
 
				+        }
			
 
				+        foreach ($json as $key => $sentence) {
			
 
				+            if (!isset($sentence['commentary'])) {
			
 
				+                continue;
			
 
				+            }
			
 
				+            $sentId = explode('-', $sentence['id']);
			
 
				+            $arrCommentary = $sentence['commentary'];
			
 
				+            if (
			
 
				+                isset($arrCommentary) &&
			
 
				+                is_array($arrCommentary) &&
			
 
				+                count($arrCommentary) > 0
			
 
				+            ) {
			
 
				+                $content =  array_map(function ($n) {
			
 
				+                    if (is_string($n)) {
			
 
				+                        return '{{' . $n . '}}';
			
 
				+                    } else if (is_array($n) && isset($n['id']) && is_string($n['id'])) {
			
 
				+                        return '{{' . $n['id'] . '}}';
			
 
				+                    } else {
			
 
				+                        return '';
			
 
				+                    }
			
 
				+                }, $arrCommentary);
			
 
				+                $this->sentenceService->save(
			
 
				+                    [
			
 
				+                        'book_id' => $sentId[0],
			
 
				+                        'paragraph' => $sentId[1],
			
 
				+                        'word_start' => $sentId[2],
			
 
				+                        'word_end' => $sentId[3],
			
 
				+                        'channel_uid' => $channel->uid,
			
 
				+                        'content' => implode("\n", $content),
			
 
				+                        'lang' => $channel->lang,
			
 
				+                        'status' => $channel->status,
			
 
				+                        'editor_uid' => $this->model['uid'],
			
 
				+                    ]
			
 
				+                );
			
 
				+                $this->info($sentence['id'] . ' saved');
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/api-v8/app/Helpers/LlmResponseParser.php
+++ b/api-v8/app/Helpers/LlmResponseParser.php
@@ -0,0 +1,130 @@
 
				+<?php
			
 
				+
			
 
				+namespace App\Helpers;
			
 
				+
			
 
				+use Illuminate\Support\Facades\Log;
			
 
				+
			
 
				+/**
			
 
				+ * Class LlmResponseParser
			
 
				+ * @package App\Helpers
			
 
				+ */
			
 
				+class LlmResponseParser
			
 
				+{
			
 
				+    /**
			
 
				+     * 解析LLM返回的可能包含Markdown格式（如```json...```）或额外文字说明的JSON字符串。
			
 
				+     *
			
 
				+     * @param string $input LLM返回的原始字符串。
			
 
				+     * @return array|null 解析成功的PHP数组，如果解析失败则返回空数组
			
 
				+     */
			
 
				+    public static function json(string $input): array
			
 
				+    {
			
 
				+        // 1. 预处理：查找并提取被 Markdown 代码块包裹的JSON字符串。
			
 
				+        // 匹配 ```json ... ``` 或 ``` ... ``` 格式的代码块。
			
 
				+        // S: dotall 模式，允许 . 匹配换行符。
			
 
				+        // ?: 非贪婪模式，匹配尽可能少的字符直到遇到下一个 ```。
			
 
				+        $pattern = '/```(?:json)?\s*(.*?)\s*```/s';
			
 
				+
			
 
				+        if (preg_match($pattern, $input, $matches)) {
			
 
				+            // 情况 2 和 3：找到代码块，提取其内容。
			
 
				+            // $matches[1] 包含代码块内部的内容。
			
 
				+            $jsonString = trim($matches[1]);
			
 
				+        } else {
			
 
				+            // 情况 1：没有代码块包裹，认为整个输入就是纯 JSON 字符串。
			
 
				+            // 这种情况也包含了用户可能提供的、未被代码块包裹但带有文字说明的JSON。
			
 
				+            // 这种情况下，我们需要在后续尝试解析时，先尝试 trim() 整个输入。
			
 
				+            $jsonString = trim($input);
			
 
				+        }
			
 
				+
			
 
				+        // 2. 尝试解析提取或预处理后的字符串。
			
 
				+        // json_decode 的第二个参数设为 true，将其解码为关联数组。
			
 
				+        $data = json_decode($jsonString, true);
			
 
				+
			
 
				+        // 3. 检查解析结果。
			
 
				+        // json_last_error() 返回 JSON 解析的最后一个错误代码。
			
 
				+        if (json_last_error() === JSON_ERROR_NONE && is_array($data)) {
			
 
				+            // 解析成功且结果为数组。
			
 
				+            return $data;
			
 
				+        }
			
 
				+
			
 
				+        // 4. 如果第一次尝试失败 (通常是针对情况 1 或包含文字说明的情况)，
			
 
				+        // 并且提取的字符串与原始输入相同（即没有匹配到代码块），
			
 
				+        // 并且原始输入中包含可能干扰解析的文字，
			
 
				+        // 则可以考虑更复杂的清理步骤，但考虑到 LLM 返回的 JSON 通常比较规范，
			
 
				+        // 推荐的做法是如果第一次没有成功，就返回 null，以保持函数的健壮性。
			
 
				+        // 纯粹的 JSON 字符串（情况 1）在第一次尝试时应该已经成功。
			
 
				+
			
 
				+        // 如果解析失败，则返回 空数据
			
 
				+        Log::error('解析失败' . $input);
			
 
				+        return [];
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * 解析LLM返回的JSONL（JSON Lines）格式字符串。
			
 
				+     * 支持Markdown代码块包裹、额外说明文字，以及处理截断的JSONL数据。
			
 
				+     * 每一行应为独立的JSON对象，解析时会跳过无效行并返回所有成功解析的数据。
			
 
				+     *
			
 
				+     * @param string $input LLM返回的原始JSONL字符串。
			
 
				+     * @return array 解析成功的PHP数组，每个元素对应一行有效的JSON对象。如果完全解析失败则返回空数组。
			
 
				+     */
			
 
				+    public static function jsonl(string $input): array
			
 
				+    {
			
 
				+        // 1. 预处理：查找并提取被 Markdown 代码块包裹的 JSONL 字符串。
			
 
				+        // 匹配 ```jsonl ... ``` 或 ``` ... ``` 格式的代码块。
			
 
				+        $pattern = '/```(?:jsonl?)?\s*(.*?)\s*```/s';
			
 
				+
			
 
				+        if (preg_match($pattern, $input, $matches)) {
			
 
				+            // 情况 2 和 3：找到代码块，提取其内容。
			
 
				+            $jsonlString = trim($matches[1]);
			
 
				+        } else {
			
 
				+            // 情况 1：没有代码块包裹，认为整个输入就是纯 JSONL 字符串。
			
 
				+            $jsonlString = trim($input);
			
 
				+        }
			
 
				+
			
 
				+        // 2. 按行分割 JSONL 字符串。
			
 
				+        // 使用 PHP_EOL 或 \n 作为分隔符，同时处理 Windows (\r\n) 和 Unix (\n) 换行符。
			
 
				+        $lines = preg_split('/\r\n|\r|\n/', $jsonlString);
			
 
				+
			
 
				+        // 3. 逐行解析 JSON 对象。
			
 
				+        $result = [];
			
 
				+        foreach ($lines as $lineNumber => $line) {
			
 
				+            // 去除每行的首尾空白字符。
			
 
				+            $line = trim($line);
			
 
				+
			
 
				+            // 跳过空行。
			
 
				+            if (empty($line)) {
			
 
				+                continue;
			
 
				+            }
			
 
				+
			
 
				+            // 尝试解析当前行为 JSON。
			
 
				+            $decoded = json_decode($line, true);
			
 
				+
			
 
				+            // 检查解析是否成功。
			
 
				+            if (json_last_error() === JSON_ERROR_NONE && is_array($decoded)) {
			
 
				+                // 解析成功，添加到结果数组。
			
 
				+                $result[] = $decoded;
			
 
				+            } else {
			
 
				+                // 解析失败，记录错误日志（可选）。
			
 
				+                // 这里处理了截断的情况：如果某行不是有效 JSON，则跳过它。
			
 
				+                // 通常最后一行可能因截断而无效，前面的有效行仍会被返回。
			
 
				+                Log::warning("JSONL解析失败 - 行 " . ($lineNumber + 1) . ": " . $line);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        // 4. 返回解析结果。
			
 
				+        // 即使没有成功解析任何行，也返回空数组而非 null，保持返回类型一致。
			
 
				+        if (empty($result)) {
			
 
				+            Log::error('JSONL解析失败，未能提取任何有效数据: ' . $input);
			
 
				+        }
			
 
				+
			
 
				+        return $result;
			
 
				+    }
			
 
				+
			
 
				+    public static function jsonl_encode(array $input): string
			
 
				+    {
			
 
				+        $rows = [];
			
 
				+        foreach ($input as $key => $value) {
			
 
				+            $rows[] = json_encode($value, JSON_UNESCAPED_UNICODE);
			
 
				+        }
			
 
				+        return implode("\n", $rows);
			
 
				+    }
			
 
				+}
			
--- a/api-v8/app/Services/SentenceService.php
+++ b/api-v8/app/Services/SentenceService.php
@@ -0,0 +1,47 @@
 
				+<?php
			
 
				+
			
 
				+namespace App\Services;
			
 
				+
			
 
				+use App\Models\Sentence;
			
 
				+use App\Models\SentHistory;
			
 
				+use Illuminate\Support\Str;
			
 
				+
			
 
				+class SentenceService
			
 
				+{
			
 
				+    public function save($data)
			
 
				+    {
			
 
				+        $row = Sentence::firstOrNew([
			
 
				+            "book_id" => $data['book_id'],
			
 
				+            "paragraph" => $data['paragraph'],
			
 
				+            "word_start" => $data['word_start'],
			
 
				+            "word_end" => $data['word_end'],
			
 
				+            "channel_uid" => $data['channel_uid'],
			
 
				+        ], [
			
 
				+            "id" => app('snowflake')->id(),
			
 
				+            "uid" => Str::uuid(),
			
 
				+        ]);
			
 
				+        $row->content = $data['content'];
			
 
				+        if (isset($data['content_type']) && !empty($data['content_type'])) {
			
 
				+            $row->content_type = $data['content_type'];
			
 
				+        }
			
 
				+        $row->strlen = mb_strlen($data['content'], "UTF-8");
			
 
				+        $row->language = $data['lang'];
			
 
				+        $row->status = $data['status'];
			
 
				+        if (isset($data['copy'])) {
			
 
				+            //复制句子，保留原作者信息
			
 
				+            $row->editor_uid = $data["editor_uid"];
			
 
				+            $row->acceptor_uid = $data["acceptor_uid"];
			
 
				+            $row->pr_edit_at = $data["updated_at"];
			
 
				+            if (isset($data['fork_from'])) {
			
 
				+                $row->fork_at = now();
			
 
				+            }
			
 
				+        } else {
			
 
				+            $row->editor_uid = $data["editor_uid"];
			
 
				+            $row->acceptor_uid = null;
			
 
				+            $row->pr_edit_at = null;
			
 
				+        }
			
 
				+        $row->create_time = time() * 1000;
			
 
				+        $row->modify_time = time() * 1000;
			
 
				+        $row->save();
			
 
				+    }
			
 
				+}