modelService = $model;
        $this->sentenceService = $sent;
        $this->openAIService = $openAI;
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * With --list, prints every book name with its row count and exits.
     * Otherwise, for each selected book / cs_para it groups the related
     * paragraphs by book type, asks the LLM to align pāḷi with aṭṭhakathā
     * and aṭṭhakathā with ṭīkā, and stores the alignments in the
     * "_System_commentary_" channel.
     *
     * @return int 0 on success, 1 when no usable model or channel is available
     */
    public function handle()
    {
        if ($this->option('list')) {
            // The aggregate is aliased so the column name does not depend on the database driver.
            $result = RelatedParagraph::whereNotNull('book_name')
                ->groupBy('book_name')
                ->selectRaw('book_name,count(*) as count')
                ->get();
            foreach ($result as $row) {
                $this->info($row['book_name'] . "[" . $row['count'] . "]");
            }
            return 0;
        }

        if ($this->option('model')) {
            $this->model = $this->modelService->getModelById($this->option('model'));
        }
        // Fail fast: without a model every textAlign() call would return an empty result anyway.
        if (!$this->model) {
            Log::error('model is invalid');
            $this->error('model is invalid');
            return 1;
        }
        $this->info("model:{$this->model['model']}");

        $channel = ChannelApi::getChannelByName('_System_commentary_');
        if (!$channel) {
            Log::error('channel _System_commentary_ not found');
            $this->error('channel _System_commentary_ not found');
            return 1;
        }

        if ($this->option('book')) {
            $books = [['book_name' => $this->option('book')]];
        } else {
            $books = RelatedParagraph::whereNotNull('book_name')
                ->where('cs_para', '>', 0)
                ->groupBy('book_name')
                ->select('book_name')
                ->get()->toArray();
        }

        foreach ($books as $currBook) {
            if ($this->option('para')) {
                $paragraphs = [['cs_para' => $this->option('para')]];
            } else {
                $paragraphs = RelatedParagraph::where('book_name', $currBook['book_name'])
                    ->where('cs_para', '>', 0)
                    ->groupBy('cs_para')
                    ->select('cs_para')
                    ->get()->toArray();
            }

            foreach ($paragraphs as $paragraph) {
                $this->alignRelatedParagraph($currBook['book_name'], $paragraph['cs_para'], $channel);
            }
        }

        return 0;
    }

    /**
     * Align and store the commentary for one (book_name, cs_para) pair.
     *
     * @param mixed $bookName value of RelatedParagraph.book_name
     * @param mixed $csPara   value of RelatedParagraph.cs_para
     * @param mixed $channel  channel object providing uid, lang and status
     * @return void
     */
    private function alignRelatedParagraph($bookName, $csPara, $channel)
    {
        $message = 'ai commentary ' . $bookName . '-' . $csPara;
        $this->info($message);
        Log::info($message);

        $rows = RelatedParagraph::where('book_name', $bookName)
            ->where('cs_para', $csPara)
            ->where('book_id', '>', 0)
            ->orderBy('book_id')
            ->orderBy('para')
            ->get();

        $type = $this->groupParagraphsByType($rows);

        foreach ($type as $typeName => $booksOfType) {
            Log::debug($typeName);
            foreach ($booksOfType as $bookId => $bookParagraphs) {
                Log::debug($bookId);
                foreach ($bookParagraphs as $paraData) {
                    Log::debug($paraData['book'] . '-' . $paraData['para']);
                }
            }
        }

        $hasPali = $this->hasData($type, 'pāḷi');
        $hasAtta = $this->hasData($type, 'aṭṭhakathā');
        $hasTika = $this->hasData($type, 'ṭīkā');

        // Both alignments need the aṭṭhakathā text, so there is nothing to do without it.
        if (!$hasAtta) {
            return;
        }

        // Built once, outside the pāḷi branch, so the ṭīkā branch never sees
        // an undefined or stale value when pāḷi data is missing.
        $attaJson = $this->collectBookSentences($type['aṭṭhakathā']);

        // pāḷi <-> aṭṭhakathā
        if ($hasPali) {
            $paliJson = $this->collectBookSentences($type['pāḷi']);
            // LLM alignment, then write to the database
            $this->save($this->textAlign($paliJson, $attaJson), $channel);
        }

        // aṭṭhakathā <-> ṭīkā: one LLM request per ṭīkā book
        if ($hasTika) {
            $tikaResult = [];
            foreach ($type['ṭīkā'] as $bookParagraphs) {
                $tikaJson = $this->collectBookSentences([$bookParagraphs]);
                $aligned = $this->textAlign($attaJson, $tikaJson);
                // Merge new and old data: append unseen sentences, merge the commentary of known ones.
                $tikaResult = $this->mergeAlignmentResults($tikaResult, $aligned);
            }
            // write to the database
            $this->save($tikaResult, $channel);
        }
    }

    /**
     * Group related-paragraph rows into books, keyed by book type.
     *
     * Each book yields one entry. Books whose type cannot be determined are skipped.
     *
     * @param iterable $rows rows exposing book_id, book and para
     * @return array map of type name => book id => list of ['book' => ..., 'para' => ...]
     */
    private function groupParagraphsByType($rows): array
    {
        $bookTypes = [];
        $grouped = [];
        foreach ($rows as $row) {
            $bookId = $row->book_id;
            // array_key_exists (not isset) so a null type is cached instead of being looked up for every row.
            if (!array_key_exists($bookId, $bookTypes)) {
                $bookTypes[$bookId] = $this->getBookType($bookId);
                if ($bookTypes[$bookId] === null) {
                    Log::warning('book type not found', ['book_id' => $bookId]);
                }
            }
            $bookType = $bookTypes[$bookId];
            if ($bookType === null) {
                continue;
            }
            $grouped[$bookType][$bookId][] = ['book' => $row->book, 'para' => $row->para];
        }
        return $grouped;
    }

    /**
     * Load the sentences of every paragraph of the given books as one flat list.
     *
     * @param array $books list/map of books, each a list of ['book' => ..., 'para' => ...]
     * @return array list of ['id' => string, 'content' => string]
     */
    private function collectBookSentences(array $books): array
    {
        $chunks = [];
        foreach ($books as $bookParagraphs) {
            foreach ($bookParagraphs as $paraData) {
                $chunks[] = $this->getParaContent($paraData['book'], $paraData['para']);
            }
        }
        // The leading [] keeps the call valid when there are no chunks at all.
        return array_merge([], ...$chunks);
    }

    /**
     * Merge a new alignment result into the accumulated one.
     *
     * Rows with an unseen id are appended; for known ids the commentary lists are concatenated.
     *
     * @param array $merged   accumulated rows
     * @param array $incoming rows returned by textAlign()
     * @return array the merged list
     */
    private function mergeAlignmentResults(array $merged, array $incoming): array
    {
        $merged = array_values($merged);
        $positions = [];
        foreach ($merged as $pos => $row) {
            if (isset($row['id'])) {
                $positions[$row['id']] = $pos;
            }
        }

        foreach ($incoming as $new) {
            if (!is_array($new) || !isset($new['id'])) {
                continue;
            }
            $id = $new['id'];
            if (!isset($positions[$id])) {
                $merged[] = $new;
                $positions[$id] = count($merged) - 1;
                continue;
            }
            if (isset($new['commentary']) && is_array($new['commentary'])) {
                $pos = $positions[$id];
                // The existing row may have had no commentary at all.
                $existing = (isset($merged[$pos]['commentary']) && is_array($merged[$pos]['commentary']))
                    ? $merged[$pos]['commentary']
                    : [];
                $merged[$pos]['commentary'] = array_merge($existing, $new['commentary']);
            }
        }

        return $merged;
    }

    /**
     * Tell whether the grouped data contains at least one paragraph of the given type.
     * Logs a warning when it does not.
     *
     * @param array  $typeData map of type name => books
     * @param string $typeName type to look for
     * @return bool
     */
    private function hasData($typeData, $typeName)
    {
        $present = isset($typeData[$typeName])
            && $this->getParagraphNumber($typeData[$typeName]) !== 0;
        if (!$present) {
            Log::warning($typeName . ' data is missing');
        }
        return $present;
    }

    /**
     * Count the paragraphs of all books of one type.
     *
     * @param mixed $type map of book id => list of paragraphs
     * @return int 0 when $type is not an array
     */
    private function getParagraphNumber($type)
    {
        if (!isset($type) || !is_array($type)) {
            return 0;
        }
        $count = 0;
        foreach ($type as $paragraphs) {
            $count += count($paragraphs);
        }
        return $count;
    }

    /**
     * Resolve the type of a book from the tags attached to its title paragraph.
     *
     * @param mixed $bookId value matched against BookTitle.sn
     * @return string|null 'pāḷi', 'aṭṭhakathā' or 'ṭīkā'; null when the book,
     *                     its title paragraph or a matching tag cannot be found
     */
    private function getBookType($bookId)
    {
        $bookTitle = BookTitle::where('sn', $bookId)->first();
        if (!$bookTitle) {
            return null;
        }

        $paliTextUuid = PaliText::where('book', $bookTitle->book)
            ->where('paragraph', $bookTitle->paragraph)
            ->value('uid');
        if (!$paliTextUuid) {
            return null;
        }

        // pluck() yields a flat list of ids, which is what whereIn() expects.
        $tagIds = TagMap::where('anchor_id', $paliTextUuid)->pluck('tag_id');
        $tags = Tag::whereIn('id', $tagIds)->select('name')->get();
        foreach ($tags as $tag) {
            if (in_array($tag->name, ['pāḷi', 'aṭṭhakathā', 'ṭīkā'], true)) {
                return $tag->name;
            }
        }
        return null;
    }

    /**
     * Load the sentences of one paragraph, ordered by word_begin.
     *
     * @param mixed $book book number
     * @param mixed $para paragraph number
     * @return array list of ['id' => "{book}-{para}-{word_begin}-{word_end}", 'content' => markdown];
     *               empty when the paragraph has no sentences
     */
    private function getParaContent($book, $para)
    {
        $textService = app(SearchPaliDataService::class);
        $sentences = PaliSentence::where('book', $book)
            ->where('paragraph', $para)
            ->orderBy('word_begin')
            ->get();

        $json = [];
        foreach ($sentences as $sentence) {
            $content = $textService->getSentenceText($book, $para, $sentence->word_begin, $sentence->word_end);
            $id = "{$book}-{$para}-{$sentence->word_begin}-{$sentence->word_end}";
            $json[] = ['id' => $id, 'content' => $content['markdown'] ?? ''];
        }
        return $json;
    }

    /**
     * Replace every row's id with its array key, so the LLM works with short ids.
     *
     * @param array $input list of rows
     * @return array the rows, re-listed, each with 'id' set to its former key
     */
    private function arrayIndexed(array $input): array
    {
        $output = [];
        foreach ($input as $key => $value) {
            $value['id'] = $key;
            $output[] = $value;
        }
        return $output;
    }

    /**
     * Translate the short ids of an LLM answer back to the real sentence ids.
     *
     * A row is matched to $original by the id it carries; when that id is
     * missing or unknown, its position in the answer is used instead. Rows
     * that cannot be matched are dropped, as are commentary references that
     * do not exist in $commentary.
     *
     * @param array $input      rows parsed from the LLM answer
     * @param array $original   rows sent as the text to be commented (real ids)
     * @param array $commentary rows sent as commentary (real ids)
     * @return array list of rows with real 'id' and, when present, real 'commentary' ids
     */
    private function arrayUnIndexed(array $input, array $original, array $commentary): array
    {
        $output = [];
        foreach ($input as $position => $value) {
            if (!is_array($value)) {
                continue;
            }

            $index = null;
            if (
                isset($value['id'])
                && (is_int($value['id']) || is_string($value['id']))
                && isset($original[$value['id']]['id'])
            ) {
                $index = $value['id'];
            } elseif (isset($original[$position]['id'])) {
                $index = $position;
            }
            if ($index === null) {
                Log::warning('llm row does not match any original sentence', ['row' => $value]);
                continue;
            }
            $value['id'] = $original[$index]['id'];

            if (isset($value['commentary'])) {
                $refs = is_array($value['commentary']) ? $value['commentary'] : [$value['commentary']];
                $ids = [];
                foreach ($refs as $ref) {
                    // Tolerate references given as objects: {"id": n}
                    if (is_array($ref)) {
                        $ref = $ref['id'] ?? null;
                    }
                    if ((is_int($ref) || is_string($ref)) && isset($commentary[$ref]['id'])) {
                        $ids[] = $commentary[$ref]['id'];
                    }
                }
                $value['commentary'] = $ids;
            }
            $output[] = $value;
        }
        return $output;
    }

    /**
     * Ask the LLM which commentary sentences explain which original sentences.
     *
     * Also raises $this->tokensPerSentence when the answer used more
     * completion tokens per sentence than currently assumed.
     *
     * @param array $original   list of ['id' => ..., 'content' => ...] to be commented
     * @param array $commentary list of ['id' => ..., 'content' => ...] commenting it
     * @return array rows with real ids (see arrayUnIndexed()); empty when there
     *               is no model, nothing to align, or no usable answer
     */
    private function textAlign(array $original, array $commentary)
    {
        if (!$this->model) {
            Log::error('model is invalid');
            return [];
        }
        // Nothing to align; this also keeps $totalSentences from being zero below.
        if (count($original) === 0 || count($commentary) === 0) {
            Log::warning('text align skipped: original or commentary is empty');
            return [];
        }

        $originalSn = $this->arrayIndexed($original);
        $commentarySn = $this->arrayIndexed($commentary);
        $originalText = "```jsonl\n" . LlmResponseParser::jsonl_encode($originalSn) . "\n```";
        $commentaryText = "```jsonl\n" . LlmResponseParser::jsonl_encode($commentarySn) . "\n```";
        Log::debug('ai request', [
            'original' => $originalText,
            'commentary' => $commentaryText
        ]);

        $totalSentences = count($original) + count($commentary);
        $maxTokens = (int)($this->tokensPerSentence * $totalSentences * 1.5);
        $this->info("requesting…… {$totalSentences} sentences {$this->tokensPerSentence}tokens/sentence set {$maxTokens} max_tokens");
        Log::debug('requesting…… ' . $this->model['model']);

        $startAt = time();
        $response = $this->openAIService->setApiUrl($this->model['url'])
            ->setModel($this->model['model'])
            ->setApiKey($this->model['key'])
            ->setSystemPrompt($this->prompt)
            ->setTemperature(0.0)
            ->setStream(false)
            ->setMaxToken($maxTokens)
            ->send("# pali\n\n{$originalText}\n\n# commentary\n\n{$commentaryText}");
        $completeAt = time();

        $answer = $response['choices'][0]['message']['content'] ?? '[]';
        Log::debug('ai response', ['data' => $answer]);

        $message = ($completeAt - $startAt) . 's';
        if (isset($response['usage']['completion_tokens'])) {
            Log::debug('usage', $response['usage']);
            $message .= " completion_tokens:" . $response['usage']['completion_tokens'];
            $curr = (int)($response['usage']['completion_tokens'] / $totalSentences);
            if ($curr > $this->tokensPerSentence) {
                $this->tokensPerSentence = $curr;
            }
        }
        $this->info($message);

        $json = [];
        if (is_string($answer)) {
            $parsed = LlmResponseParser::jsonl($answer);
            if (is_array($parsed)) {
                $json = $this->arrayUnIndexed($parsed, $original, $commentary);
            }
            Log::debug(json_encode($json, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));
        }
        if (count($json) === 0) {
            Log::error("jsonl is empty");
        }
        return $json;
    }

    /**
     * Store the aligned commentary references as sentences of the given channel.
     *
     * Every row with a non-empty commentary list is saved as one sentence whose
     * content is one "{{sentence-id}}" reference per line. Rows without a valid
     * "book-para-begin-end" id or without any usable reference are skipped.
     *
     * @param mixed $json    rows returned by textAlign()
     * @param mixed $channel channel object providing uid, lang and status
     * @return bool false when $json is not an array, true otherwise
     */
    private function save($json, $channel)
    {
        if (!is_array($json)) {
            Log::warning('llm return null');
            return false;
        }

        foreach ($json as $sentence) {
            if (!is_array($sentence) || !isset($sentence['id'], $sentence['commentary'])) {
                continue;
            }
            $arrCommentary = $sentence['commentary'];
            if (!is_array($arrCommentary) || count($arrCommentary) === 0) {
                continue;
            }

            $sentId = explode('-', (string)$sentence['id']);
            if (count($sentId) !== 4) {
                Log::warning('invalid sentence id', ['id' => $sentence['id']]);
                continue;
            }

            $content = [];
            foreach ($arrCommentary as $n) {
                if (is_array($n)) {
                    $n = $n['id'] ?? null;
                }
                // Skip empty or non-string references instead of writing "{{}}".
                if (is_string($n) && $n !== '') {
                    $content[] = '{{' . $n . '}}';
                }
            }
            if (count($content) === 0) {
                continue;
            }

            $this->sentenceService->save(
                [
                    'book_id' => $sentId[0],
                    'paragraph' => $sentId[1],
                    'word_start' => $sentId[2],
                    'word_end' => $sentId[3],
                    'channel_uid' => $channel->uid,
                    'content' => implode("\n", $content),
                    'lang' => $channel->lang,
                    'status' => $channel->status,
                    'editor_uid' => $this->model['uid'],
                ]
            );
            $this->info($sentence['id'] . ' saved');
        }

        return true;
    }
}