visuddhinanda пре 6 дана
родитељ
комит
80ecadcfd9
1 измењених фајлова са 127 додато и 14 уклоњено
  1. 127 14
      api-v13/app/Console/Commands/UpdateCorpus.php

+ 127 - 14
api-v13/app/Console/Commands/UpdateCorpus.php

@@ -2,13 +2,20 @@
 
 namespace App\Console\Commands;
 
+use Illuminate\Support\Facades\Log;
+
+
 use App\Services\SentenceService;
+use App\Services\TermService;
 use Illuminate\Console\Attributes\Description;
 use Illuminate\Console\Attributes\Signature;
 use Illuminate\Console\Command;
 use Illuminate\Support\Facades\DB;
 use App\Models\Channel;
 
+use App\Http\Api\UserApi;
+
+
 #[Signature('app:update-corpus')]
 #[Description('Update corpus from JSONL files in corpus directory')]
 class UpdateCorpus extends Command
@@ -27,8 +34,6 @@ class UpdateCorpus extends Command
      *
      * @param SentenceService $sentenceService
+     * @param TermService $termService
      */
-    public function __construct(SentenceService $sentenceService)
-    {
     public function __construct(SentenceService $sentenceService, TermService $termService)
     {
         parent::__construct();
@@ -54,34 +59,55 @@ class UpdateCorpus extends Command
         }
 
         // Scan subdirectories of the corpus path
-        $subdirectories = $this->getSubdirectories($corpusBasePath);
+        $stores = $this->getSubdirectories($corpusBasePath);
 
-        if (empty($subdirectories)) {
+        if (empty($stores)) {
             $this->warn('No subdirectories found in corpus path.');
             return self::SUCCESS;
         }
 
-        $this->info("Found " . count($subdirectories) . " subdirectories to process.");
+        $this->info("Found " . count($stores) . " subdirectories to process.");
 
         $totalProcessed = 0;
         $totalErrors = 0;
 
-        foreach ($subdirectories as $subdir) {
-            $this->info("Processing directory: {$subdir}");
+        foreach ($stores as $store) {
+            $this->info("Processing directory: {$store}");
 
             try {
-                $stats = $this->processCorpusDirectory($subdir);
+                $stats = $this->processCorpusDirectory($store);
                 $totalProcessed += $stats['processed'];
                 $totalErrors += $stats['errors'];
                 $this->info("Directory processed: {$stats['processed']} records saved, {$stats['errors']} errors");
+                if (isset($stats['channels'])) {
+                    foreach ($stats['channels'] as $key => $channelId) {
+                        $this->call('upgrade:progress', ['--channel' => $channelId]);
+                        $this->call('upgrade:progress.chapter', ['--channel' => $channelId]);
+                        $this->call('opensearch:index-tipitaka', [
+                            'book' => 0,
+                            '--channel' => $channelId,
+                            '--granularity' => 'chapter',
+                            '--summary' => 'off'
+                        ]);
+                    }
+                }
             } catch (\Exception $e) {
-                $this->error("Failed to process directory {$subdir}: {$e->getMessage()}");
+                $this->error("Failed to process directory {$store}: {$e->getMessage()}");
+                Log::error("Failed to process directory", [
+                    'dir'        => $store,
+                    'message'    => $e->getMessage(),
+                    'file'       => $e->getFile(),
+                    'line'       => $e->getLine(),
+                    'trace'      => $e->getTraceAsString(),
+                ]);
                 $totalErrors++;
             }
         }
 
         $this->info("Corpus update completed. Total processed: {$totalProcessed}, Total errors: {$totalErrors}");
 
+
+
         return $totalErrors > 0 ? self::FAILURE : self::SUCCESS;
     }
 
@@ -152,6 +178,13 @@ class UpdateCorpus extends Command
 
         $this->info("Found {$channels->count()} channel(s) for source ID: {$sourceId}");
 
+        $glossaryFile = $directoryPath . DIRECTORY_SEPARATOR . 'glossary.csv';
+
+        if (file_exists($glossaryFile)) {
+            $status = $this->processGlossary($glossaryFile, $channels);
+            $this->line('glossary load');
+        }
+
         // Scan subdirectories of the current directory for JSONL files
         $childDirectories = $this->getSubdirectories($directoryPath);
 
@@ -166,10 +199,89 @@ class UpdateCorpus extends Command
                 $stats['errors'] += $fileStats['errors'];
             }
         }
-
+        $stats['channels'] = array_map(fn($item) => $item['uid'], $channels->toArray());
         return $stats;
     }
+    /**
+     * Load a glossary CSV file and upsert one term per data row for every channel.
+     *
+     * The first CSV record is used as the header row and supplies the keys for
+     * each following row. `pali_word` and `meaning` are read unconditionally;
+     * `tag`, `redirect`, `meaning2` and `note` are optional, and an empty
+     * `meaning2`/`note` is stored as null. Blank lines are skipped; rows whose
+     * column count differs from the header are counted as errors.
+     *
+     * @param string $filePath Path of the glossary CSV file.
+     * @param \Illuminate\Database\Eloquent\Collection $channels Channels to save
+     *        each term into; only `$channel->uid` is read here.
+     * @return array{processed: int, errors: int} Number of successful saves
+     *         (rows x channels) and number of failed rows/saves.
+     */
+    protected function processGlossary(string $filePath, $channels): array
+    {
+        $stats = [
+            'processed' => 0,
+            'errors'    => 0,
+        ];
 
+        $handle = fopen($filePath, 'r');
+
+        if (!$handle) {
+            $this->error("Failed to open file: {$filePath}");
+            return $stats;
+        }
+
+        $lineNumber = 0;
+
+        // try/finally guarantees the handle is closed on every exit path,
+        // including an exception thrown while resolving the editor id.
+        try {
+            $robotUid = config('mint.admin.robot_uuid');
+
+            if (!$robotUid) {
+                $this->error('robot_uuid not configured in mint.admin.robot_uuid');
+                return $stats;
+            }
+
+            // Read the header row.
+            $headers = fgetcsv($handle);
+
+            if ($headers === false) {
+                $this->error("Failed to read CSV headers from: {$filePath}");
+                return $stats;
+            }
+
+            // A UTF-8 byte-order mark would otherwise become part of the first
+            // column name and make that column unreachable by its real name.
+            if (isset($headers[0]) && str_starts_with($headers[0], "\xEF\xBB\xBF")) {
+                $headers[0] = substr($headers[0], 3);
+            }
+
+            // The editor is the same for every row, so resolve it once.
+            // NOTE(review): the return value of getIdByUuid is passed through
+            // unchecked, as before - confirm it cannot be empty for the robot user.
+            $editorId = UserApi::getIdByUuid($robotUid);
+
+            while (($row = fgetcsv($handle)) !== false) {
+                $lineNumber++;
+
+                // fgetcsv() reports a blank line as a single null field; that is
+                // not malformed data, so skip it instead of counting an error.
+                if ($row === [null]) {
+                    continue;
+                }
+
+                if (count($row) !== count($headers)) {
+                    $this->error("Column count mismatch at line {$lineNumber} in file: {$filePath}");
+                    $stats['errors']++;
+                    continue;
+                }
+
+                $data = array_combine($headers, $row);
+
+                foreach ($channels as $channel) {
+                    try {
+                        $saveData = [
+                            'word'          => $data['pali_word'],
+                            'tag'           => $data['tag'] ?? null,
+                            'channel_id'    => $channel->uid,
+                            'meaning'       => $data['meaning'],
+                            'redirect'      => $data['redirect'] ?? null,
+                            // Optional columns: tolerate a missing column and
+                            // normalise an empty cell to null.
+                            'other_meaning' => ($data['meaning2'] ?? null) ?: null,
+                            'note'          => ($data['note'] ?? null) ?: null,
+                            'editor_id'     => $editorId,
+                        ];
+
+                        DB::transaction(function () use ($saveData) {
+                            $this->termService->updateOrCreateByWord($saveData);
+                        });
+
+                        $stats['processed']++;
+                    } catch (\Exception $e) {
+                        // A failure for one channel must not stop the remaining
+                        // channels or rows; record it and carry on.
+                        $this->error("Failed to save glossary for channel {$channel->uid} at line {$lineNumber}: {$e->getMessage()}");
+                        $stats['errors']++;
+                    }
+                }
+            }
+        } finally {
+            fclose($handle);
+        }
+
+        $this->line("glossary {$lineNumber} lines processed");
+        return $stats;
+    }
     /**
      * Process a single JSONL file and save records for each channel.
      *
@@ -220,11 +332,12 @@ class UpdateCorpus extends Command
             // Save for each channel
             foreach ($channels as $channel) {
                 try {
+                    [$book, $para, $start, $end] = explode('-', $data['id']);
                     $saveData = [
-                        'book_id' => $data['book'],
-                        'paragraph' => $data['paragraph'],
-                        'word_start' => $data['start'],
-                        'word_end' => $data['end'],
+                        'book_id' => $book,
+                        'paragraph' => $para,
+                        'word_start' => $start,
+                        'word_end' => $end,
                         'content' => $data['content'],
                         'channel_uid' => $channel->uid,
                         'editor_uid' => $robotUid,