visuddhinanda 1 месяц назад
Родитель
Сommit
ab36e18e18

+ 59 - 18
api-v12/app/Console/Commands/ExportAiTrainingData.php

@@ -6,14 +6,14 @@ use Illuminate\Console\Command;
 use Illuminate\Support\Facades\Log;
 use App\Models\Sentence;
 use App\Models\PaliSentence;
-use Illuminate\Support\Str;
 use App\Http\Api\MdRender;
+use Illuminate\Support\Facades\File;
 
 class ExportAiTrainingData extends Command
 {
     /**
      * The name and signature of the console command.
-     *
+     * php artisan export:ai.training.data
      * @var string
      */
     protected $signature = 'export:ai.training.data {--format=gz  : zip file format 7z,lzma,gz }';
@@ -44,7 +44,8 @@ class ExportAiTrainingData extends Command
     {
         Log::debug('task export offline sentence-table start');
         //创建文件夹
-        $exportDir = storage_path('app/tmp/export/offline');
+        $base = 'app/tmp/export/offline';
+        $exportDir = storage_path($base);
         if (!is_dir($exportDir)) {
             $res = mkdir($exportDir, 0755, true);
             if (!$res) {
@@ -54,20 +55,43 @@ class ExportAiTrainingData extends Command
                 $this->info('make dir successful ' . $exportDir);
             }
         }
-        $filename = 'wikipali-offline-ai-training-' . date("Y-m-d") . '.tsv';
-        $exportFile = storage_path('app/tmp/export/offline/' . $filename);
-        $fp = fopen($exportFile, 'w');
-        if ($fp === false) {
-            die('无法创建文件');
+
+        // Create a temporary working directory for this export run
+        $dirname = $exportDir . '/' . 'wikipali-offline-ai-training-' . date("YmdHis");
+
+        $tmp = mkdir($dirname, 0755, true);
+        if (!$tmp) {
+            $this->error('mkdir fail path=' . $dirname);
+            return 1;
+        } else {
+            $this->info('make dir successful ' . $dirname);
         }
 
+
         $channels = [
             '19f53a65-81db-4b7d-8144-ac33f1217d34',
+            'e5bc5c97-a6fb-4ccb-b7df-be6dcfee9c43',
+            '7ac4d13b-a43d-4409-91b5-5f2a82b916b3',
+            '74ebf4c5-c243-4948-955d-6c277e29276a',
+            '3b0cb0aa-ea88-4ce5-b67d-00a3e76220cc',
+            '5310999c-0b0c-4bb0-9bb9-9cdd176e9ef0',
+            '331447b6-39bb-4b49-ac10-6206db93a050',
         ];
+
         $start = time();
         foreach ($channels as $key => $channel) {
+            // 创建文件
+            $this->info('export start' . $channel);
+            $filename = $channel . '.jsonl';
+            $exportFile = $dirname . '/' . $filename;
+            $fp = fopen($exportFile, 'w');
+            if ($fp === false) {
+                die('无法创建文件');
+            }
+
             $db = Sentence::where('channel_uid', $channel);
             $bar = $this->output->createProgressBar($db->count());
+
             $srcDb = $db->select([
                 'book_id',
                 'paragraph',
@@ -75,8 +99,15 @@ class ExportAiTrainingData extends Command
                 'word_end',
                 'content',
                 'content_type'
-            ])->cursor();
+            ])->orderBy('book_id')
+                ->orderBy('paragraph')
+                ->orderBy('word_start')->cursor();
+            $done = [];
             foreach ($srcDb as $sent) {
+                $id = "{$sent->book_id}-{$sent->paragraph}-{$sent->word_start}-{$sent->word_end}";
+                if (isset($done[$id])) {
+                    continue;
+                }
                 $content = MdRender::render(
                     $sent->content,
                     [$channel],
@@ -91,24 +122,34 @@ class ExportAiTrainingData extends Command
                     ->where('word_begin', $sent->word_start)
                     ->where('word_end', $sent->word_end)
                     ->value('text');
-                $currData = array(
-                    str_replace("\n", "", $origin),
-                    str_replace("\n", "", $content),
-                );
-
-                fwrite($fp, implode("\t", $currData) . "\n");
+                if (empty($origin)) {
+                    Log::warning('origin is empty id=' . $id);
+                    continue;
+                }
+                if (empty($content)) {
+                    Log::warning('translation is empty id=' . $id);
+                    continue;
+                }
+                $currData = ['id' => $id, 'original' => $origin, 'translation' => trim($content)];
 
+                fwrite($fp, json_encode($currData, JSON_UNESCAPED_UNICODE) . "\n");
                 $bar->advance();
+                $done[$id] = 1;
             }
+            fclose($fp);
         }
-        fclose($fp);
+
         $this->info((time() - $start) . ' seconds');
-        $this->call('export:zip', [
+        $this->call('export:zip2', [
             'id' => 'ai-translating-training-data',
-            'filename' => $exportFile,
+            'filename' => $dirname,
             'title' => 'wikipali ai translating training data',
             'format' => $this->option('format'),
         ]);
+
+        sleep(5);
+        File::deleteDirectory($dirname);
+
         return 0;
     }
 }

+ 269 - 0
api-v12/app/Console/Commands/ExportZip2.php

@@ -0,0 +1,269 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\Storage;
+use Illuminate\Support\Facades\Log;
+use Illuminate\Support\Facades\Cache;
+use Illuminate\Support\Facades\App;
+
+use Symfony\Component\Process\Process;
+
+class ExportZip2 extends Command
+{
+    protected $signature = 'export:zip2
+        {filename : filename}
+        {title : title}
+        {id : 标识符}
+        {format? : zip file format 7z,lzma,gz }';
+
+    protected $description = '压缩导出的文件';
+
+    /**
+     * Compress an exported file or directory, upload the archive and register
+     * it in the cached offline-download index.
+     *
+     * Steps: resolve the source path, compress it via compress(), upload the
+     * archive through the Storage facade under the configured temporary
+     * bucket prefix, build the download links, append an entry to the
+     * '/offline/index' cache key, then remove the local source file (when it
+     * is a regular file) and the local archive.
+     *
+     * @return int 0 on success; 1 when the source does not exist, the archive
+     *             cannot be opened or uploaded, or no temporary URL can be
+     *             generated
+     */
+    public function handle()
+    {
+        Log::debug('export offline: 开始压缩');
+
+        $defaultExportPath = storage_path('app/public/export/offline');
+        $exportFile = $this->argument('filename');
+
+        $filename = basename($exportFile);
+
+        // A bare name (no directory part) is looked up in the default export
+        // directory; anything else is used as given.
+        if ($filename === $exportFile) {
+            $exportFullFileName = $defaultExportPath . '/' . $filename;
+            $exportPath = $defaultExportPath;
+        } else {
+            $exportFullFileName = $exportFile;
+            $exportPath = dirname($exportFile);
+        }
+
+        $format = $this->argument('format') ?? 'gz';
+
+        if (!file_exists($exportFullFileName)) {
+
+            Log::error('export offline: file not exists', [
+                'file' => $exportFullFileName
+            ]);
+
+            $this->error('file not exists: ' . $exportFullFileName);
+
+            return 1;
+        }
+
+        $zipFile = $this->getZipFileName($filename, $format);
+        $zipFullFileName = $exportPath . '/' . $zipFile;
+
+        // Remove a stale archive with the same name before compressing.
+        if (file_exists($zipFullFileName)) {
+            unlink($zipFullFileName);
+        }
+
+        $this->info("start compress: {$exportFullFileName}");
+        Log::debug('export offline zip start', [
+            'file' => $exportFullFileName,
+            'format' => $format
+        ]);
+
+        $this->compress($exportFullFileName, $zipFullFileName, $format);
+
+        $this->info('压缩完成');
+
+        Log::debug('zip done', [
+            'zip' => $zipFullFileName
+        ]);
+
+        // ---- Upload the archive ----
+
+        $bucket = config('mint.attachments.bucket_name.temporary');
+        $tmpFile = $bucket . '/' . $zipFile;
+
+        $this->info('upload file=' . $tmpFile);
+
+        Log::debug('export offline upload', [
+            'file' => $tmpFile
+        ]);
+
+        $stream = fopen($zipFullFileName, 'r');
+        if ($stream === false) {
+            Log::error('export offline: open zip fail', [
+                'file' => $zipFullFileName
+            ]);
+            $this->error('open zip fail: ' . $zipFullFileName);
+            return 1;
+        }
+
+        try {
+            $uploaded = Storage::put($tmpFile, $stream);
+        } finally {
+            // The storage driver may already have closed the stream.
+            if (is_resource($stream)) {
+                fclose($stream);
+            }
+        }
+
+        // With 'throw' => false on the disk, a failed write returns false
+        // instead of throwing; do not publish a link to a missing object.
+        if ($uploaded === false) {
+            Log::error('export offline: upload fail', [
+                'file' => $tmpFile
+            ]);
+            $this->error('upload fail: ' . $tmpFile);
+            return 1;
+        }
+
+        $this->info('upload done');
+
+        Log::debug('upload done');
+
+        // ---- Build the download link ----
+
+        if (App::environment('local')) {
+            $link = Storage::url($tmpFile);
+        } else {
+            try {
+                $link = Storage::temporaryUrl(
+                    $tmpFile,
+                    now()->addDays(2)
+                );
+            } catch (\Exception $e) {
+                Log::error('temporaryUrl fail', [
+                    'exception' => $e
+                ]);
+                $this->error('generate temporaryUrl fail');
+                return 1;
+            }
+        }
+
+        $this->info('link=' . $link);
+
+        // ---- CDN mirror list ----
+
+        // Cast so that an unset or null config value yields an empty list
+        // rather than a foreach warning.
+        $cdnUrls = (array) config('mint.server.cdn_urls', []);
+
+        $url = [];
+        foreach ($cdnUrls as $key => $cdn) {
+            $url[] = [
+                'link' => $cdn . '/' . $zipFile,
+                'hostname' => 'china cdn-' . $key
+            ];
+        }
+
+        $url[] = [
+            'link' => $link,
+            'hostname' => 'Amazon cloud storage(Hongkong)'
+        ];
+
+        // ---- Append the entry to the cached index ----
+
+        $info = Cache::get('/offline/index', []);
+
+        $info[] = [
+            'id' => $this->argument('id'),
+            'title' => $this->argument('title'),
+            'filename' => $zipFile,
+            'url' => $url,
+            'create_at' => now()->toDateTimeString(),
+            'chapter' => Cache::get("/export/chapter/count"),
+            'filesize' => filesize($zipFullFileName),
+            'min_app_ver' => '1.3',
+        ];
+
+        Cache::put('/offline/index', $info);
+
+        // ---- Remove the local source file and archive ----
+
+        // NOTE(review): the reason for this pause is not visible here;
+        // presumably it lets pending file handles settle — confirm.
+        sleep(5);
+        try {
+            // Only a regular file is removed; a directory source is left for
+            // the caller to clean up.
+            if (is_file($exportFullFileName)) {
+                unlink($exportFullFileName);
+            }
+            if (file_exists($zipFullFileName)) {
+                unlink($zipFullFileName);
+            }
+        } catch (\Throwable $e) {
+            Log::error('delete source fail', [
+                'exception' => $e
+            ]);
+        }
+
+        return 0;
+    }
+
+    /*
+    |--------------------------------------------------------------------------
+    | 生成压缩文件名
+    |--------------------------------------------------------------------------
+    */
+
+    /**
+     * Derive the archive file name from the source name and the format.
+     *
+     * @param string $filename base name of the file or directory being compressed
+     * @param string $format   '7z' or 'lzma'; any other value maps to tar.gz
+     * @return string the source name with '.7z', '.lzma' or '.tar.gz' appended
+     */
+    protected function getZipFileName(string $filename, string $format): string
+    {
+        // Pick the suffix first, then append it once.
+        $suffix = match ($format) {
+            '7z' => '.7z',
+            'lzma' => '.lzma',
+            default => '.tar.gz',
+        };
+
+        return $filename . $suffix;
+    }
+
+    /*
+    |--------------------------------------------------------------------------
+    | 压缩函数
+    |--------------------------------------------------------------------------
+    */
+
+    /**
+     * Compress $source into $target with the external tool chosen by $format.
+     *
+     * '7z' runs `7z a`, 'lzma' runs `xz --format=lzma` (a directory is first
+     * packed into a temporary tar file next to it), and any other value runs
+     * `tar -czf`.
+     *
+     * @param string $source file or directory to compress
+     * @param string $target full path of the archive to create
+     * @param string $format '7z', 'lzma', or any other value for tar.gz
+     * @return void
+     * @throws \RuntimeException when a subprocess fails or the produced
+     *                           archive cannot be moved to $target
+     */
+    protected function compress($source, $target, $format)
+    {
+        // Same generous limit for every subprocess; the Process default
+        // timeout is far too short for large exports.
+        $timeout = 60 * 60 * 6;
+        $isDir = is_dir($source);
+        $tmpTar = null;
+
+        switch ($format) {
+            case '7z':
+                $command = [
+                    '7z',
+                    'a',
+                    '-t7z',
+                    '-mx=9',
+                    $target,
+                    $source
+                ];
+                break;
+
+            case 'lzma':
+                if ($isDir) {
+                    // xz compresses single files only, so pack the directory
+                    // into a tar file first.
+                    $tmpTar = $source . '.tar';
+                    $tar = new Process([
+                        'tar',
+                        '-cf',
+                        $tmpTar,
+                        '-C',
+                        dirname($source),
+                        basename($source)
+                    ]);
+                    $tar->setTimeout($timeout);
+                    $tar->run();
+
+                    if (!$tar->isSuccessful()) {
+                        Log::error('compress fail', [
+                            'error' => $tar->getErrorOutput()
+                        ]);
+                        if (is_file($tmpTar)) {
+                            unlink($tmpTar);
+                        }
+                        throw new \RuntimeException($tar->getErrorOutput());
+                    }
+
+                    $source = $tmpTar;
+                }
+                $command = [
+                    'xz',
+                    '-k',
+                    '-9',
+                    '--format=lzma',
+                    $source
+                ];
+                break;
+
+            default:
+                $command = [
+                    'tar',
+                    '-czf',
+                    $target,
+                    '-C',
+                    dirname($source),
+                    basename($source)
+                ];
+        }
+
+        $this->info(implode(' ', $command));
+        $process = new Process($command);
+        $process->setTimeout($timeout);
+
+        try {
+            $process->run();
+        } finally {
+            // The intermediate tar is only needed while xz runs.
+            if ($tmpTar !== null && is_file($tmpTar)) {
+                unlink($tmpTar);
+            }
+        }
+
+        $this->info($process->getOutput());
+
+        if (!$process->isSuccessful()) {
+
+            Log::error('compress fail', [
+                'error' => $process->getErrorOutput()
+            ]);
+
+            throw new \RuntimeException($process->getErrorOutput());
+        }
+
+        if ($format === 'lzma') {
+            // xz writes its output next to the input as "<input>.lzma"; for a
+            // directory source that is "<dir>.tar.lzma", which differs from
+            // the path the caller asked for, so move it into place.
+            $produced = $source . '.lzma';
+            if ($produced !== $target && !rename($produced, $target)) {
+                throw new \RuntimeException(
+                    "move compressed file fail: {$produced} -> {$target}"
+                );
+            }
+        }
+    }
+}

+ 2 - 2
api-v12/config/filesystems.php

@@ -32,7 +32,7 @@ return [
 
         'local' => [
             'driver' => 'local',
-            'root' => storage_path('app/private'),
+            'root' => storage_path('app/public'),
             'serve' => true,
             'throw' => false,
             'report' => false,
@@ -41,7 +41,7 @@ return [
         'public' => [
             'driver' => 'local',
             'root' => storage_path('app/public'),
-            'url' => env('APP_URL').'/storage',
+            'url' => env('APP_URL') . '/storage',
             'visibility' => 'public',
             'throw' => false,
             'report' => false,

+ 22 - 1
dashboard-v6/src/components/navigation/MainMenu.tsx

@@ -208,7 +208,6 @@ const Widget = ({ onSearch }: Props) => {
       icon: <CourseOutLinedIcon />,
       label: "Course",
     },
-
     {
       key: "/workspace/task",
       icon: <TaskIcon />,
@@ -242,6 +241,28 @@ const Widget = ({ onSearch }: Props) => {
         },
       ],
     },
+    {
+      key: "/workspace/tools",
+      icon: <CourseOutLinedIcon />,
+      label: "tools",
+      children: [
+        {
+          key: "/workspace/tools/tag",
+          label: "tag",
+          activeId: "workspace.tools.tag",
+        },
+        {
+          key: "/workspace/tools/drive",
+          label: "drive",
+          activeId: "workspace.tools.drive",
+        },
+        {
+          key: "/workspace/tools/dict",
+          label: "dict",
+          activeId: "workspace.tools.dict",
+        },
+      ],
+    },
   ];
   console.log("nav", routeId);
   /** 当前选中 */