Browse Source

添加开始拆词时间提示

visuddhinanda 1 year ago
parent
commit
b04b943fb8
1 changed file with 115 additions and 95 deletions
  1. api-v8/app/Console/Commands/UpgradeCompound.php (115 additions, 95 deletions)

+ 115 - 95
api-v8/app/Console/Commands/UpgradeCompound.php

@@ -1,4 +1,5 @@
 <?php
+
 namespace App\Console\Commands;
 
 use Illuminate\Console\Command;
@@ -17,7 +18,7 @@ class UpgradeCompound extends Command
      * php -d memory_limit=1024M artisan upgrade:compound  --api=https://next.wikipali.org/api --from=182852 --to=30000
      * @var string
      */
-    protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue} {--api=} {--from=} {--to=}';
+    protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue} {--api=} {--from=0} {--to=0} {--min=7} {--max=50}';
 
     /**
      * The console command description.
@@ -45,178 +46,197 @@ class UpgradeCompound extends Command
      */
     public function handle()
     {
-        if(\App\Tools\Tools::isStop()){
+        if (\App\Tools\Tools::isStop()) {
             $this->info('.stop exists');
             return 0;
         }
-        $this->info('['.date('Y-m-d H:i:s', time()).'] upgrade:compound start');
+        $confirm = '';
+        if ($this->option('api')) {
+            $confirm .= 'api=' . $this->option('api') . "\n";
+        }
+        $confirm .= "min=" . $this->option('min') . "\n";
+        $confirm .= "max="  . $this->option('max') . "\n";
+        $confirm .= "from="  . $this->option('from') . "\n";
+        $confirm .= "to="  . $this->option('to') . "\n";
+
+        if (!$this->confirm($confirm)) {
+            return 0;
+        }
+        $this->info('[' . date('Y-m-d H:i:s', time()) . '] upgrade:compound start');
 
         $dict_id = DictApi::getSysDict('robot_compound');
-        if(!$dict_id){
+        if (!$dict_id) {
             $this->error('没有找到 robot_compound 字典');
             return 1;
         }
 
-		$start = \microtime(true);
+        $start = \microtime(true);
 
 
 
-		//
-		if($this->option('test')){
-			//调试代码
+        //
+        if ($this->option('test')) {
+            //调试代码
             $ts = new TurboSplit();
-			Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");
-			//获取需要拆的词
-			$list = [
-				[5,20,20],
-				[21,30,20],
-				[31,40,10],
-				[41,60,10],
-			];
-			foreach ($list as $take) {
-				# code...
-				$words = WordIndex::where('final',0)
-                            ->whereBetween('len',[$take[0],$take[1]])
-                            ->select('word')
-                            ->take($take[2])->get();
-				foreach ($words as $word) {
-					$this->info($word->word);
-					Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");
-					$parts = $ts->splitA($word->word);
-					foreach ($parts as $part) {
-						# code...
+            Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");
+            //获取需要拆的词
+            $list = [
+                [5, 20, 20],
+                [21, 30, 20],
+                [31, 40, 10],
+                [41, 60, 10],
+            ];
+            foreach ($list as $take) {
+                # code...
+                $words = WordIndex::where('final', 0)
+                    ->whereBetween('len', [$take[0], $take[1]])
+                    ->select('word')
+                    ->take($take[2])->get();
+                foreach ($words as $word) {
+                    $this->info($word->word);
+                    Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");
+                    $parts = $ts->splitA($word->word);
+                    foreach ($parts as $part) {
+                        # code...
                         $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
-						$this->info($info);
-						Storage::disk('local')->append("tmp/compound.md", "- {$info}");
-					}
-				}
-			}
-			$this->info("耗时:".\microtime(true)-$start);
-			return 0;
-		}
-
-
-		$_word = $this->argument('word');
-		if(!empty($_word)){
-            $words = array((object)array('real'=>$_word));
+                        $this->info($info);
+                        Storage::disk('local')->append("tmp/compound.md", "- {$info}");
+                    }
+                }
+            }
+            $this->info("耗时:" . \microtime(true) - $start);
+            return 0;
+        }
+
+
+        $_word = $this->argument('word');
+        if (!empty($_word)) {
+            $words = array((object)array('real' => $_word));
             $count = 1;
-		}else if($this->option('book')){
+        } else if ($this->option('book')) {
             $words = WbwTemplate::select('real')
-                            ->where('book',$this->option('book'))
-                            ->where('type','<>','.ctl.')
-                            ->where('real','<>','')
-                            ->orderBy('real')
-                            ->groupBy('real')->cursor();
-            $query = DB::select('SELECT count(*) from (
+                ->where('book', $this->option('book'))
+                ->where('type', '<>', '.ctl.')
+                ->where('real', '<>', '')
+                ->orderBy('real')
+                ->groupBy('real')->cursor();
+            $query = DB::select(
+                'SELECT count(*) from (
                                     SELECT "real" from wbw_templates where book = ? and type <> ? and real <> ? group by real) T',
-                                    [$this->option('book'),'.ctl.','']);
+                [$this->option('book'), '.ctl.', '']
+            );
             $count = $query[0]->count;
-        }else{
+        } else {
             $min = WordIndex::min('id');
             $max = WordIndex::max('id');
-            if($this->option('from')){
+            if ($this->option('from') > 0) {
                 $from = $min + $this->option('from');
-            }else{
+            } else {
                 $from = $min;
             }
-            if($this->option('to')){
+            if ($this->option('to') > 0) {
                 $to = $min + $this->option('to');
-            }else{
+            } else {
                 $to = $max;
             }
-            $words = WordIndex::whereBetween('id',[$from,$to])
-                            ->where('len','>',7)
-                            ->where('len','<',51)
-                            ->orderBy('id')
-                            ->selectRaw('word as real')
-                            ->cursor();
+            $words = WordIndex::whereBetween('id', [$from, $to])
+                ->where('len', '>', $this->option('min'))
+                ->where('len', '<', $this->option('max'))
+                ->orderBy('id')
+                ->selectRaw('id,word as real')
+                ->cursor();
             $count = $to - $from + 1;
         }
 
-		$sn = 0;
+        $sn = 0;
         $wordIndex = array();
         $result = array();
-		foreach ($words as $key => $word) {
-            if(\App\Tools\Tools::isStop()){
+        foreach ($words as $key => $word) {
+            if (\App\Tools\Tools::isStop()) {
                 return 0;
             }
             $sn++;
             $startAt = microtime(true);
-
-			$ts = new TurboSplit();
-            if($this->option('debug')){
+            $now = date('Y-m-d H:i:s');
+            $this->info("[{$now}]{$word->real} start id={$word->id}");
+            $ts = new TurboSplit();
+            if ($this->option('debug')) {
                 $ts->debug(true);
             }
             $wordIndex[] = $word->real;
             $parts = $ts->splitA($word->real);
-            $time = round(microtime(true) - $startAt,2);
+            $time = round(microtime(true) - $startAt, 2);
             $percent = (int)($sn * 100 / $count);
 
             $this->info("[{$percent}%][{$sn}] {$word->real}  {$time}s");
 
             $resultCount = 0;
             foreach ($parts as $part) {
-                if(isset($part['type']) && $part['type'] === ".v."){
+                if (isset($part['type']) && $part['type'] === ".v.") {
                     continue;
                 }
                 $resultCount++;
                 $new = array();
                 $new['word'] = $part['word'];
                 $new['factors'] = $part['factors'];
-                if(isset($part['type'])){
+                if (isset($part['type'])) {
                     $new['type'] = $part['type'];
-                }else{
+                } else {
                     $new['type'] = ".cp.";
                 }
-                if(isset($part['grammar'])){
+                if (isset($part['grammar'])) {
                     $new['grammar'] = $part['grammar'];
-                }else{
+                } else {
                     $new['grammar'] = null;
                 }
-                if(isset($part['parent'])){
+                if (isset($part['parent'])) {
                     $new['parent'] = $part['parent'];
-                }else{
+                } else {
                     $new['parent'] = null;
                 }
-                $new['confidence'] = 50*$part['confidence'];
+                $new['confidence'] = 50 * $part['confidence'];
                 $result[] = $new;
 
-                if(!empty($_word)){
+                if (!empty($_word)) {
                     $output = "[{$resultCount}],{$part['word']},{$part['type']},{$part['grammar']},{$part['parent']},{$part['factors']},{$part['confidence']}";
                     $this->info($output);
                 }
             }
 
-            if(count($wordIndex) % 100 ===0){
-                $this->upload($wordIndex,$result,$this->option('api'));
+            if (count($wordIndex) % 100 === 0) {
+                //每100个单词上传一次
+                $this->upload($wordIndex, $result, $this->option('api'));
                 $wordIndex = array();
                 $result = array();
             }
-		}
-        $this->upload($wordIndex,$result,$this->option('api'));
+        }
+        $this->upload($wordIndex, $result, $this->option('api'));
 
-        $this->info('['.date('Y-m-d H:i:s', time()).'] upgrade:compound finished');
+        $this->info('[' . date('Y-m-d H:i:s', time()) . '] upgrade:compound finished');
 
         return 0;
     }
 
-    private function upload($index,$words,$url=null){
+    private function upload($index, $words, $url = null)
+    {
 
-        if(!$url){
-            $url = config('app.url').'/api/v2/compound';
-        }else{
-            $url = $url.'/v2/compound';
+        if (!$url) {
+            $url = config('app.url') . '/api/v2/compound';
+        } else {
+            $url = $url . '/v2/compound';
         }
-        $this->info('url = '.$url);
-        $this->info('uploading size='.strlen(json_encode($words,JSON_UNESCAPED_UNICODE)));
-        $response = Http::post($url,
-                                [
-                                    'index'=> $index,
-                                    'words'=> $words,
-                                ]);
-        if($response->ok()){
+        $this->info('url = ' . $url);
+        $this->info('uploading size=' . strlen(json_encode($words, JSON_UNESCAPED_UNICODE)));
+        $response = Http::post(
+            $url,
+            [
+                'index' => $index,
+                'words' => $words,
+            ]
+        );
+        if ($response->ok()) {
             $this->info('upload ok');
-        }else{
+        } else {
             $this->error('upload fail.');
         }
     }