Browse Source

如果服务器有,就不拆了

visuddhinanda 1 year ago
parent
commit
88a497b4ba
1 changed files with 74 additions and 15 deletions
  1. 74 15
      api-v8/app/Console/Commands/UpgradeCompound.php

+ 74 - 15
api-v8/app/Console/Commands/UpgradeCompound.php

@@ -6,6 +6,9 @@ use Illuminate\Console\Command;
 use Illuminate\Support\Facades\Storage;
 use App\Models\WordIndex;
 use App\Models\WbwTemplate;
+use App\Models\UserDict;
+use Illuminate\Support\Facades\Log;
+
 use App\Tools\TurboSplit;
 use App\Http\Api\DictApi;
 use Illuminate\Support\Facades\DB;
@@ -18,7 +21,7 @@ class UpgradeCompound extends Command
      * php -d memory_limit=1024M artisan upgrade:compound  --api=https://next.wikipali.org/api --from=182852 --to=30000
      * @var string
      */
-    protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue} {--api=} {--from=0} {--to=0} {--min=7} {--max=50}';
+    protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue} {--api=} {--from=0} {--to=0} {--min=7} {--max=300}';
 
     /**
      * The console command description.
@@ -52,12 +55,12 @@ class UpgradeCompound extends Command
         }
         $confirm = '';
         if ($this->option('api')) {
-            $confirm .= 'api=' . $this->option('api') . "\n";
+            $confirm .= 'api=' . $this->option('api') . PHP_EOL;
         }
-        $confirm .= "min=" . $this->option('min') . "\n";
-        $confirm .= "max="  . $this->option('max') . "\n";
-        $confirm .= "from="  . $this->option('from') . "\n";
-        $confirm .= "to="  . $this->option('to') . "\n";
+        $confirm .= "min=" . $this->option('min') . PHP_EOL;
+        $confirm .= "max="  . $this->option('max') . PHP_EOL;
+        $confirm .= "from="  . $this->option('from') . PHP_EOL;
+        $confirm .= "to="  . $this->option('to') . PHP_EOL;
 
         if (!$this->confirm($confirm)) {
             return 0;
@@ -108,10 +111,9 @@ class UpgradeCompound extends Command
             return 0;
         }
 
-
         $_word = $this->argument('word');
         if (!empty($_word)) {
-            $words = array((object)array('real' => $_word));
+            $words = array((object)array('real' => $_word, 'id' => 0));
             $count = 1;
         } else if ($this->option('book')) {
             $words = WbwTemplate::select('real')
@@ -151,20 +153,68 @@ class UpgradeCompound extends Command
         $sn = 0;
         $wordIndex = array();
         $result = array();
+        $dbHas = array();
+        $fDbHas = fopen(__DIR__ . '/compound.csv', 'r');
+        while (! feof($fDbHas)) {
+            $dbHas[] = trim(fgets($fDbHas));
+        }
+        fclose($fDbHas);
+        $this->info('load db has ' . count($dbHas));
         foreach ($words as $key => $word) {
             if (\App\Tools\Tools::isStop()) {
                 return 0;
             }
+            if (in_array($word->real, $dbHas)) {
+                $this->info("[{$key}]{$word->real}数据库中已经有了");
+                continue;
+            }
             $sn++;
             $startAt = microtime(true);
             $now = date('Y-m-d H:i:s');
             $this->info("[{$now}]{$word->real} start id={$word->id}");
-            $ts = new TurboSplit();
-            if ($this->option('debug')) {
-                $ts->debug(true);
-            }
             $wordIndex[] = $word->real;
-            $parts = $ts->splitA($word->real);
+
+            //先查询vri数据有没有拆分
+            $parts = array();
+            $wbwWords = WbwTemplate::where('real', $word->real)
+                ->select('word')->groupBy('word')->get();
+            foreach ($wbwWords as $key => $wbwWord) {
+                if (strpos($wbwWord->word, '-') !== false) {
+                    $wbwFactors = explode('-', $wbwWord->word);
+                    //看词尾是否能找到语尾
+                    $endWord = end($wbwFactors);
+                    $endWordInDict = UserDict::where('word', $endWord)->get();
+                    foreach ($endWordInDict as $key => $oneWord) {
+                        if (
+                            !empty($oneWord->type) &&
+                            strpos($oneWord->type, 'base') === false &&
+                            $oneWord->type !== '.cp.'
+                        ) {
+                            $parts[] = [
+                                'word' => $oneWord->real,
+                                'type' => $oneWord->type,
+                                'grammar' => $oneWord->grammar,
+                                'parent' => $oneWord->parent,
+                                'factors' => implode('+', array_slice($wbwFactors, 0, -1)) . '+' . $oneWord->factors,
+                                'confidence' => 100,
+                            ];
+                        }
+                    }
+                }
+            }
+            if (count($parts) === 0) {
+                if (mb_strlen($word->real, 'UTF-8') > 100) {
+                    Log::error('超长' . $word->real);
+                }
+                $ts = new TurboSplit();
+                if ($this->option('debug')) {
+                    $ts->debug(true);
+                }
+                $parts = $ts->splitA($word->real);
+            } else {
+                $this->info("找到vri拆分数据:" . count($parts));
+            }
+
             $time = round(microtime(true) - $startAt, 2);
             $percent = (int)($sn * 100 / $count);
 
@@ -198,8 +248,17 @@ class UpgradeCompound extends Command
                 $result[] = $new;
 
                 if (!empty($_word)) {
-                    $output = "[{$resultCount}],{$part['word']},{$part['type']},{$part['grammar']},{$part['parent']},{$part['factors']},{$part['confidence']}";
-                    $this->info($output);
+                    //指定拆分单词输出结果
+                    $debugOutput = [
+                        $resultCount,
+                        $part['word'],
+                        $part['type'],
+                        $part['grammar'],
+                        $part['parent'],
+                        $part['factors'],
+                        $part['confidence']
+                    ];
+                    $this->info(implode(',', $debugOutput));
                 }
             }