info('.stop exists'); return 0; } $dict_id = DictApi::getSysDict('robot_compound'); if(!$dict_id){ $this->error('没有找到 robot_compound 字典'); return 1; } $start = \microtime(true); // if($this->option('test')){ //调试代码 $ts = new TurboSplit(); Storage::disk('local')->put("tmp/compound.md", "# Turbo Split"); //获取需要拆的词 $list = [ [5,20,20], [21,30,20], [31,40,10], [41,60,10], ]; foreach ($list as $take) { # code... $words = WordIndex::where('final',0) ->whereBetween('len',[$take[0],$take[1]]) ->select('word') ->take($take[2])->get(); foreach ($words as $word) { $this->info($word->word); Storage::disk('local')->append("tmp/compound.md", "## {$word->word}"); $parts = $ts->splitA($word->word); foreach ($parts as $part) { # code... $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}"; $this->info($info); Storage::disk('local')->append("tmp/compound.md", "- {$info}"); } } } $this->info("耗时:".\microtime(true)-$start); return 0; } $_word = $this->argument('word'); if(!empty($_word)){ $words = array((object)array('real'=>$_word)); $count[] = (object)array('count'=>1); }else if($this->option('book')){ $words = WbwTemplate::select('real') ->where('book',$this->option('book')) ->where('type','<>','.ctl.') ->where('real','<>','') ->orderBy('real') ->groupBy('real')->cursor(); $count = DB::select('SELECT count(*) from ( SELECT "real" from wbw_templates where book = ? and type <> ? and real <> ? group by real) T', [$this->option('book'),'.ctl.','']); }else{ $words = WbwTemplate::select('real') ->where('type','<>','.ctl.') ->where('real','<>','') ->orderBy('real') ->groupBy('real')->cursor(); $count = DB::select('SELECT count(*) from ( SELECT "real" from wbw_templates where type <> ? and real <> ? group by real) T', ['.ctl.','']); } $bar = $this->output->createProgressBar($count[0]->count); foreach ($words as $key => $word) { $bar->advance(); if($this->option('continue')){ //先看目前字典里有没有已经拆过的这个词 $isExists = UserDict::where('word',$word->real) ->where('dict_id',$dict_id) ->where('flag',1) ->exists(); if($isExists){ continue; } } //删除该词旧数据 UserDict::where('word',$word->real) ->where('dict_id',$dict_id) ->delete(); $ts = new TurboSplit(); if($this->option('debug')){ $ts->debug(true); } $parts = $ts->splitA($word->real); if(!empty($_word)){ Storage::disk('local')->put("tmp/compound1.csv", "word,type,grammar,parent,factors"); } $count = 0; foreach ($parts as $part) { if(isset($part['type']) && $part['type'] === ".v."){ continue; } $count++; $new = new UserDict; $new->id = app('snowflake')->id(); $new->word = $part['word']; $new->factors = $part['factors']; $new->dict_id = $dict_id; $new->source = '_ROBOT_'; $new->create_time = (int)(microtime(true)*1000); if(isset($part['type'])){ $new->type = $part['type']; }else{ $new->type = ".cp."; } if(isset($part['grammar'])){ $new->grammar = $part['grammar']; } if(isset($part['parent'])){ $new->parent = $part['parent']; } $new->confidence = 50*$part['confidence']; $new->note = $part['confidence']; $new->language = 'cm'; $new->creator_id = 1; $new->flag = 1;//标记为维护状态 $new->save(); if(!empty($_word)){ $output = "{$part['word']},{$part['type']},{$part['grammar']},{$part['parent']},{$part['factors']},{$part['confidence']}"; $this->info($count); $this->info($output); Storage::disk('local')->append("tmp/compound1.csv", $output); } } } //维护状态数据改为正常状态 UserDict::where('dict_id',$dict_id)->where('flag',1)->update(['flag'=>0]); $bar->finish(); return 0; } }