UpgradeCompound.php 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\Models\WordIndex;
  6. use App\Models\WbwTemplate;
  7. use App\Models\UserDict;
  8. use App\Tools\TurboSplit;
  9. use App\Http\Api\DictApi;
  10. use Illuminate\Support\Facades\DB;
  11. use Illuminate\Support\Facades\Log;
  /**
   * Artisan command that splits compound words with TurboSplit and stores the
   * resulting parts as UserDict rows of the "robot_compound" system dictionary.
   */
  class UpgradeCompound extends Command
  {
      /**
       * The name and signature of the console command.
       *
       *  - word       : process only this word; its parts are also dumped to tmp/compound1.csv
       *  - --book     : only take words from wbw_templates rows of this book
       *  - --debug    : call TurboSplit::debug(true) before splitting
       *  - --test     : split a sample of WordIndex words into tmp/compound.md and exit
       *  - --continue : skip words that already have flag=1 rows in the dictionary
       *
       * @var string
       */
      protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'auto split compound word';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * @return int 0 on success or when a stop flag is present, 1 when the
       *             robot_compound dictionary cannot be found
       */
      public function handle()
      {
          if (\App\Tools\Tools::isStop()) {
              return 0;
          }
          if (file_exists(base_path('.stop'))) {
              $this->info('.stop exists');
              return 0;
          }

          $dict_id = DictApi::getSysDict('robot_compound');
          if (!$dict_id) {
              $this->error('没有找到 robot_compound 字典');
              return 1;
          }

          $start = \microtime(true);

          if ($this->option('test')) {
              return $this->runSample($start);
          }

          $_word = $this->argument('word');
          // Explicit check instead of empty(): the literal word "0" is a valid argument.
          $singleWord = $_word !== null && $_word !== '';

          if ($singleWord) {
              $words = [(object) ['real' => $_word]];
              $total = 1;
          } else {
              // One base query feeds both the progress-bar total and the word cursor,
              // so the two can no longer drift apart.
              $query = WbwTemplate::where('type', '<>', '.ctl.')
                  ->where('real', '<>', '');
              if ($this->option('book')) {
                  $query->where('book', $this->option('book'));
              }
              // count(distinct real) equals the number of "group by real" groups
              // because rows with an empty/NULL real are already filtered out.
              $total = (clone $query)->distinct()->count('real');
              $words = $query->select('real')
                  ->orderBy('real')
                  ->groupBy('real')
                  ->cursor();
          }

          $bar = $this->output->createProgressBar($total);
          foreach ($words as $word) {
              if (\App\Tools\Tools::isStop()) {
                  // Rows written so far keep flag=1, which is what --continue checks for.
                  return 0;
              }
              $bar->advance();
              if ($this->option('continue') && $this->alreadySplit($word->real, $dict_id)) {
                  continue;
              }
              $this->rebuildWord($word->real, $dict_id, $singleWord);
          }

          // Switch rows from maintenance state (flag=1) back to normal state (flag=0).
          UserDict::where('dict_id', $dict_id)->where('flag', 1)->update(['flag' => 0]);
          $bar->finish();
          return 0;
      }

      /**
       * Debug run: split a sample of not-yet-final WordIndex words and write the
       * result to tmp/compound.md on the local disk. Nothing is written to UserDict.
       *
       * @param float $start timestamp (microtime) taken when the command started
       * @return int always 0
       */
      private function runSample($start)
      {
          $ts = new TurboSplit();
          Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");

          // Each entry: [minimum len, maximum len, number of words to take].
          $ranges = [
              [5, 20, 20],
              [21, 30, 20],
              [31, 40, 10],
              [41, 60, 10],
          ];
          foreach ($ranges as $range) {
              $words = WordIndex::where('final', 0)
                  ->whereBetween('len', [$range[0], $range[1]])
                  ->select('word')
                  ->take($range[2])
                  ->get();
              foreach ($words as $word) {
                  $this->info($word->word);
                  Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");
                  foreach ($ts->splitA($word->word) as $part) {
                      $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
                      $this->info($info);
                      Storage::disk('local')->append("tmp/compound.md", "- {$info}");
                  }
              }
          }

          // Parentheses are required: before PHP 8 "." and "-" share precedence and
          // associate left, so the unparenthesised form subtracted $start from a string.
          $this->info("耗时:" . (\microtime(true) - $start));
          return 0;
      }

      /**
       * Tell whether the dictionary already holds maintenance-state (flag=1) rows
       * for the given word.
       *
       * @param string $word
       * @param mixed  $dictId id of the robot_compound dictionary
       * @return bool
       */
      private function alreadySplit($word, $dictId)
      {
          return UserDict::where('word', $word)
              ->where('dict_id', $dictId)
              ->where('flag', 1)
              ->exists();
      }

      /**
       * Delete the word's existing rows in the dictionary, split it again and save
       * every resulting part (except parts typed ".v.") as a new flag=1 row.
       *
       * @param string $word    the word to split
       * @param mixed  $dictId  id of the robot_compound dictionary
       * @param bool   $dumpCsv when true, also echo each part and write it to tmp/compound1.csv
       * @return void
       */
      private function rebuildWord($word, $dictId, $dumpCsv)
      {
          // Remove the old data of this word before splitting it again.
          UserDict::where('word', $word)
              ->where('dict_id', $dictId)
              ->delete();

          $ts = new TurboSplit();
          if ($this->option('debug')) {
              $ts->debug(true);
          }
          $parts = $ts->splitA($word);

          if ($dumpCsv) {
              // Header lists all six columns that each row below contains.
              Storage::disk('local')->put("tmp/compound1.csv", "word,type,grammar,parent,factors,confidence");
          }

          $saved = 0;
          foreach ($parts as $part) {
              if (isset($part['type']) && $part['type'] === ".v.") {
                  continue;
              }
              $saved++;

              $entry = new UserDict;
              $entry->id = app('snowflake')->id();
              $entry->word = $part['word'];
              $entry->factors = $part['factors'];
              $entry->dict_id = $dictId;
              $entry->source = '_ROBOT_';
              $entry->create_time = (int) (microtime(true) * 1000);
              // Parts without an explicit type are stored as ".cp.".
              $entry->type = $part['type'] ?? ".cp.";
              if (isset($part['grammar'])) {
                  $entry->grammar = $part['grammar'];
              }
              if (isset($part['parent'])) {
                  $entry->parent = $part['parent'];
              }
              $entry->confidence = 50 * $part['confidence'];
              $entry->note = $part['confidence'];
              $entry->language = 'cm';
              $entry->creator_id = 1;
              $entry->flag = 1; // mark as maintenance state until the whole run finishes
              $entry->save();

              if ($dumpCsv) {
                  // type/grammar/parent are optional keys; fall back instead of
                  // reading an undefined index. The type column shows the stored type.
                  $row = implode(',', [
                      $part['word'],
                      $entry->type,
                      $part['grammar'] ?? '',
                      $part['parent'] ?? '',
                      $part['factors'],
                      $part['confidence'],
                  ]);
                  $this->info((string) $saved);
                  $this->info($row);
                  Storage::disk('local')->append("tmp/compound1.csv", $row);
              }
          }
      }
  }