UpgradeCompound.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\Models\WordIndex;
  6. use App\Models\WbwTemplate;
  7. use App\Models\UserDict;
  8. use App\Tools\TurboSplit;
  9. use App\Http\Api\DictApi;
  10. class UpgradeCompound extends Command
  11. {
  12. /**
  13. * The name and signature of the console command.
  14. *
  15. * @var string
  16. */
  17. protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test}';
  18. /**
  19. * The console command description.
  20. *
  21. * @var string
  22. */
  23. protected $description = 'Command description';
  24. /**
  25. * Create a new command instance.
  26. *
  27. * @return void
  28. */
  29. public function __construct()
  30. {
  31. parent::__construct();
  32. }
  33. /**
  34. * Execute the console command.
  35. *
  36. * @return int
  37. */
  38. public function handle()
  39. {
  40. $dict_id = DictApi::getSysDict('robot_compound');
  41. if(!$dict_id){
  42. $this->error('没有找到 robot_compound 字典');
  43. return 1;
  44. }
  45. $start = \microtime(true);
  46. $_word = $this->argument('word');
  47. if(!empty($_word)){
  48. $ts = new TurboSplit();
  49. if($this->option('debug')){
  50. $ts->debug(true);
  51. }
  52. $results = $ts->splitA($_word);
  53. Storage::disk('local')->put("tmp/compound1.csv", "word,type,grammar,parent,factors");
  54. foreach ($results as $key => $value) {
  55. # code...
  56. $output = "{$value['word']},{$value['type']},{$value['grammar']},{$value['parent']},{$value['factors']}";
  57. $this->info($output);
  58. Storage::disk('local')->append("tmp/compound1.csv", $output);
  59. }
  60. return 0;
  61. }
  62. //
  63. if($this->option('test')){
  64. //调试代码
  65. $ts = new TurboSplit();
  66. Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");
  67. //获取需要拆的词
  68. $list = [
  69. [5,20,20],
  70. [21,30,20],
  71. [31,40,10],
  72. [41,60,10],
  73. ];
  74. foreach ($list as $take) {
  75. # code...
  76. $words = WordIndex::where('final',0)->whereBetween('len',[$take[0],$take[1]])->select('word')->take($take[2])->get();
  77. foreach ($words as $word) {
  78. $this->info($word->word);
  79. Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");
  80. $parts = $ts->splitA($word->word);
  81. foreach ($parts as $part) {
  82. # code...
  83. $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
  84. $this->info($info);
  85. Storage::disk('local')->append("tmp/compound.md", "- {$info}");
  86. }
  87. }
  88. }
  89. $this->info("耗时:".\microtime(true)-$start);
  90. return 0;
  91. }
  92. if($this->option('book')){
  93. $words = WbwTemplate::select('real')
  94. ->where('book',$this->option('book'))
  95. ->where('type','<>','.ctl.')
  96. ->where('real','<>','')
  97. ->groupBy('real')->cursor();
  98. }else{
  99. $words = WbwTemplate::select('real')
  100. ->where('type','<>','.ctl.')
  101. ->where('real','<>','')
  102. ->groupBy('real')->cursor();
  103. }
  104. $count = 0;
  105. foreach ($words as $key => $word) {
  106. UserDict::where('word',$word->real)
  107. ->where('dict_id',$dict_id)
  108. ->update(['flag'=>2]);
  109. //先看目前字典里有没有
  110. $isExists = UserDict::where('word',$word->real)
  111. ->where('dict_id',"<>",$dict_id)
  112. ->exists();
  113. if($isExists){
  114. $this->info("Exists:{$word->real}");
  115. //continue;
  116. }
  117. # code...
  118. $count++;
  119. $this->info("{$count}:{$word->real}");
  120. $ts = new TurboSplit();
  121. $parts = $ts->splitA($word->real);
  122. foreach ($parts as $part) {
  123. if(isset($part['type']) && $part['type'] === ".v."){
  124. continue;
  125. }
  126. $new = UserDict::firstOrNew(
  127. [
  128. 'word' => $part['word'],
  129. 'factors' => $part['factors'],
  130. 'dict_id' => $dict_id,
  131. ],
  132. [
  133. 'id' => app('snowflake')->id(),
  134. 'source' => '_ROBOT_',
  135. 'create_time'=>(int)(microtime(true)*1000),
  136. ]
  137. );
  138. if(isset($part['type'])){
  139. $new->type = $part['type'];
  140. }else{
  141. $new->type = ".cp.";
  142. }
  143. if(isset($part['grammar'])) $new->grammar = $part['grammar'];
  144. if(isset($part['parent'])) $new->parent = $part['parent'];
  145. $new->confidence = 50*$part['confidence'];
  146. $new->note = $part['confidence'];
  147. $new->language = 'cm';
  148. $new->creator_id = 1;
  149. $new->flag = 1;
  150. $new->save();
  151. }
  152. }
  153. //删除旧数据
  154. UserDict::where('dict_id',$dict_id)->where('flag',2)->delete();
  155. UserDict::where('dict_id',$dict_id)->where('flag',1)->update(['flag'=>0]);
  156. return 0;
  157. }
  158. }