UpgradeCompound.php 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\Models\WordIndex;
  6. use App\Models\WbwTemplate;
  7. use App\Models\UserDict;
  8. use App\Tools\TurboSplit;
  9. use App\Http\Api\DictApi;
  10. use Illuminate\Support\Facades\DB;
  11. class UpgradeCompound extends Command
  12. {
  13. /**
  14. * The name and signature of the console command.
  15. *
  16. * @var string
  17. */
  18. protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue}';
  19. /**
  20. * The console command description.
  21. *
  22. * @var string
  23. */
  24. protected $description = 'auto split compound word';
  25. /**
  26. * Create a new command instance.
  27. *
  28. * @return void
  29. */
  30. public function __construct()
  31. {
  32. parent::__construct();
  33. }
  34. /**
  35. * Execute the console command.
  36. *
  37. * @return int
  38. */
  39. public function handle()
  40. {
  41. if(file_exists(base_path('.stop'))){
  42. $this->info('.stop exists');
  43. return 0;
  44. }
  45. $dict_id = DictApi::getSysDict('robot_compound');
  46. if(!$dict_id){
  47. $this->error('没有找到 robot_compound 字典');
  48. return 1;
  49. }
  50. $start = \microtime(true);
  51. $_word = $this->argument('word');
  52. if(!empty($_word)){
  53. $ts = new TurboSplit();
  54. if($this->option('debug')){
  55. $ts->debug(true);
  56. }
  57. $results = $ts->splitA($_word);
  58. Storage::disk('local')->put("tmp/compound1.csv", "word,type,grammar,parent,factors");
  59. foreach ($results as $key => $value) {
  60. # code...
  61. $output = "{$value['word']},{$value['type']},{$value['grammar']},{$value['parent']},{$value['factors']},{$value['confidence']}";
  62. $this->info($output);
  63. Storage::disk('local')->append("tmp/compound1.csv", $output);
  64. }
  65. return 0;
  66. }
  67. //
  68. if($this->option('test')){
  69. //调试代码
  70. $ts = new TurboSplit();
  71. Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");
  72. //获取需要拆的词
  73. $list = [
  74. [5,20,20],
  75. [21,30,20],
  76. [31,40,10],
  77. [41,60,10],
  78. ];
  79. foreach ($list as $take) {
  80. # code...
  81. $words = WordIndex::where('final',0)
  82. ->whereBetween('len',[$take[0],$take[1]])
  83. ->select('word')
  84. ->take($take[2])->get();
  85. foreach ($words as $word) {
  86. $this->info($word->word);
  87. Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");
  88. $parts = $ts->splitA($word->word);
  89. foreach ($parts as $part) {
  90. # code...
  91. $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
  92. $this->info($info);
  93. Storage::disk('local')->append("tmp/compound.md", "- {$info}");
  94. }
  95. }
  96. }
  97. $this->info("耗时:".\microtime(true)-$start);
  98. return 0;
  99. }
  100. if($this->option('book')){
  101. $words = WbwTemplate::select('real')
  102. ->where('book',$this->option('book'))
  103. ->where('type','<>','.ctl.')
  104. ->where('real','<>','')
  105. ->orderBy('real')
  106. ->groupBy('real')->cursor();
  107. $count = DB::select('SELECT count(*) from (
  108. SELECT "real" from wbw_templates where book = ? and type <> ? and real <> ? group by real) T',
  109. [$this->option('book'),'.ctl.','']);
  110. }else{
  111. $words = WbwTemplate::select('real')
  112. ->where('type','<>','.ctl.')
  113. ->where('real','<>','')
  114. ->orderBy('real')
  115. ->groupBy('real')->cursor();
  116. $count = DB::select('SELECT count(*) from (
  117. SELECT "real" from wbw_templates where type <> ? and real <> ? group by real) T',
  118. ['.ctl.','']);
  119. }
  120. $bar = $this->output->createProgressBar($count[0]->count);
  121. foreach ($words as $key => $word) {
  122. $bar->advance();
  123. if($this->option('continue')){
  124. //先看目前字典里有没有已经拆过的这个词
  125. $isExists = UserDict::where('word',$word->real)
  126. ->where('dict_id',$dict_id)
  127. ->where('flag',1)
  128. ->exists();
  129. if($isExists){
  130. continue;
  131. }
  132. }
  133. //删除该词旧数据
  134. UserDict::where('word',$word->real)
  135. ->where('dict_id',$dict_id)
  136. ->delete();
  137. $ts = new TurboSplit();
  138. $parts = $ts->splitA($word->real);
  139. foreach ($parts as $part) {
  140. if(isset($part['type']) && $part['type'] === ".v."){
  141. continue;
  142. }
  143. $new = UserDict::firstOrNew(
  144. [
  145. 'word' => $part['word'],
  146. 'factors' => $part['factors'],
  147. 'dict_id' => $dict_id,
  148. ],
  149. [
  150. 'id' => app('snowflake')->id(),
  151. 'source' => '_ROBOT_',
  152. 'create_time'=>(int)(microtime(true)*1000),
  153. ]
  154. );
  155. if(isset($part['type'])){
  156. $new->type = $part['type'];
  157. }else{
  158. $new->type = ".cp.";
  159. }
  160. if(isset($part['grammar'])) $new->grammar = $part['grammar'];
  161. if(isset($part['parent'])) $new->parent = $part['parent'];
  162. $new->confidence = 50*$part['confidence'];
  163. $new->note = $part['confidence'];
  164. $new->language = 'cm';
  165. $new->creator_id = 1;
  166. $new->flag = 1;//标记为维护状态
  167. $new->save();
  168. }
  169. if(env('APP_ENV','local') !== 'local'){
  170. usleep(500);
  171. }
  172. }
  173. //维护状态数据改为正常状态
  174. UserDict::where('dict_id',$dict_id)->where('flag',1)->update(['flag'=>0]);
  175. $bar->finish();
  176. return 0;
  177. }
  178. }