UpgradeCompound.php 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\Models\WordIndex;
  6. use App\Models\WbwTemplate;
  7. use App\Models\UserDict;
  8. use App\Tools\TurboSplit;
  class UpgradeCompound extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * `word`   (optional) a single word to split; the result is written to
       *          tmp/compound1.csv on the local disk and the command exits.
       * `--test` split a sample of WordIndex words and write a Markdown report
       *          to tmp/compound.md on the local disk.
       * With neither, the command regenerates the UserDict rows stored under
       * $dict_id from the WbwTemplate words selected in rebuildDictionary().
       *
       * @var string
       */
      protected $signature = 'upgrade:compound {word?} {--test}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Command description';

      /**
       * dict_id under which the rows generated by this command are stored.
       *
       * @var string
       */
      protected $dict_id = 'c42980f0-5967-4833-b695-84183344f68f';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Dispatches to one of three modes: single-word split (when the `word`
       * argument is given), sample report (`--test`), or the dictionary rebuild.
       *
       * @return int 0 in every mode
       */
      public function handle()
      {
          $start = \microtime(true);

          $_word = $this->argument('word');
          if (!empty($_word)) {
              return $this->splitSingleWord($_word);
          }

          if ($this->option('test')) {
              return $this->runTestSample($start);
          }

          return $this->rebuildDictionary();
      }

      /**
       * Split one word and dump every result row to tmp/compound1.csv.
       *
       * @param string $word the word passed on the command line
       * @return int always 0
       */
      private function splitSingleWord($word)
      {
          $ts = new TurboSplit();
          $results = $ts->splitA($word);

          Storage::disk('local')->put("tmp/compound1.csv", "word,type,grammar,parent,factors");
          foreach ($results as $value) {
              // type/grammar/parent are treated as optional by rebuildDictionary();
              // default them here too so a missing key yields an empty cell
              // instead of an "undefined array key" warning.
              $type = $value['type'] ?? '';
              $grammar = $value['grammar'] ?? '';
              $parent = $value['parent'] ?? '';
              Storage::disk('local')->append("tmp/compound1.csv", "{$value['word']},{$type},{$grammar},{$parent},{$value['factors']}");
          }

          return 0;
      }

      /**
       * Debug mode: split a sample of WordIndex words per length bucket and
       * write the results to tmp/compound.md, echoing them to the console.
       *
       * @param float $start value of microtime(true) taken when handle() began
       * @return int always 0
       */
      private function runTestSample($start)
      {
          Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");

          // Words to split, as [min len, max len, number of words to take].
          $list = [
              [5, 20, 20],
              [21, 30, 20],
              [31, 40, 10],
              [41, 60, 10],
          ];

          foreach ($list as [$minLen, $maxLen, $limit]) {
              $words = WordIndex::where('final', 0)
                  ->whereBetween('len', [$minLen, $maxLen])
                  ->select('word')
                  ->take($limit)
                  ->get();

              foreach ($words as $word) {
                  $this->info($word->word);
                  Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");

                  // The splitter was previously never created in this mode, so
                  // the call below failed on an undefined variable. A fresh
                  // instance per word mirrors rebuildDictionary().
                  $ts = new TurboSplit();
                  $parts = $ts->splitA($word->word);
                  foreach ($parts as $part) {
                      $this->info("{$part['word']},{$part['factors']},{$part['confidence']}");
                      Storage::disk('local')->append("tmp/compound.md", "- `{$part['word']}`,{$part['factors']},{$part['confidence']}");
                  }
              }
          }

          // Parenthesised: before PHP 8 "." and "-" share precedence, so the
          // unparenthesised form subtracted $start from the concatenated string.
          $this->info("耗时:" . (\microtime(true) - $start));

          return 0;
      }

      /**
       * Regenerate the UserDict rows stored under $dict_id.
       *
       * Every distinct non-empty `real` word of the selected WbwTemplate
       * paragraphs that is not already present in UserDict is split, and each
       * split result is saved with flag = 1. Afterwards rows of this dictionary
       * still carrying flag = 0 (not written during this run) are deleted and
       * the fresh rows are reset to flag = 0.
       *
       * @return int always 0
       */
      private function rebuildDictionary()
      {
          //$words = WordIndex::where('final',0)->select('word')->orderBy('count','desc')->skip(72300)->cursor();
          $words = WbwTemplate::select('real')
              ->where('book', 118)
              ->whereBetween('paragraph', [1329, 1367])
              ->where('type', '<>', '.ctl.')
              ->where('real', '<>', '')
              ->groupBy('real')->cursor();

          $count = 0;
          foreach ($words as $word) {
              // First check whether the word already exists in a dictionary
              // (any dict_id other than the one excluded below).
              // NOTE(review): this check does not exclude $this->dict_id, so a
              // word already stored by a previous run is skipped and its rows
              // keep flag = 0 — confirm that the cleanup at the end is meant to
              // delete them.
              $isExists = UserDict::where('word', $word->real)
                  ->where('dict_id', "<>", '8359757e-9575-455b-a772-cc6f036caea0')
                  ->exists();
              if ($isExists) {
                  $this->info("found:{$word->real}");
                  continue;
              }

              $count++;
              $this->info("{$count}:{$word->real}");

              $ts = new TurboSplit();
              $parts = $ts->splitA($word->real);
              foreach ($parts as $part) {
                  // Look up by (word, factors, dict_id); the second array only
                  // supplies attributes for a row that does not exist yet.
                  $new = UserDict::firstOrNew(
                      [
                          'word' => $part['word'],
                          'factors' => $part['factors'],
                          'dict_id' => $this->dict_id,
                      ],
                      [
                          'id' => app('snowflake')->id(),
                          'source' => '_ROBOT_',
                          'create_time' => (int)(microtime(true) * 1000),
                      ]
                  );

                  // ".cp." is the fallback type when the splitter reports none.
                  $new->type = isset($part['type']) ? $part['type'] : ".cp.";

                  // Grammar used to be written to `parent` and was then
                  // overwritten by the parent assignment, so it was never kept.
                  // NOTE(review): assumes UserDict has a `grammar` column — confirm.
                  if (isset($part['grammar'])) {
                      $new->grammar = $part['grammar'];
                  }
                  if (isset($part['parent'])) {
                      $new->parent = $part['parent'];
                  }

                  $new->confidence = 50 * $part['confidence'];
                  // The raw splitter confidence is kept in `note`.
                  $new->note = $part['confidence'];
                  $new->language = 'cm';
                  $new->creator_id = 1;
                  // flag = 1 marks the row as written during this run.
                  $new->flag = 1;
                  $new->save();
              }
          }

          // Delete old data: rows not written in this run still have flag = 0.
          UserDict::where('dict_id', $this->dict_id)->where('flag', 0)->delete();
          // Reset the surviving rows so the next run starts from flag = 0.
          UserDict::where('dict_id', $this->dict_id)->where('flag', 1)->update(['flag' => 0]);

          return 0;
      }
  }