UpgradeDictSysWbwExtract.php 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. <?php
  2. /**
  3. * 将用户词典中的数据进行汇总。
  4. * 算法:
  5. * 同样词性的合并为一条记录。意思按照出现的次数排序
  6. */
  7. namespace App\Console\Commands;
  8. use Illuminate\Console\Command;
  9. use App\Models\UserDict;
  10. use App\Http\Api\DictApi;
  /**
   * Artisan command that condenses the "community" dictionary into the
   * "community_extract" dictionary.
   *
   * For every distinct non-empty word in the community dictionary it picks the
   * type/grammar pair, the parent and the factors value with the highest summed
   * confidence and stores them as a single robot-authored entry in the extract
   * dictionary.
   */
  class UpgradeDictSysWbwExtract extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'upgrade:syswbwextract';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = '从社区词典中提取最优结果';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * @return int 0 on success (or when Tools::isStop() reports true),
       *             1 when one of the two system dictionaries cannot be found.
       */
      public function handle()
      {
          // NOTE(review): isStop() presumably signals a maintenance/stop switch — confirm.
          if (\App\Tools\Tools::isStop()) {
              return 0;
          }

          $user_dict_id = DictApi::getSysDict('community');
          if (!$user_dict_id) {
              $this->error('没有找到 community 字典');
              return 1;
          }

          $user_dict_extract_id = DictApi::getSysDict('community_extract');
          if (!$user_dict_extract_id) {
              $this->error('没有找到 community_extract 字典');
              return 1;
          }

          // One row per distinct non-empty word of the community dictionary.
          $dict = UserDict::select('word')
              ->where('word', '!=', '')
              ->where('dict_id', $user_dict_id)
              ->groupBy('word');

          // count() on a grouped query returns the aggregate of the first group
          // only, so the number of words is computed with COUNT(DISTINCT word).
          $wordCount = UserDict::where('word', '!=', '')
              ->where('dict_id', $user_dict_id)
              ->distinct()
              ->count('word');

          $bar = $this->output->createProgressBar($wordCount);

          foreach ($dict->cursor() as $word) {
              // Best type/grammar pair; rows with an empty or null type are ignored.
              $case = $this->topByConfidence($word->word, $user_dict_id, ['type', 'grammar'], 'type');
              $wordtype = $case ? $case->type : '';
              $wordgrammar = $case ? $case->grammar : '';

              // Best parent.
              $parent = $this->topByConfidence($word->word, $user_dict_id, ['parent'], 'parent');
              $wordparent = $parent ? $parent->parent : '';

              // Best factors.
              $factor = $this->topByConfidence($word->word, $user_dict_id, ['factors'], 'factors');
              $wordfactors = $factor ? $factor->factors : '';

              // Reuse an identical extract entry when one exists; otherwise build a
              // new one carrying a fresh id, the robot source and a millisecond timestamp.
              $new = UserDict::firstOrNew(
                  [
                      'word' => $word->word,
                      'type' => $wordtype,
                      'grammar' => $wordgrammar,
                      'parent' => $wordparent,
                      'factors' => $wordfactors,
                      'dict_id' => $user_dict_extract_id,
                  ],
                  [
                      'id' => app('snowflake')->id(),
                      'source' => '_ROBOT_',
                      'create_time' => (int)(microtime(true) * 1000),
                  ]
              );
              $new->confidence = 90;
              $new->language = 'cm';
              $new->creator_id = 1;
              $new->flag = 1;
              $new->save();

              $bar->advance();
          }
          $bar->finish();

          // TODO: delete stale entries from the extract dictionary.
          return 0;
      }

      /**
       * Find the value combination with the highest summed confidence for a word.
       *
       * Rows whose type is '.part.' are always excluded, as are rows where
       * $requiredColumn is empty or null.
       *
       * @param string $word           Word to look up.
       * @param mixed  $dictId         Id of the dictionary to search.
       * @param array  $groupColumns   Columns to select and group by.
       * @param string $requiredColumn Column that must be neither '' nor null.
       * @return \App\Models\UserDict|null Top-ranked grouped row, or null when nothing matches.
       */
      private function topByConfidence($word, $dictId, array $groupColumns, $requiredColumn)
      {
          // The sum is aliased explicitly so ORDER BY does not depend on the
          // database's implicit name for an un-aliased aggregate column.
          return UserDict::selectRaw(implode(',', $groupColumns) . ', sum(confidence) as confidence_sum')
              ->where('word', $word)
              ->where('dict_id', $dictId)
              ->where('type', '!=', '.part.')
              ->where($requiredColumn, '<>', '')
              ->whereNotNull($requiredColumn)
              ->groupBy($groupColumns)
              ->orderBy('confidence_sum', 'desc')
              ->first();
      }
  }