UpgradeDictSysWbwExtract.php 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. <?php
  2. /**
  3. * 将用户词典中的数据进行汇总。
  4. * 算法:
  5. * 同样词性的合并为一条记录。意思按照出现的次数排序
  6. */
  7. namespace App\Console\Commands;
  8. use Illuminate\Console\Command;
  9. use App\Models\UserDict;
  10. use App\Http\Api\DictApi;
  11. class UpgradeDictSysWbwExtract extends Command
  12. {
  13. /**
  14. * The name and signature of the console command.
  15. *
  16. * @var string
  17. */
  18. protected $signature = 'upgrade:syswbwextract';
  19. /**
  20. * The console command description.
  21. *
  22. * @var string
  23. */
  24. protected $description = '从社区词典中提取最优结果';
  25. /**
  26. * Create a new command instance.
  27. *
  28. * @return void
  29. */
  30. public function __construct()
  31. {
  32. parent::__construct();
  33. }
  34. /**
  35. * Execute the console command.
  36. *
  37. * @return int
  38. */
  39. public function handle()
  40. {
  41. $user_dict_id = DictApi::getSysDict('community');
  42. if(!$user_dict_id){
  43. $this->error('没有找到 community 字典');
  44. return 1;
  45. }
  46. $user_dict_extract_id = DictApi::getSysDict('community_extract');
  47. if(!$user_dict_extract_id){
  48. $this->error('没有找到 community_extract 字典');
  49. return 1;
  50. }
  51. $dict = UserDict::select('word')->where('word','!=','')->where('dict_id',$user_dict_id)->groupBy('word');
  52. $bar = $this->output->createProgressBar($dict->count());
  53. foreach ($dict->cursor() as $word) {
  54. # code...
  55. //case
  56. $wordtype = '';
  57. $wordgrammar = '';
  58. $wordparent = '';
  59. $wordfactors = '';
  60. $case = UserDict::selectRaw('type,grammar, sum(confidence)')
  61. ->where('word',$word->word)
  62. ->where('dict_id',$user_dict_id)
  63. ->where('type','!=','.part.')
  64. ->where('type','<>','')
  65. ->whereNotNull('type')
  66. ->groupBy(['type','grammar'])
  67. ->orderBy('sum','desc')
  68. ->first();
  69. if($case){
  70. $wordtype = $case->type;
  71. $wordgrammar = $case->grammar;
  72. }
  73. //parent
  74. $parent = UserDict::selectRaw('parent, sum(confidence)')
  75. ->where('word',$word->word)
  76. ->where('dict_id',$user_dict_id)
  77. ->where('type','!=','.part.')
  78. ->where('parent','!=','')
  79. ->whereNotNull('parent')
  80. ->groupBy('parent')
  81. ->orderBy('sum','desc')
  82. ->first();
  83. if($parent){
  84. $wordparent = $parent->parent;
  85. }
  86. //factors
  87. $factor = UserDict::selectRaw('factors, sum(confidence)')
  88. ->where('word',$word->word)
  89. ->where('dict_id',$user_dict_id)
  90. ->where('type','!=','.part.')
  91. ->where('factors','<>','')
  92. ->whereNotNull('factors')
  93. ->groupBy('factors')
  94. ->orderBy('sum','desc')
  95. ->first();
  96. if($factor){
  97. $wordfactors = $factor->factors;
  98. }
  99. $new = UserDict::firstOrNew(
  100. [
  101. 'word' => $word->word,
  102. 'type' => $wordtype,
  103. 'grammar' => $wordgrammar,
  104. 'parent' => $wordparent,
  105. 'factors' => $wordfactors,
  106. 'dict_id' => $user_dict_extract_id,
  107. ],
  108. [
  109. 'id' => app('snowflake')->id(),
  110. 'source' => '_ROBOT_',
  111. 'create_time'=>(int)(microtime(true)*1000)
  112. ]
  113. );
  114. $new->confidence = 90;
  115. $new->language = 'cm';
  116. $new->creator_id = 1;
  117. $new->flag = 1;
  118. $new->save();
  119. $bar->advance();
  120. }
  121. $bar->finish();
  122. //TODO 删除旧数据
  123. return 0;
  124. }
  125. }