UpgradeRegular.php 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. <?php
  2. namespace App\Console\Commands;
  3. use App\Models\UserDict;
  4. use App\Models\WbwTemplate;
  5. use Illuminate\Console\Command;
  6. use Illuminate\Support\Facades\Cache;
  7. use Illuminate\Support\Facades\Log;
  8. use Illuminate\Support\Facades\DB;
  9. class UpgradeRegular extends Command
  10. {
  11. /**
  12. * The name and signature of the console command.
  13. *
  14. * @var string
  15. */
  16. protected $signature = 'upgrade:regular';
  17. /**
  18. * The console command description.
  19. *
  20. * @var string
  21. */
  22. protected $description = 'Command description';
  23. /**
  24. * Create a new command instance.
  25. *
  26. * @return void
  27. */
  28. public function __construct()
  29. {
  30. parent::__construct();
  31. }
  32. /**
  33. * Execute the console command.
  34. *
  35. * @return int
  36. */
  37. public function handle()
  38. {
  39. $nounEnding = array();
  40. $rowCount=0;
  41. if(($handle=fopen(public_path('app/public/ending/noun.csv'),'r'))!==FALSE){
  42. while(($data=fgetcsv($handle,0,','))!==FALSE){
  43. $rowCount++;
  44. if($rowCount==1) continue;//忽略首行
  45. array_push($nounEnding,$data);
  46. }
  47. }
  48. fclose($handle);
  49. $adjEnding = array();
  50. $rowCount=0;
  51. if(($handle=fopen(public_path('app/public/ending/adj.csv'),'r'))!==FALSE){
  52. while(($data=fgetcsv($handle,0,','))!==FALSE){
  53. $rowCount++;
  54. if($rowCount==1) continue;//忽略首行
  55. array_push($adjEnding,$data);
  56. }
  57. }
  58. fclose($handle);
  59. $verbEnding = array();
  60. $rowCount=0;
  61. if(($handle=fopen(public_path('app/public/ending/verb.csv'),'r'))!==FALSE){
  62. while(($data=fgetcsv($handle,0,','))!==FALSE){
  63. $rowCount++;
  64. if($rowCount==1) continue;//忽略首行
  65. array_push($verbEnding,$data);
  66. }
  67. }
  68. fclose($handle);
  69. $words = UserDict::where('type','.n:base.')
  70. ->orWhere('type','.v:base.')
  71. ->orWhere('type','.adj:base.')
  72. ->orWhere('type','.ti:base.')
  73. ->select(['word','type','grammar'])
  74. ->groupBy(['word','type','grammar'])
  75. ->orderBy('word');
  76. $query = "
  77. select count(*) from (select count(*) from user_dicts ud where
  78. \"type\" = '.v:base.' or
  79. \"type\" = '.n:base.' or
  80. \"type\" = '.ti:base.' or
  81. \"type\" = '.adj:base.'
  82. group by word,type,grammar) as t;
  83. ";
  84. $count = DB::select($query);
  85. $bar = $this->output->createProgressBar($count[0]->count);
  86. /*
  87. $words = UserDict::where('word','ābandhattalakkhaṇa')
  88. ->select(['word','type','grammar'])
  89. ->groupBy(['word','type','grammar']);
  90. $bar = $this->output->createProgressBar(1);
  91. */
  92. foreach ($words->cursor() as $word) {
  93. # code...
  94. switch($word->type){
  95. case ".v:base.":
  96. $casetable=$verbEnding;
  97. break;
  98. case ".n:base.":
  99. $casetable = $nounEnding;
  100. break;
  101. case ".ti:base.":
  102. case ".adj:base.":
  103. $casetable = $adjEnding;
  104. break;
  105. case "":
  106. $casetable=false;
  107. break;
  108. default:
  109. $casetable=false;
  110. break;
  111. }
  112. if($casetable === false){
  113. continue;
  114. }
  115. //$this->info("{$word->word}:{$word->type}");
  116. foreach($casetable as $thiscase){
  117. if($word->type==".v:base."){
  118. $endLen = (int)$thiscase[0];
  119. $head = mb_substr($word->word,0,(0-$endLen),"UTF-8");//原词剩余的部分
  120. $newEnding = $thiscase[1];
  121. $newGrammar = $thiscase[2];
  122. $newword=$head.$thiscase[1];
  123. //动词不做符合规则判定
  124. $isMatch = true;
  125. }else{
  126. $endLen = (int)$thiscase[5];
  127. $end = mb_substr($word->word,0-$endLen,NULL,"UTF-8");//原词被切下来的部分
  128. $head = mb_substr($word->word,0,(0-$endLen),"UTF-8");//原词剩余的部分
  129. $newEnding = $thiscase[3];
  130. $newGrammar = $thiscase[4];
  131. $newword=$head.$thiscase[2];
  132. if($word->type==".n:base."){
  133. //名词
  134. if($thiscase[0]==$word->grammar && $thiscase[1]==$end){
  135. //符合规则判定成功
  136. $isMatch = true;
  137. }else{
  138. $isMatch = false;
  139. }
  140. }else{
  141. //形容词
  142. if($thiscase[1]==$end){
  143. //符合规则判定成功
  144. $isMatch = true;
  145. }else{
  146. $isMatch = false;
  147. }
  148. }
  149. }
  150. if($isMatch){
  151. //$this->error($newword.':match');
  152. //查询这个词是否在三藏存在
  153. $exist = Cache::remember('palicanon/word/exists/'.$newword, 10 , function() use($newword) {
  154. return WbwTemplate::where('real',$newword)->exists();
  155. });
  156. if($exist){
  157. //$this->info("{$newword} exists");
  158. $new = UserDict::firstOrNew(
  159. [
  160. 'word' => $newword,
  161. 'type' => \str_replace(':base','',$word->type),
  162. 'grammar' => $newGrammar,
  163. 'parent' => $word->word,
  164. 'factors' => "{$word->word}+[{$newEnding}]",
  165. 'source' => '_SYS_REGULAR_'
  166. ],
  167. [
  168. 'id' => app('snowflake')->id(),
  169. 'create_time'=>(int)(microtime(true)*1000)
  170. ]
  171. );
  172. $new->confidence = 80;
  173. $new->language = 'cm';
  174. $new->creator_id = 1;
  175. $new->save();
  176. }
  177. }
  178. }
  179. $bar->advance();
  180. }
  181. $bar->finish();
  182. return 0;
  183. }
  184. }