UpgradeDictSysRegular.php 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. <?php
  2. /**
  3. * 生成系统规则变形词典
  4. * 算法: 扫描字典里的所有单词。根据语尾表变形。
  5. * 并在词库中查找是否在三藏中出现。出现的保存。
  6. */
  7. namespace App\Console\Commands;
  8. use App\Models\UserDict;
  9. use App\Models\WbwTemplate;
  10. use Illuminate\Console\Command;
  11. use Illuminate\Support\Facades\Cache;
  12. use Illuminate\Support\Facades\Log;
  13. use Illuminate\Support\Facades\DB;
  14. use App\Http\Api\DictApi;
  15. class UpgradeRegular extends Command
  16. {
  17. /**
  18. * The name and signature of the console command.
  19. *
  20. * @var string
  21. */
  22. protected $signature = 'upgrade:regular {word?} {--debug}';
  23. /**
  24. * The console command description.
  25. *
  26. * @var string
  27. */
  28. protected $description = 'upgrade regular';
  29. /**
  30. * Create a new command instance.
  31. *
  32. * @return void
  33. */
  34. public function __construct()
  35. {
  36. parent::__construct();
  37. }
  38. /**
  39. * Execute the console command.
  40. *
  41. * @return int
  42. */
  43. public function handle()
  44. {
  45. $dict_id = DictApi::getSysDict('system_regular');
  46. if(!$dict_id){
  47. $this->error('没有找到 system_regular 字典');
  48. return 1;
  49. }
  50. $nounEnding = array();
  51. $rowCount=0;
  52. if(($handle=fopen(public_path('app/public/ending/noun.csv'),'r'))!==FALSE){
  53. while(($data=fgetcsv($handle,0,','))!==FALSE){
  54. $rowCount++;
  55. if($rowCount==1) continue;//忽略首行
  56. array_push($nounEnding,$data);
  57. }
  58. }
  59. fclose($handle);
  60. $adjEnding = array();
  61. $rowCount=0;
  62. if(($handle=fopen(public_path('app/public/ending/adj.csv'),'r'))!==FALSE){
  63. while(($data=fgetcsv($handle,0,','))!==FALSE){
  64. $rowCount++;
  65. if($rowCount==1) continue;//忽略首行
  66. array_push($adjEnding,$data);
  67. }
  68. }
  69. fclose($handle);
  70. $verbEnding = array();
  71. $rowCount=0;
  72. if(($handle=fopen(public_path('app/public/ending/verb.csv'),'r'))!==FALSE){
  73. while(($data=fgetcsv($handle,0,','))!==FALSE){
  74. $rowCount++;
  75. if($rowCount==1) continue;//忽略首行
  76. array_push($verbEnding,$data);
  77. }
  78. }
  79. fclose($handle);
  80. if(empty($this->argument('word'))){
  81. $words = UserDict::where('type','.n:base.')
  82. ->orWhere('type','.v:base.')
  83. ->orWhere('type','.adj:base.')
  84. ->orWhere('type','.ti:base.');
  85. }else{
  86. $words = UserDict::where('word',$this->argument('word'))
  87. ->where(function($query) {
  88. $query->where('type','.n:base.')
  89. ->orWhere('type','.v:base.')
  90. ->orWhere('type','.adj:base.')
  91. ->orWhere('type','.ti:base.');
  92. });
  93. }
  94. $words = $words->select(['word','type','grammar'])
  95. ->groupBy(['word','type','grammar'])
  96. ->orderBy('word');
  97. $query = "
  98. select count(*) from (select count(*) from user_dicts ud where
  99. \"type\" = '.v:base.' or
  100. \"type\" = '.n:base.' or
  101. \"type\" = '.ti:base.' or
  102. \"type\" = '.adj:base.'
  103. group by word,type,grammar) as t;
  104. ";
  105. $count = DB::select($query);
  106. $bar = $this->output->createProgressBar($count[0]->count);
  107. /*
  108. $words = UserDict::where('word','ābandhattalakkhaṇa')
  109. ->select(['word','type','grammar'])
  110. ->groupBy(['word','type','grammar']);
  111. $bar = $this->output->createProgressBar(1);
  112. */
  113. foreach ($words->cursor() as $word) {
  114. # code...
  115. switch($word->type){
  116. case ".v:base.":
  117. $casetable=$verbEnding;
  118. break;
  119. case ".n:base.":
  120. $casetable = $nounEnding;
  121. break;
  122. case ".ti:base.":
  123. case ".adj:base.":
  124. $casetable = $adjEnding;
  125. break;
  126. case "":
  127. $casetable=false;
  128. break;
  129. default:
  130. $casetable=false;
  131. break;
  132. }
  133. if($casetable === false){
  134. continue;
  135. }
  136. if($this->option('debug')) $this->info("{$word->word}:{$word->type}");
  137. foreach($casetable as $thiscase){
  138. if($word->type==".v:base."){
  139. $endLen = (int)$thiscase[0];
  140. $head = mb_substr($word->word,0,(0-$endLen),"UTF-8");//原词剩余的部分
  141. $newEnding = $thiscase[1];
  142. $newGrammar = $thiscase[2];
  143. $newword=$head.$thiscase[1];
  144. //动词不做符合规则判定
  145. $isMatch = true;
  146. }else{
  147. $endLen = (int)$thiscase[5];
  148. $end = mb_substr($word->word,0-$endLen,NULL,"UTF-8");//原词被切下来的部分
  149. $head = mb_substr($word->word,0,(0-$endLen),"UTF-8");//原词剩余的部分
  150. $newEnding = $thiscase[3];
  151. $newGrammar = $thiscase[4];
  152. $newword=$head.$thiscase[2];
  153. if($word->type==".n:base."){
  154. //名词
  155. if($thiscase[0]==$word->grammar && $thiscase[1]==$end){
  156. //符合规则判定成功
  157. $isMatch = true;
  158. }else{
  159. $isMatch = false;
  160. }
  161. }else{
  162. //形容词
  163. if($thiscase[1]==$end){
  164. //符合规则判定成功
  165. $isMatch = true;
  166. }else{
  167. $isMatch = false;
  168. }
  169. }
  170. }
  171. if($isMatch){
  172. if($this->option('debug')) $this->error($newword.':match');
  173. //查询这个词是否在三藏存在
  174. $exist = Cache::remember('palicanon/word/exists/'.$newword, 100 , function() use($newword) {
  175. return WbwTemplate::where('real',$newword)->exists();
  176. });
  177. if($exist){
  178. if($this->option('debug')) $this->info('exist');
  179. $new = UserDict::firstOrNew(
  180. [
  181. 'word' => $newword,
  182. 'type' => \str_replace(':base','',$word->type),
  183. 'grammar' => $newGrammar,
  184. 'parent' => $word->word,
  185. 'factors' => "{$word->word}+[{$newEnding}]",
  186. 'dict_id' => $dict_id,
  187. ],
  188. [
  189. 'id' => app('snowflake')->id(),
  190. 'source' => '_ROBOT_',
  191. 'create_time'=>(int)(microtime(true)*1000)
  192. ]
  193. );
  194. $new->confidence = 80;
  195. $new->language = 'cm';
  196. $new->creator_id = 1;
  197. $new->flag = 1;
  198. $new->save();
  199. }else{
  200. if($this->option('debug')) $this->info('not exist');
  201. }
  202. }
  203. }
  204. $bar->advance();
  205. }
  206. $bar->finish();
  207. //删除旧数据
  208. $delOld = UserDict::where('dict_id',$dict_id);
  209. if(!empty($this->argument('word'))){
  210. $delOld = $delOld->where('word',$this->argument('word'));
  211. }
  212. $delOld->where('flag',0)->delete();
  213. $delOld->where('flag',1)->update(['flag'=>0]);
  214. return 0;
  215. }
  216. }