ExportFtsPali.php 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Http\Api\DictApi;
  5. use App\Models\UserDict;
  6. use Illuminate\Support\Facades\Redis;
  /**
   * Artisan command that exports a Pali "word -> parent" synonym list for
   * full-text search.
   *
   * For every distinct word of the configured dictionaries it writes one line
   * "<word> <parent>" into paged files named pali-<page>.syn under
   * storage/app/export/fts, and stores each finished page in Redis under
   * "export/fts/pali/pali-<page>.syn" with a 10-day expiry.
   */
  class ExportFtsPali extends Command
  {
      /**
       * Final long vowels and the short vowel each one is replaced with.
       */
      private const LONG_TO_SHORT = [
          'ā' => 'a',
          'ī' => 'i',
          'ū' => 'u',
      ];

      /**
       * Lifetime of an exported page in Redis, in seconds (10 days).
       */
      private const REDIS_TTL = 3600 * 24 * 10;

      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'export:fts.pali';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = '导出全文搜索用的巴利语词汇表';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * @return int 0 on success, 1 when the export directory or a page file
       *             cannot be created
       */
      public function handle(): int
      {
          // irregular
          $dictIds = ['4d3a0d92-0adc-4052-80f5-512a2603d0e8'];
          // regular
          $dictIds[] = DictApi::getSysDict('system_regular');

          $path = storage_path('app/export/fts');
          // The trailing is_dir() covers the case where another process
          // created the directory between the first check and mkdir().
          if (!is_dir($path) && !mkdir($path, 0700, true) && !is_dir($path)) {
              \Illuminate\Support\Facades\Log::error('mkdir fail path=' . $path);
              $this->error('mkdir fail path=' . $path);
              return 1;
          }

          $pageSize = 10000;
          $currPage = 1;
          $filename = "/pali-{$currPage}.syn";
          $fp = $this->openPage($path . $filename);
          if ($fp === false) {
              return 1;
          }

          $count = 0;
          foreach ($dictIds as $dictId) {
              // Count with a cheap aggregate query instead of draining the
              // cursor once just to print a number.
              $total = UserDict::where('dict_id', $dictId)
                  ->distinct()
                  ->count('word');
              $this->info('word count=' . $total);

              $words = UserDict::where('dict_id', $dictId)
                  ->select('word')
                  ->groupBy('word')
                  ->cursor();

              foreach ($words as $word) {
                  $count++;
                  if ($count % 1000 === 0) {
                      $this->info($count);
                  }

                  // Rotate to a new page every $pageSize words seen.
                  if ($count % $pageSize === 0) {
                      fclose($fp);
                      $this->publishPage($path, $filename);

                      $currPage++;
                      $filename = "/pali-{$currPage}.syn";
                      $this->info('new file filename=' . $filename);
                      $fp = $this->openPage($path . $filename);
                      if ($fp === false) {
                          return 1;
                      }
                  }

                  // Pick the shortest parent recorded for this word.
                  $parent = UserDict::where('dict_id', $dictId)
                      ->where('word', $word->word)
                      ->selectRaw('parent,char_length("parent")')
                      ->groupBy('parent')
                      ->orderBy('char_length', 'asc')
                      ->first();

                  if (!$parent || empty($parent->parent)) {
                      $this->error('word no parent word=' . $word->word);
                      continue;
                  }

                  $parentWord = $this->shortenFinalVowel((string) $parent->parent);
                  fwrite($fp, $word->word . ' ' . $parentWord . PHP_EOL);
              }
          }

          fclose($fp);
          // The last page never reaches the rotation branch, so publish it
          // here; otherwise it would exist on disk only.
          $this->publishPage($path, $filename);

          return 0;
      }

      /**
       * Open a page file for writing, reporting a failure on the console.
       *
       * @param string $file absolute path of the page file
       * @return resource|false the stream, or false when it cannot be opened
       */
      private function openPage(string $file)
      {
          $fp = fopen($file, 'w');
          if ($fp === false) {
              $this->error('Unable to open file! path=' . $file);
          }

          return $fp;
      }

      /**
       * Store a finished page file in Redis with the export expiry.
       *
       * @param string $path     export directory
       * @param string $filename page file name including its leading slash
       * @return void
       */
      private function publishPage(string $path, string $filename): void
      {
          $content = file_get_contents($path . $filename);
          if ($content === false) {
              $this->error('read file fail path=' . $path . $filename);
              return;
          }

          $redisKey = 'export/fts/pali' . $filename;
          Redis::set($redisKey, $content);
          Redis::expire($redisKey, self::REDIS_TTL);
      }

      /**
       * Replace a final long vowel (ā, ī, ū) with its short form (a, i, u).
       *
       * Only the last character is touched; long vowels elsewhere in the
       * word are kept.
       *
       * @param string $word UTF-8 encoded word
       * @return string the word with its final long vowel shortened, or the
       *                word unchanged when it does not end in a long vowel
       */
      private function shortenFinalVowel(string $word): string
      {
          $last = mb_substr($word, -1, null, 'UTF-8');
          if (!isset(self::LONG_TO_SHORT[$last])) {
              return $word;
          }

          return mb_substr($word, 0, -1, 'UTF-8') . self::LONG_TO_SHORT[$last];
      }
  }