ExportFtsPali.php 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Http\Api\DictApi;
  5. use App\Models\UserDict;
  /**
   * Artisan command that exports a Pali "word parent" list for full-text search.
   *
   * For every distinct word of the configured dictionaries it writes one line
   * `<word> <parent>` into `storage/app/export/fts/pali-<N>.syn`, where a final
   * long vowel (ā, ī, ū) of the parent is replaced by its short form (a, i, u).
   */
  class ExportFtsPali extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'export:fts.pali';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = '导出全文搜索用的巴利语词汇表';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * A running counter is incremented for every word read (whether or not it
       * has a parent); each time the counter reaches a multiple of the page size
       * the current output file is closed and the next `pali-<N>.syn` is opened.
       *
       * @return int 0 on success, 1 when the export directory cannot be created
       *             or an output file cannot be opened or written
       */
      public function handle()
      {
          // irregular
          $dictIds = ['4d3a0d92-0adc-4052-80f5-512a2603d0e8'];
          // regular
          $dictIds[] = DictApi::getSysDict('system_regular');

          $path = storage_path('app/export/fts');
          // The trailing is_dir() tolerates another process creating the directory concurrently.
          if (!is_dir($path) && !mkdir($path, 0700, true) && !is_dir($path)) {
              // Fully-qualified: the file does not import the Log facade.
              \Illuminate\Support\Facades\Log::error('mkdir fail path=' . $path);
              $this->error('mkdir fail path=' . $path);
              return 1;
          }

          $pageSize = 10000;
          $currPage = 1;
          $fp = $this->openPage($path, $currPage);
          if ($fp === false) {
              // Return a non-zero status instead of die(), which would exit with status 0.
              $this->error('Unable to open file!');
              return 1;
          }

          $count = 0;
          try {
              foreach ($dictIds as $dictId) {
                  // Count in the database instead of draining the cursor once just for this message.
                  $total = UserDict::where('dict_id', $dictId)->distinct()->count('word');
                  $this->info('word count=' . $total);

                  $words = UserDict::where('dict_id', $dictId)
                      ->select('word')
                      ->groupBy('word')
                      ->cursor();

                  foreach ($words as $word) {
                      $count++;
                      if ($count % 1000 === 0) {
                          $this->info((string) $count);
                      }

                      if ($count % $pageSize === 0) {
                          // Rotate to the next output file.
                          fclose($fp);
                          $currPage++;
                          $this->info('new file filename=' . "/pali-{$currPage}.syn");
                          $fp = $this->openPage($path, $currPage);
                          if ($fp === false) {
                              $this->error('Unable to open file!');
                              return 1;
                          }
                      }

                      // Shortest parent recorded for this word in this dictionary.
                      // NOTE(review): orderBy('char_length') relies on the database naming the
                      // un-aliased char_length("parent") column `char_length` — confirm for the DB in use.
                      $row = UserDict::where('dict_id', $dictId)
                          ->where('word', $word->word)
                          ->selectRaw('parent,char_length("parent")')
                          ->groupBy('parent')
                          ->orderBy('char_length', 'asc')
                          ->first();

                      $parent = $row ? (string) $row->parent : '';
                      if ($parent === '') {
                          $this->error('word no parent word=' . $word->word);
                          continue;
                      }

                      $line = $word->word . ' ' . $this->shortenFinalVowel($parent) . PHP_EOL;
                      if (fwrite($fp, $line) === false) {
                          $this->error('Unable to write file!');
                          return 1;
                      }
                  }
              }
          } finally {
              // is_resource() is false for an already-closed handle, so this never double-closes.
              if (is_resource($fp)) {
                  fclose($fp);
              }
          }

          return 0;
      }

      /**
       * Open (truncating) the output file for the given page number.
       *
       * @param string $dir  export directory
       * @param int    $page 1-based page number used in the file name
       * @return resource|false file handle, or false when fopen() fails
       */
      private function openPage($dir, $page)
      {
          return fopen($dir . "/pali-{$page}.syn", 'w');
      }

      /**
       * Replace a trailing long vowel (ā, ī, ū) with its short counterpart (a, i, u).
       *
       * @param string $parent UTF-8 encoded parent word
       * @return string the word with its last character shortened, or unchanged
       *                when it does not end in one of the three long vowels
       */
      private function shortenFinalVowel($parent)
      {
          $shortForms = ['ā' => 'a', 'ī' => 'i', 'ū' => 'u'];

          $last = mb_substr($parent, -1, null, 'UTF-8');
          if (!isset($shortForms[$last])) {
              return $parent;
          }

          $head = mb_substr($parent, 0, mb_strlen($parent, 'UTF-8') - 1, 'UTF-8');

          return $head . $shortForms[$last];
      }
  }