ExportFtsPali.php 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Http\Api\DictApi;
  5. use App\Models\UserDict;
  6. use Illuminate\Support\Facades\Redis;
  7. use Illuminate\Support\Facades\Log;
  8. class ExportFtsPali extends Command
  9. {
  10. /**
  11. * The name and signature of the console command.
  12. *
  13. * @var string
  14. */
  15. protected $signature = 'export:fts.pali';
  16. /**
  17. * The console command description.
  18. *
  19. * @var string
  20. */
  21. protected $description = '导出全文搜索用的巴利语词汇表';
  22. /**
  23. * Create a new command instance.
  24. *
  25. * @return void
  26. */
  27. public function __construct()
  28. {
  29. parent::__construct();
  30. }
  31. /**
  32. * Execute the console command.
  33. *
  34. * @return int
  35. */
  36. public function handle()
  37. {
  38. //irregular
  39. $dictId = ['4d3a0d92-0adc-4052-80f5-512a2603d0e8'];
  40. //regular
  41. $dictId[] = DictApi::getSysDict('system_regular');
  42. $long = ["ā", "ī", "ū"];
  43. $path = storage_path('app/export/fts');
  44. if (!is_dir($path)) {
  45. $res = mkdir($path, 0700, true);
  46. if (!$res) {
  47. Log::error('mkdir fail path=' . $path);
  48. return 1;
  49. }
  50. }
  51. $pageSize = 10000;
  52. $currPage = 1;
  53. $filename = "/pali-{$currPage}.syn";
  54. $fp = fopen($path . $filename, 'w') or die("Unable to open file!");
  55. $count = 0;
  56. foreach ($dictId as $key => $value) {
  57. $words = UserDict::where('dict_id', $value)
  58. ->select('word')
  59. ->groupBy('word')->cursor();
  60. $this->info('word count=' . count($words));
  61. foreach ($words as $key => $word) {
  62. $count++;
  63. if ($count % 1000 === 0) {
  64. $this->info($count);
  65. }
  66. if ($count % 10000 === 0) {
  67. fclose($fp);
  68. $redisKey = 'export/fts/pali' . $filename;
  69. $content = file_get_contents($path . $filename);
  70. Redis::set($redisKey, $content);
  71. Redis::expire($redisKey, 3600 * 24 * 10);
  72. $currPage++;
  73. $filename = "/pali-{$currPage}.syn";
  74. $this->info('new file filename=' . $filename);
  75. $fp = fopen($path . $filename, 'w') or die("Unable to open file!");
  76. }
  77. $parent = UserDict::where('dict_id', $value)
  78. ->where('word', $word->word)
  79. ->selectRaw('parent,char_length("parent")')
  80. ->groupBy('parent')->orderBy('char_length', 'asc')->first();
  81. if ($parent && !empty($parent->parent)) {
  82. $end = mb_substr($parent->parent, -1, null, "UTF-8");
  83. if (in_array($end, ["ā", "ī", "ū"])) {
  84. $head = mb_substr($parent->parent, 0, mb_strlen($parent->parent) - 1, "UTF-8");
  85. $newEnd = str_replace(["ā", "ī", "ū"], ["a", "i", "u"], $end);
  86. $parentWord = $head . $newEnd;
  87. } else {
  88. $parentWord = $parent->parent;
  89. }
  90. fwrite($fp, $word->word . ' ' . $parentWord . PHP_EOL);
  91. } else {
  92. $this->error('word no parent word=' . $word->word);
  93. }
  94. }
  95. }
  96. fclose($fp);
  97. return 0;
  98. }
  99. }