| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- <?php
- namespace App\Console\Commands;
- use Illuminate\Console\Command;
- use App\Http\Api\DictApi;
- use App\Models\UserDict;
- use Illuminate\Support\Facades\Redis;
- use Illuminate\Support\Facades\Log;
/**
 * Artisan command that exports "word parent" pairs from the user dictionary
 * into paged `pali-N.syn` files under storage/app/export/fts and mirrors each
 * finished page into Redis.
 */
class ExportFtsPali extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'export:fts.pali';

    /**
     * The console command description.
     * (English: "Export the Pali vocabulary list used for full-text search".)
     *
     * @var string
     */
    protected $description = '导出全文搜索用的巴利语词汇表';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * For every word of the irregular and regular dictionaries, looks up the
     * shortest parent form, shortens a trailing long vowel of that parent and
     * writes one "word parent" line. A new page file is started each time the
     * running word counter reaches a multiple of the page size; every closed
     * page (including the final one) is copied into Redis.
     *
     * @return int 0 on success, 1 when the export directory or a page file
     *             cannot be created.
     */
    public function handle()
    {
        // Irregular dictionary (fixed id) first, then the system "regular" dictionary.
        // NOTE(review): the return value of getSysDict() is used as-is; confirm it
        // cannot be null/false when the dictionary is missing.
        $dictIds = ['4d3a0d92-0adc-4052-80f5-512a2603d0e8'];
        $dictIds[] = DictApi::getSysDict('system_regular');

        $path = storage_path('app/export/fts');
        if (!is_dir($path) && !mkdir($path, 0700, true)) {
            Log::error('mkdir fail path=' . $path);
            return 1;
        }

        $pageSize = 10000;
        $currPage = 1;
        $filename = "/pali-{$currPage}.syn";
        $fp = $this->openPage($path . $filename);
        if ($fp === false) {
            return 1;
        }

        $count = 0;
        foreach ($dictIds as $dictId) {
            // Count with an aggregate query instead of count() on the cursor:
            // counting a lazy cursor streams the whole result set once just for
            // the number, and the loop below would then run the query again.
            $total = UserDict::where('dict_id', $dictId)
                ->distinct()
                ->count('word');
            $this->info('word count=' . $total);

            $words = UserDict::where('dict_id', $dictId)
                ->select('word')
                ->groupBy('word')
                ->cursor();

            foreach ($words as $word) {
                $count++;
                if ($count % 1000 === 0) {
                    // Progress indicator.
                    $this->info((string) $count);
                }

                if ($count % $pageSize === 0) {
                    // Current page is full: publish it and start the next one.
                    fclose($fp);
                    $this->publishPage($path, $filename);

                    $currPage++;
                    $filename = "/pali-{$currPage}.syn";
                    $this->info('new file filename=' . $filename);
                    $fp = $this->openPage($path . $filename);
                    if ($fp === false) {
                        return 1;
                    }
                }

                // Shortest parent recorded for this word in this dictionary.
                // One query per word; kept as in the original implementation.
                $parent = UserDict::where('dict_id', $dictId)
                    ->where('word', $word->word)
                    ->selectRaw('parent,char_length("parent")')
                    ->groupBy('parent')->orderBy('char_length', 'asc')->first();

                if ($parent && !empty($parent->parent)) {
                    $parentWord = $this->shortenFinalVowel($parent->parent);
                    fwrite($fp, $word->word . ' ' . $parentWord . PHP_EOL);
                } else {
                    $this->error('word no parent word=' . $word->word);
                }
            }
        }

        fclose($fp);
        // The last (possibly only) page must be published too; previously it
        // was written to disk but never copied to Redis.
        $this->publishPage($path, $filename);

        return 0;
    }

    /**
     * Open a page file for writing, reporting failure instead of terminating
     * the process (a string passed to die() exits with status 0).
     *
     * @param string $file Absolute path of the page file.
     * @return resource|false Writable stream, or false when it cannot be opened.
     */
    private function openPage(string $file)
    {
        $fp = fopen($file, 'w');
        if ($fp === false) {
            Log::error('Unable to open file! path=' . $file);
            $this->error('Unable to open file!');
        }

        return $fp;
    }

    /**
     * Copy a finished page file into Redis with a ten-day expiry.
     *
     * @param string $path     Export directory.
     * @param string $filename Page file name including its leading slash.
     * @return void
     */
    private function publishPage(string $path, string $filename): void
    {
        $content = file_get_contents($path . $filename);
        if ($content === false) {
            // Do not store `false` in Redis; keep the export going.
            Log::error('read fail path=' . $path . $filename);
            return;
        }

        $redisKey = 'export/fts/pali' . $filename;
        Redis::set($redisKey, $content);
        Redis::expire($redisKey, 3600 * 24 * 10);
    }

    /**
     * Replace a trailing long vowel (ā, ī, ū) with its short counterpart.
     *
     * @param string $word UTF-8 encoded word.
     * @return string The word with its final long vowel shortened, or the
     *                word unchanged when it does not end in a long vowel.
     */
    private function shortenFinalVowel(string $word): string
    {
        $shortForms = ['ā' => 'a', 'ī' => 'i', 'ū' => 'u'];

        $last = mb_substr($word, -1, null, 'UTF-8');
        if (!isset($shortForms[$last])) {
            return $word;
        }

        return mb_substr($word, 0, -1, 'UTF-8') . $shortForms[$last];
    }
}
|