UpgradeDict.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Support\Str;
  4. use Illuminate\Console\Command;
  5. use Illuminate\Support\Facades\Cache;
  6. use Illuminate\Support\Facades\Storage;
  7. use App\Models\UserDict;
  8. use App\Models\DictInfo;
  9. class UpgradeDict extends Command
  10. {
  11. /**
  12. * The name and signature of the console command.
  13. * php artisan upgrade:dict
  14. * @var string
  15. */
  16. protected $signature = 'upgrade:dict {uuid?} {--part}';
  17. /**
  18. * The console command description.
  19. *
  20. * @var string
  21. */
  22. protected $description = '导入csv字典';
  23. protected $dictInfo;
  24. protected $cols;
  25. /**
  26. * Create a new command instance.
  27. *
  28. * @return void
  29. */
  30. public function __construct()
  31. {
  32. parent::__construct();
  33. }
  34. private function scandict($dir)
  35. {
  36. if (is_dir($dir)) {
  37. $this->info("scan:" . $dir);
  38. if ($files = scandir($dir)) {
  39. //进入目录搜索字典或子目录
  40. foreach ($files as $file) {
  41. //进入语言目录循环搜索
  42. $fullPath = $dir . "/" . $file;
  43. if (is_dir($fullPath) && $file !== '.' && $file !== '..') {
  44. //是目录继续搜索
  45. $this->scandict($fullPath);
  46. } else {
  47. //是文件,查看是否是字典信息文件
  48. $infoFile = $fullPath;
  49. if (pathinfo($infoFile, PATHINFO_EXTENSION) === 'ini') {
  50. $this->dictInfo = parse_ini_file($infoFile, true);
  51. if (isset($this->dictInfo['meta']['dictname'])) {
  52. //是字典信息文件
  53. $this->info($this->dictInfo['meta']['dictname']);
  54. if (Str::isUuid($this->argument('uuid'))) {
  55. if ($this->argument('uuid') !== $this->dictInfo['meta']['uuid']) {
  56. continue;
  57. }
  58. }
  59. if (!Str::isUuid($this->dictInfo['meta']['uuid'])) {
  60. $this->error("not uuid");
  61. continue;
  62. }
  63. //读取 description
  64. $desFile = $dir . "/description.md";
  65. if (file_exists($desFile)) {
  66. $description = file_get_contents($desFile);
  67. } else {
  68. $description = $this->dictInfo['meta']['description'];
  69. }
  70. $tableDict = DictInfo::firstOrNew([
  71. "id" => $this->dictInfo['meta']['uuid']
  72. ]);
  73. $tableDict->id = $this->dictInfo['meta']['uuid'];
  74. $tableDict->name = $this->dictInfo['meta']['dictname'];
  75. $tableDict->shortname = $this->dictInfo['meta']['shortname'];
  76. $tableDict->description = $description;
  77. $tableDict->src_lang = $this->dictInfo['meta']['src_lang'];
  78. $tableDict->dest_lang = $this->dictInfo['meta']['dest_lang'];
  79. $tableDict->rows = $this->dictInfo['meta']['rows'];
  80. $tableDict->owner_id = config("mint.admin.root_uuid");
  81. $tableDict->meta = json_encode($this->dictInfo['meta']);
  82. $tableDict->save();
  83. if ($this->option('part')) {
  84. $this->info(" dict id = " . $this->dictInfo['meta']['uuid']);
  85. } else {
  86. $del = UserDict::where("dict_id", $this->dictInfo['meta']['uuid'])->delete();
  87. $this->info("delete {$del} rows dict id = " . $this->dictInfo['meta']['uuid']);
  88. }
  89. /**
  90. * 允许一个字典拆成若干个小文件
  91. * 文件名 为 ***.csv , ***-1.csv , ***-2.csv
  92. *
  93. */
  94. $filename = $dir . '/' . pathinfo($infoFile, PATHINFO_FILENAME);
  95. $csvFile = $filename . ".csv";
  96. $count = 0;
  97. $bar = $this->output->createProgressBar($this->dictInfo['meta']['rows']);
  98. while (file_exists($csvFile)) {
  99. # code...
  100. $this->info("runing:{$csvFile}");
  101. $inputRow = 0;
  102. if (($fp = fopen($csvFile, "r")) !== false) {
  103. $this->cols = array();
  104. while (($data = fgetcsv($fp, 0, ',')) !== false) {
  105. if ($inputRow == 0) {
  106. foreach ($data as $key => $colname) {
  107. # 列名列表
  108. $this->cols[$colname] = $key;
  109. }
  110. } else {
  111. if ($this->option('part')) {
  112. //仅仅提取拆分零件
  113. $word = $this->get($data, 'word');
  114. $factor1 = $this->get($data, 'factors');
  115. $factor1 = \str_replace([' ', '(', ')', '=', '-', '$'], "+", $factor1);
  116. foreach (\explode('+', $factor1) as $part) {
  117. # code...
  118. if (empty($part)) {
  119. continue;
  120. }
  121. if (isset($newPart[$part])) {
  122. $newPart[$part][0]++;
  123. } else {
  124. $partExists = Cache::remember('dict/part/' . $part, config('cache.expire', 1000), function () use ($part) {
  125. return UserDict::where('word', $part)->exists();
  126. });
  127. if (!$partExists) {
  128. $count++;
  129. $newPart[$part] = [1, $word];
  130. $this->info("{$count}:{$part}-{$word}");
  131. }
  132. }
  133. }
  134. } else {
  135. $newDict = new UserDict();
  136. $newDict->id = app('snowflake')->id();
  137. $newDict->word = $data[$this->cols['word']];
  138. $newDict->type = $this->get($data, 'type');
  139. $newDict->grammar = $this->get($data, 'grammar');
  140. $newDict->parent = $this->get($data, 'parent');
  141. $newDict->mean = $this->get($data, 'mean');
  142. $newDict->note = $this->get($data, 'note');
  143. $newDict->factors = $this->get($data, 'factors');
  144. $newDict->factormean = $this->get($data, 'factormean');
  145. $newDict->status = $this->get($data, 'status');
  146. $newDict->language = $this->get($data, 'language');
  147. $newDict->confidence = $this->get($data, 'confidence');
  148. $newDict->source = $this->get($data, 'source');
  149. $newDict->create_time = (int)(microtime(true) * 1000);
  150. $newDict->creator_id = 0;
  151. $newDict->dict_id = $this->dictInfo['meta']['uuid'];
  152. $newDict->save();
  153. }
  154. $bar->advance();
  155. }
  156. $inputRow++;
  157. }
  158. }
  159. $count++;
  160. $csvFile = $filename . "-{$count}.csv";
  161. }
  162. $bar->finish();
  163. Storage::disk('local')->put("tmp/pm-part.csv", "part,count,word");
  164. if (isset($newPart)) {
  165. foreach ($newPart as $part => $info) {
  166. # 写入磁盘文件
  167. Storage::disk('local')->append("tmp/pm-part.csv", "{$part},{$info[0]},{$info[1]}");
  168. }
  169. }
  170. $this->info("done");
  171. }
  172. }
  173. }
  174. }
  175. //子目录搜素完毕
  176. return;
  177. } else {
  178. //获取子目录失败
  179. $this->error("scandir fail");
  180. return;
  181. }
  182. } else {
  183. $this->error("this is not dir input={$dir}");
  184. return;
  185. }
  186. }
  187. /**
  188. * 获取列的值
  189. */
  190. protected function get($data, $colname, $defualt = "")
  191. {
  192. if (isset($this->cols[$colname])) {
  193. return $data[$this->cols[$colname]];
  194. } else if (isset($this->dictInfo['cols'][$colname])) {
  195. return $this->dictInfo['cols'][$colname];
  196. } else {
  197. return $defualt;
  198. }
  199. }
  200. /**
  201. * Execute the console command.
  202. *
  203. * @return int
  204. */
  205. public function handle()
  206. {
  207. if (\App\Tools\Tools::isStop()) {
  208. return 0;
  209. }
  210. $this->info("upgrade dict start");
  211. $this->scandict(config("mint.path.dict_text"));
  212. $this->info("upgrade dict done");
  213. return 0;
  214. }
  215. }