UpgradeCompound.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\Models\WordIndex;
  6. use App\Models\WbwTemplate;
  7. use App\Tools\TurboSplit;
  8. use App\Http\Api\DictApi;
  9. use Illuminate\Support\Facades\DB;
  10. use Illuminate\Support\Facades\Http;
  /**
   * Artisan command that splits words with TurboSplit and uploads the
   * resulting parts to the compound API endpoint.
   *
   * Three word sources are supported, checked in this order:
   *  1. the optional {word} argument (a single word),
   *  2. --book: the distinct `real` values of that book in wbw_templates,
   *  3. otherwise the word index, optionally limited by --from / --to
   *     (both are offsets added to the smallest word-index id).
   *
   * With --test the command only writes sample splits to tmp/compound.md on
   * the local disk and uploads nothing.
   */
  class UpgradeCompound extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * Example:
       * php -d memory_limit=1024M artisan upgrade:compound --api=https://next.wikipali.org/api --from=182852 --to=30000
       *
       * @var string
       */
      protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue} {--api=} {--from=} {--to=}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'auto split compound word';

      /**
       * Number of processed words buffered before their results are uploaded.
       */
      private const UPLOAD_BATCH_SIZE = 100;

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * @return int 0 on success (or when a stop is requested), 1 when the
       *             robot_compound system dictionary cannot be found.
       */
      public function handle()
      {
          if (\App\Tools\Tools::isStop()) {
              $this->info('.stop exists');
              return 0;
          }
          $this->info('['.date('Y-m-d H:i:s', time()).'] upgrade:compound start');

          $dict_id = DictApi::getSysDict('robot_compound');
          if (!$dict_id) {
              $this->error('没有找到 robot_compound 字典');
              return 1;
          }

          if ($this->option('test')) {
              return $this->runSample();
          }

          $_word = $this->argument('word');
          [$words, $count] = $this->selectWords($_word);

          $sn = 0;
          $wordIndex = [];
          $result = [];
          foreach ($words as $word) {
              if (\App\Tools\Tools::isStop()) {
                  // Results buffered since the last upload are discarded here.
                  $this->info('.stop exists');
                  return 0;
              }
              $sn++;
              $startAt = microtime(true);

              // A fresh splitter is created for every word, as before.
              $ts = new TurboSplit();
              if ($this->option('debug')) {
                  $ts->debug(true);
              }
              $wordIndex[] = $word->real;
              $parts = $ts->splitA($word->real);

              $time = round(microtime(true) - $startAt, 2);
              $percent = $count > 0 ? (int)($sn * 100 / $count) : 100;
              $this->info("[{$percent}%][{$sn}] {$word->real} {$time}s");

              $resultCount = 0;
              foreach ($parts as $part) {
                  // Parts typed ".v." are not uploaded.
                  if (isset($part['type']) && $part['type'] === '.v.') {
                      continue;
                  }
                  $resultCount++;
                  $new = $this->normalizePart($part);
                  $result[] = $new;
                  if (!empty($_word)) {
                      // Print the normalised values: type/grammar/parent are
                      // optional in $part and reading them directly would
                      // raise an undefined-index error. The confidence shown
                      // is the raw splitter value, not the scaled one.
                      $output = "[{$resultCount}],{$new['word']},{$new['type']},{$new['grammar']},{$new['parent']},{$new['factors']},{$part['confidence']}";
                      $this->info($output);
                  }
              }

              if (count($wordIndex) % self::UPLOAD_BATCH_SIZE === 0) {
                  $this->upload($wordIndex, $result, $this->option('api'));
                  $wordIndex = [];
                  $result = [];
              }
          }

          // Flush the last partial batch; skip the request when nothing is pending.
          if (!empty($wordIndex)) {
              $this->upload($wordIndex, $result, $this->option('api'));
          }

          $this->info('['.date('Y-m-d H:i:s', time()).'] upgrade:compound finished');
          return 0;
      }

      /**
       * --test mode: split a small sample of word-index entries (final = 0) and
       * write the splits to tmp/compound.md on the local disk.
       *
       * @return int always 0
       */
      private function runSample(): int
      {
          $start = microtime(true);
          $ts = new TurboSplit();
          $disk = Storage::disk('local');
          $disk->put('tmp/compound.md', '# Turbo Split');

          // Each entry: [minimum len, maximum len, number of words to take].
          $list = [
              [5, 20, 20],
              [21, 30, 20],
              [31, 40, 10],
              [41, 60, 10],
          ];
          foreach ($list as [$minLen, $maxLen, $limit]) {
              $words = WordIndex::where('final', 0)
                  ->whereBetween('len', [$minLen, $maxLen])
                  ->select('word')
                  ->take($limit)
                  ->get();
              foreach ($words as $word) {
                  $this->info($word->word);
                  $disk->append('tmp/compound.md', "## {$word->word}");
                  foreach ($ts->splitA($word->word) as $part) {
                      $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
                      $this->info($info);
                      $disk->append('tmp/compound.md', "- {$info}");
                  }
              }
          }

          // Parentheses are required: before PHP 8 "." and "-" shared the same
          // precedence, so the subtraction was applied to the concatenated string.
          $this->info("耗时:".(microtime(true) - $start));
          return 0;
      }

      /**
       * Resolve the words to process and the total used for progress output.
       *
       * @param string|null $word value of the {word} argument
       * @return array two-element list: [iterable of objects exposing ->real, int total]
       */
      private function selectWords($word): array
      {
          if (!empty($word)) {
              return [[(object)['real' => $word]], 1];
          }

          $book = $this->option('book');
          if ($book) {
              $words = WbwTemplate::select('real')
                  ->where('book', $book)
                  ->where('type', '<>', '.ctl.')
                  ->where('real', '<>', '')
                  ->orderBy('real')
                  ->groupBy('real')
                  ->cursor();
              $query = DB::select(
                  'SELECT count(*) from (SELECT "real" from wbw_templates where book = ? and type <> ? and real <> ? group by real) T',
                  [$book, '.ctl.', '']
              );
              return [$words, (int)$query[0]->count];
          }

          // --from / --to are offsets relative to the smallest id, not absolute ids.
          $min = WordIndex::min('id');
          $max = WordIndex::max('id');
          $from = $this->option('from') ? $min + $this->option('from') : $min;
          $to = $this->option('to') ? $min + $this->option('to') : $max;

          $query = WordIndex::whereBetween('id', [$from, $to])
              ->where('len', '>', 7)
              ->where('len', '<', 51);

          // Count the rows that will really be iterated. The id span
          // ($to - $from + 1) ignores the len filter, so the progress
          // percentage derived from it could never reach 100%.
          $count = (clone $query)->count();
          $words = $query->orderBy('id')
              ->selectRaw('word as real')
              ->cursor();

          return [$words, $count];
      }

      /**
       * Convert one TurboSplit part into the row shape sent to the API.
       *
       * Missing type defaults to ".cp."; missing grammar and parent default to
       * null; confidence is multiplied by 50.
       *
       * @param mixed $part one element of the list returned by TurboSplit::splitA()
       * @return array row with keys word, factors, type, grammar, parent, confidence
       */
      private function normalizePart($part): array
      {
          return [
              'word' => $part['word'],
              'factors' => $part['factors'],
              'type' => $part['type'] ?? '.cp.',
              'grammar' => $part['grammar'] ?? null,
              'parent' => $part['parent'] ?? null,
              'confidence' => 50 * $part['confidence'],
          ];
      }

      /**
       * POST one batch of results to the compound endpoint and log the outcome.
       *
       * @param array       $index words processed in this batch
       * @param array       $words normalised parts produced for those words
       * @param string|null $url   API base URL (--api); when empty, app.url + "/api" is used
       * @return void
       */
      private function upload($index, $words, $url = null)
      {
          // rtrim avoids a double slash when the base URL ends with "/".
          if (!$url) {
              $url = rtrim((string)config('app.url'), '/').'/api/v2/compound';
          } else {
              $url = rtrim($url, '/').'/v2/compound';
          }
          $this->info('url = '.$url);
          $this->info('uploading size='.strlen(json_encode($words, JSON_UNESCAPED_UNICODE)));

          $response = Http::post($url, [
              'index' => $index,
              'words' => $words,
          ]);
          if ($response->ok()) {
              $this->info('upload ok');
          } else {
              // Include the HTTP status so a failed batch can be diagnosed from the log.
              $this->error('upload fail.'.' status='.$response->status());
          }
      }
  }