UpgradeCompound.php 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\Models\WordIndex;
  6. use App\Models\WbwTemplate;
  7. use App\Models\UserDict;
  8. use App\Tools\TurboSplit;
  9. use App\Http\Api\DictApi;
  10. use Illuminate\Support\Facades\DB;
  11. use Illuminate\Support\Facades\Log;
  12. use Illuminate\Support\Facades\Http;
  13. class UpgradeCompound extends Command
  14. {
  15. /**
  16. * The name and signature of the console command.
  17. * php -d memory_limit=1024M artisan upgrade:compound --api=https://staging.wikipali.org/api --from=20481 --to=30000
  18. * @var string
  19. */
  20. protected $signature = 'upgrade:compound {word?} {--book=} {--debug} {--test} {--continue} {--api=} {--from=} {--to=}';
  21. /**
  22. * The console command description.
  23. *
  24. * @var string
  25. */
  26. protected $description = 'auto split compound word';
  27. /**
  28. * Create a new command instance.
  29. *
  30. * @return void
  31. */
  32. public function __construct()
  33. {
  34. parent::__construct();
  35. }
  36. /**
  37. * Execute the console command.
  38. *
  39. * @return int
  40. */
  41. public function handle()
  42. {
  43. if(\App\Tools\Tools::isStop()){
  44. return 0;
  45. }
  46. if(file_exists(base_path('.stop'))){
  47. $this->info('.stop exists');
  48. return 0;
  49. }
  50. $dict_id = DictApi::getSysDict('robot_compound');
  51. if(!$dict_id){
  52. $this->error('没有找到 robot_compound 字典');
  53. return 1;
  54. }
  55. $start = \microtime(true);
  56. //
  57. if($this->option('test')){
  58. //调试代码
  59. $ts = new TurboSplit();
  60. Storage::disk('local')->put("tmp/compound.md", "# Turbo Split");
  61. //获取需要拆的词
  62. $list = [
  63. [5,20,20],
  64. [21,30,20],
  65. [31,40,10],
  66. [41,60,10],
  67. ];
  68. foreach ($list as $take) {
  69. # code...
  70. $words = WordIndex::where('final',0)
  71. ->whereBetween('len',[$take[0],$take[1]])
  72. ->select('word')
  73. ->take($take[2])->get();
  74. foreach ($words as $word) {
  75. $this->info($word->word);
  76. Storage::disk('local')->append("tmp/compound.md", "## {$word->word}");
  77. $parts = $ts->splitA($word->word);
  78. foreach ($parts as $part) {
  79. # code...
  80. $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
  81. $this->info($info);
  82. Storage::disk('local')->append("tmp/compound.md", "- {$info}");
  83. }
  84. }
  85. }
  86. $this->info("耗时:".\microtime(true)-$start);
  87. return 0;
  88. }
  89. $_word = $this->argument('word');
  90. if(!empty($_word)){
  91. $words = array((object)array('real'=>$_word));
  92. $count = 1;
  93. }else if($this->option('book')){
  94. $words = WbwTemplate::select('real')
  95. ->where('book',$this->option('book'))
  96. ->where('type','<>','.ctl.')
  97. ->where('real','<>','')
  98. ->orderBy('real')
  99. ->groupBy('real')->cursor();
  100. $query = DB::select('SELECT count(*) from (
  101. SELECT "real" from wbw_templates where book = ? and type <> ? and real <> ? group by real) T',
  102. [$this->option('book'),'.ctl.','']);
  103. $count = $query[0]->count;
  104. }else{
  105. $min = WordIndex::min('id');
  106. $max = WordIndex::max('id');
  107. if($this->option('from')){
  108. $from = $min + $this->option('from');
  109. }else{
  110. $from = $min;
  111. }
  112. if($this->option('to')){
  113. $to = $min + $this->option('to');
  114. }else{
  115. $to = $max;
  116. }
  117. $words = WordIndex::whereBetween('id',[$from,$to])
  118. ->where('len','>',7)
  119. ->where('len','<',51)
  120. ->orderBy('id')
  121. ->selectRaw('word as real')
  122. ->cursor();
  123. $count = $to - $from + 1;
  124. }
  125. $sn = 0;
  126. $wordIndex = array();
  127. $result = array();
  128. foreach ($words as $key => $word) {
  129. if(\App\Tools\Tools::isStop()){
  130. return 0;
  131. }
  132. $sn++;
  133. $startAt = microtime(true);
  134. $ts = new TurboSplit();
  135. if($this->option('debug')){
  136. $ts->debug(true);
  137. }
  138. $wordIndex[] = $word->real;
  139. $parts = $ts->splitA($word->real);
  140. $time = round(microtime(true) - $startAt,2);
  141. $percent = (int)($sn * 100 / $count);
  142. $this->info("[{$percent}%][{$sn}] {$word->real} {$time}s");
  143. $resultCount = 0;
  144. foreach ($parts as $part) {
  145. if(isset($part['type']) && $part['type'] === ".v."){
  146. continue;
  147. }
  148. $resultCount++;
  149. $new = array();
  150. $new['word'] = $part['word'];
  151. $new['factors'] = $part['factors'];
  152. if(isset($part['type'])){
  153. $new['type'] = $part['type'];
  154. }else{
  155. $new['type'] = ".cp.";
  156. }
  157. if(isset($part['grammar'])){
  158. $new['grammar'] = $part['grammar'];
  159. }else{
  160. $new['grammar'] = null;
  161. }
  162. if(isset($part['parent'])){
  163. $new['parent'] = $part['parent'];
  164. }else{
  165. $new['parent'] = null;
  166. }
  167. $new['confidence'] = 50*$part['confidence'];
  168. $result[] = $new;
  169. if(!empty($_word)){
  170. $output = "[{$resultCount}],{$part['word']},{$part['type']},{$part['grammar']},{$part['parent']},{$part['factors']},{$part['confidence']}";
  171. $this->info($output);
  172. }
  173. }
  174. if(count($wordIndex) % 100 ===0){
  175. $this->upload($wordIndex,$result,$this->option('api'));
  176. $wordIndex = array();
  177. $result = array();
  178. }
  179. }
  180. $this->upload($wordIndex,$result,$this->option('api'));
  181. return 0;
  182. }
  183. private function upload($index,$words,$url=null){
  184. if(!$url){
  185. $url = config('app.url').'/api/v2/compound';
  186. }else{
  187. $url = $url.'/v2/compound';
  188. }
  189. $this->info('url = '.$url);
  190. $this->info('uploading size='.strlen(json_encode($words,JSON_UNESCAPED_UNICODE)));
  191. $response = Http::post($url,
  192. [
  193. 'index'=> $index,
  194. 'words'=> $words,
  195. ]);
  196. if($response->ok()){
  197. $this->info('upload ok');
  198. }else{
  199. $this->error('upload fail.');
  200. }
  201. }
  202. }