UpgradeWbwAnalyses.php 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use App\Models\Wbw;
  5. use App\Models\WbwAnalysis;
  /**
   * Artisan command that rebuilds WbwAnalysis rows from the XML stored on Wbw rows.
   *
   * For every selected Wbw row the command deletes the WbwAnalysis rows that carry
   * its id, parses the row's `data` column as a list of <word> elements, and
   * inserts one WbwAnalysis row per meaningful child element of each word.
   */
  class UpgradeWbwAnalyses extends Command
  {
      /**
       * Analysis type code stored for each plainly-copied child element of <word>.
       */
      private const FIELD_TYPES = [
          'type' => 1,
          'gramma' => 2,
          'mean' => 3,
          'org' => 4,
          'om' => 5,
          'parent' => 6,
      ];

      /**
       * Type code stored for the per-factor meanings derived from <om>.
       */
      private const TYPE_FACTOR_MEANING = 7;

      /**
       * Type code stored for relations derived from <rela>.
       *
       * NOTE(review): this is the same code as TYPE_FACTOR_MEANING, exactly as in
       * the previous implementation — confirm whether the collision is intended.
       */
      private const TYPE_RELATION = 7;

      /**
       * Element values that carry no information and are never stored.
       */
      private const PLACEHOLDER_VALUES = [
          '?',
          '',
          '.ctl.',
          '.a.',
          ' ',
          '_un_auto_factormean_',
          '_un_auto_mean_',
      ];

      /**
       * The name and signature of the console command.
       *
       * The optional `id` argument is a comma-separated list of Wbw ids; when it
       * is omitted every Wbw row is processed.
       *
       * @var string
       */
      protected $signature = 'upgrade:wbw.analyses {id?}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = '用户逐词解析数据填充wbw analyses表';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Rows whose data cannot be parsed as XML are skipped (their previous
       * analyses are still deleted, as before) and counted; the progress bar
       * advances for every row, parsed or not.
       *
       * @return int Always 0.
       */
      public function handle()
      {
          $startAt = time();
          $this->info("upgrade:wbwanalyses start");

          $idArgument = $this->argument('id');
          $query = empty($idArgument)
              ? Wbw::query()
              : Wbw::whereIn('id', explode(',', $idArgument));

          // Size the bar from the rows that will actually be visited. The count is
          // taken before orderBy() so the aggregate query carries no ORDER BY.
          $bar = $this->output->createProgressBar($query->count());

          $skipped = 0;
          foreach ($query->orderBy('id')->cursor() as $wbwrow) {
              WbwAnalysis::where('wbw_id', $wbwrow->id)->delete();

              $words = $this->parseWords((string) $wbwrow->data);
              if ($words === null) {
                  $skipped++;
              } else {
                  foreach ($words as $word) {
                      $this->importWord($wbwrow, $word);
                  }
              }

              $bar->advance();
          }

          $bar->finish();

          if ($skipped > 0) {
              $this->warn("{$skipped} wbw rows skipped: data is not well-formed XML");
          }

          $time = time() - $startAt;
          $this->info("wbw analyses done in {$time}");

          return 0;
      }

      /**
       * Parse the raw `data` column of a Wbw row into its <word> elements.
       *
       * `&nbsp;` and `<br>` are replaced with spaces and the fragment is wrapped in
       * a <root> element before parsing. libxml errors are collected internally
       * instead of being raised as PHP warnings, so malformed input is reported
       * through the return value.
       *
       * @param string $data Raw XML fragment.
       * @return \SimpleXMLElement[]|null The <word> elements, or null when the
       *                                  fragment is not well-formed XML.
       */
      private function parseWords(string $data): ?array
      {
          $data = str_replace(['&nbsp;', '<br>'], ' ', $data);

          $previous = libxml_use_internal_errors(true);
          try {
              $root = simplexml_load_string('<root>' . $data . '</root>');
          } finally {
              libxml_clear_errors();
              libxml_use_internal_errors($previous);
          }

          if ($root === false) {
              return null;
          }

          $words = $root->xpath('//word');

          return is_array($words) ? $words : [];
      }

      /**
       * Insert the analysis rows for a single <word> element.
       *
       * The factors from <org> are remembered so that a later <om> element of the
       * same word can pair its meanings with them; an <om> that appears before
       * <org> therefore produces no per-factor rows (unchanged behaviour).
       *
       * @param Wbw               $wbwrow Row the word belongs to.
       * @param \SimpleXMLElement $word   The <word> element.
       * @return void
       */
      private function importWord(Wbw $wbwrow, \SimpleXMLElement $word): void
      {
          $factors = [];

          foreach ($word as $key => $value) {
              $strValue = (string) $value;
              if (!$this->isMeaningful($strValue)) {
                  continue;
              }

              $row = [
                  'wbw_id' => $wbwrow->id,
                  'wbw_word' => $wbwrow->word,
                  'book_id' => $wbwrow->book_id,
                  'paragraph' => $wbwrow->paragraph,
                  'wid' => $wbwrow->wid,
                  'type' => 0,
                  'data' => $strValue,
                  'confidence' => 100,
                  'lang' => 'en',
                  'editor_id' => $wbwrow->editor_id,
                  'created_at' => $wbwrow->created_at,
                  'updated_at' => $wbwrow->updated_at,
              ];

              // TODO: add function words (particles).
              if (array_key_exists($key, self::FIELD_TYPES)) {
                  $row['type'] = self::FIELD_TYPES[$key];
                  WbwAnalysis::insert($row);
              }

              if ($key === 'org') {
                  $factors = explode('+', $strValue);
              } elseif ($key === 'om') {
                  $this->insertFactorMeanings($row, $factors, $strValue);
              } elseif ($key === 'rela') {
                  $this->insertRelations($row, $word, $strValue);
              }
          }
      }

      /**
       * Tell whether an element value is worth storing.
       *
       * @param string $value Text content of a child element of <word>.
       * @return bool False for the known placeholder values and for values that
       *              start with "[a]".
       */
      private function isMeaningful(string $value): bool
      {
          return !in_array($value, self::PLACEHOLDER_VALUES, true)
              && mb_substr($value, 0, 3, 'UTF-8') !== '[a]';
      }

      /**
       * Store the meaning of each factor: the "+"-separated parts of <om> are
       * paired by position with the factors taken from <org>.
       *
       * @param array  $row      Base row for the word; type, wbw_word and data are overridden.
       * @param array  $factors  Factors from <org>, in order.
       * @param string $meanings Text of <om>.
       * @return void
       */
      private function insertFactorMeanings(array $row, array $factors, string $meanings): void
      {
          $row['type'] = self::TYPE_FACTOR_MEANING;
          $factorMeaning = explode('+', $meanings);

          foreach ($factors as $index => $factor) {
              // empty() also covers a factor that has no meaning at its position.
              if (empty($factorMeaning[$index]) || $factorMeaning[$index] === "↓↓") {
                  continue;
              }

              $row['wbw_word'] = $factor;
              $row['data'] = $factorMeaning[$index];
              WbwAnalysis::insert($row);
          }
      }

      /**
       * Store one row per relation listed in <rela>.
       *
       * The element holds a JSON list such as
       * [{"sour_id":"p199-764-6","sour_spell":"...","dest_id":"p199-764-8","dest_spell":"...","relation":"ADV","note":""}].
       * Each stored row gets the relation name as data, the last "$"-separated
       * part of the word's <gramma> as d1 (when <gramma> is not empty) and the
       * difference between the third id segments of dest_id and sour_id as d2.
       * Text that does not decode to a list or object is ignored.
       *
       * @param array             $row  Base row for the word.
       * @param \SimpleXMLElement $word The <word> element owning the relation.
       * @param string            $json Text of <rela>.
       * @return void
       */
      private function insertRelations(array $row, \SimpleXMLElement $word, string $json): void
      {
          $relations = json_decode($json);
          if (!is_array($relations) && !is_object($relations)) {
              return;
          }

          $row['type'] = self::TYPE_RELATION;

          // NOTE(review): the previous code had an else-branch assigning <type> to
          // d1, but it was unreachable because explode() never returns an empty
          // array. Confirm whether <type> should be the fallback when <gramma> is
          // empty; until then d1 is left unset in that case, as before.
          $grammar = (string) $word->gramma;
          if ($grammar !== '') {
              $grammarParts = explode('$', $grammar);
              $row['d1'] = end($grammarParts);
          }

          foreach ($relations as $relation) {
              if (!is_object($relation) || empty($relation->relation)) {
                  continue;
              }

              $row['data'] = $relation->relation;
              $row['d2'] = $this->wordPosition($relation->dest_id ?? '')
                  - $this->wordPosition($relation->sour_id ?? '');
              WbwAnalysis::insert($row);
          }
      }

      /**
       * Read the third "-"-separated segment of an id such as "p199-764-6".
       *
       * @param mixed $id Value of sour_id / dest_id from the decoded JSON.
       * @return int The segment as an integer, or 0 when the id is not a scalar
       *             or has fewer than three segments.
       */
      private function wordPosition($id): int
      {
          if (!is_scalar($id)) {
              return 0;
          }

          $segments = explode('-', (string) $id);

          return isset($segments[2]) ? (int) $segments[2] : 0;
      }
  }