WbwLookupController.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. <?php
  2. namespace App\Http\Controllers;
  3. use App\Models\UserDict;
  4. use App\Models\DictInfo;
  5. use App\Models\WbwTemplate;
  6. use App\Models\Channel;
  7. use App\Models\WbwAnalysis;
  8. use Illuminate\Http\Request;
  9. use App\Tools\CaseMan;
  10. use Illuminate\Support\Facades\Log;
  11. use Illuminate\Support\Facades\Cache;
  12. class WbwLookupController extends Controller
  13. {
  /**
   * IDs of the dictionaries that lookup() restricts its query to.
   *
   * @var string[]
   */
  private $dictList = [
      '85dcc61c-c9e1-4ae0-9b44-cd6d9d9f0d01', // community summary
      '4d3a0d92-0adc-4052-80f5-512a2603d0e8', // system irregular
      '57afac99-0887-455c-b18e-67c8682158b0', // system regular
      'ef620a93-a55d-4756-89c5-e188ab009e45', // community dictionary
      '8359757e-9575-455b-a772-cc6f036caea0', // system sandhi
      'c42980f0-5967-4833-b695-84183344f68f', // robot compound
      '61f23efb-b526-4a8e-999e-076965034e60', // pali myanmar grammar
      'eae9fd6f-7bac-4940-b80d-ad6cd6f433bf', // Concise P-E Dict
      '2f93d0fe-3d68-46ee-a80b-11fa445a29c6', // unity
      'beb45062-7c20-4047-bcd4-1f636ba443d1', // U Hau Sein
      '8833de18-0978-434c-b281-a2e7387f69be', // Pali-Chinese, revised and enlarged
      '3acf0c0f-59a7-4d25-a3d9-bf394a266ebd', // Chinese translation of the Pali dictionary - Huang Bingrong
  ];
  /**
   * Look up dictionary rows for comma-separated lists of words and bases.
   *
   * Query parameters read from the request:
   *  - word: comma-separated words
   *  - base: comma-separated base forms
   *  - deep: number of lookup rounds passed to lookup() (default 2)
   *
   * The response payload carries the rows, their count and the elapsed
   * time in milliseconds.
   *
   * @param  \Illuminate\Http\Request  $request
   * @return \Illuminate\Http\Response  NOTE(review): actually whatever $this->ok() returns
   */
  public function index(Request $request)
  {
      $startAt = microtime(true) * 1000;

      // A missing or empty parameter must give an empty list. explode() on
      // ''/null yields [''], which would put an empty-string word into the
      // lookup.
      $splitList = static function ($raw): array {
          if (!is_string($raw) || $raw === '') {
              return [];
          }

          return array_values(array_filter(
              explode(',', $raw),
              static function ($item) {
                  return $item !== '';
              }
          ));
      };

      $words = $splitList($request->get('word'));
      $bases = $splitList($request->get('base'));

      // Query depth. Request values arrive as strings; cast to int so a
      // non-numeric value cannot turn the `$i < $deep` loop bound in lookup()
      // into a string comparison, which never becomes false on PHP 8.
      $deep = max(0, (int) $request->get('deep', 2));

      $result = $this->lookup($words, $bases, $deep);
      $endAt = microtime(true) * 1000;

      return $this->ok([
          "rows" => $result,
          "count" => count($result),
          "time" => (int) ($endAt - $startAt),
      ]);
  }
  /**
   * Query the dictionaries listed in $dictList for the given words and then,
   * round by round, for the parents of the rows that were found.
   *
   * Each round issues one query for every word not queried yet; rows are
   * ordered by confidence (descending) within a round and appended to the
   * result in that order.
   *
   * @param  iterable  $words  words to look up
   * @param  iterable  $bases  base forms to look up
   * @param  int       $deep   maximum number of query rounds
   * @return array  all rows returned by the queries
   */
  public function lookup($words, $bases, $deep)
  {
      // word => state. 'done' marks words that have already been queried;
      // 'base' and 'apply' are recorded but not read anywhere in this method.
      $wordPool = [];
      $output = [];

      foreach ($words as $word) {
          // Skip values that cannot be used as a word / array key.
          if (!is_scalar($word) || (string) $word === '') {
              continue;
          }
          $wordPool[$word] = ['base' => false, 'done' => false, 'apply' => false];
      }
      foreach ($bases as $base) {
          if (!is_scalar($base) || (string) $base === '') {
              continue;
          }
          $wordPool[$base] = ['base' => true, 'done' => false, 'apply' => false];
      }

      // Cast once so the loop bound is always an integer comparison.
      $deep = (int) $deep;

      for ($round = 0; $round < $deep; $round++) {
          $pending = [];
          foreach ($wordPool as $word => $info) {
              if ($info['done'] === false) {
                  // PHP converts numeric-string array keys to ints; cast back
                  // so the query always binds strings.
                  $pending[] = (string) $word;
                  $wordPool[$word]['done'] = true;
              }
          }

          if (count($pending) === 0) {
              // Nothing new to query, so later rounds cannot add rows either.
              break;
          }

          // NOTE(review): no select() is given, so columns of dict_infos that
          // share a name with user_dicts columns presumably overwrite them on
          // the returned models - confirm this is intended.
          $rows = UserDict::whereIn('word', $pending)
              ->whereIn('dict_id', $this->dictList)
              ->leftJoin('dict_infos', 'user_dicts.dict_id', '=', 'dict_infos.id')
              ->orderBy('confidence', 'desc')
              ->get();

          foreach ($rows as $row) {
              $output[] = $row;
              if (!empty($row->parent) && !isset($wordPool[$row->parent])) {
                  // Queue the parent for the next round.
                  $wordPool[$row->parent] = ['base' => true, 'done' => false, 'apply' => false];
              }
          }

          // NOTE(review): queueing the components of a row's `factors` is not
          // done here; nothing records `factors` on pool entries, so the former
          // expansion loop never ran. Re-enable deliberately if it is wanted.
      }

      return $output;
  }
  /**
   * Auto-fill the word records posted in `data` from dictionary lookups.
   *
   * Every record whose type is not '.ctl.' and whose real form is non-empty
   * is looked up. The best-scoring case, parent, factors, factor meaning,
   * meaning and (from the parent's rows) second-level parent/grammar are
   * written back as ['value' => ..., 'status' => 3]. Records without a
   * dictionary hit are returned unchanged.
   *
   * @param  \Illuminate\Http\Request  $request
   * @return \Illuminate\Http\Response  NOTE(review): actually whatever $this->ok() returns
   */
  public function store(Request $request)
  {
      $orgData = $request->get('data');
      if (!is_array($orgData)) {
          // No usable payload: nothing to fill in.
          return $this->ok([]);
      }

      // Returns the real form of a record that should be looked up, or null.
      $realOf = static function ($word) {
          if (!is_array($word)) {
              return null;
          }
          $type = $word['type']['value'] ?? null;
          $real = $word['real']['value'] ?? null;
          if ($type === '.ctl.' || !is_scalar($real) || empty($real)) {
              return null;
          }

          return (string) $real;
      };

      // Highest-scoring key of a score table; '_null' stands for "no value".
      $pickTop = static function (array $scores) {
          arsort($scores);
          $first = (string) array_keys($scores)[0];

          return $first === "_null" ? "" : $first;
      };

      // Most frequent analysis entry (type 7) per factor, memoised so a factor
      // that occurs in several words is only queried once per request.
      $analysisCache = [];
      $factorMeaningOf = static function ($factor) use (&$analysisCache) {
          if (!array_key_exists($factor, $analysisCache)) {
              // The aggregate is aliased explicitly so ordering by "count" does
              // not depend on the database's implicit column name.
              $top = WbwAnalysis::where('wbw_word', $factor)
                  ->where('type', 7)
                  ->selectRaw('data,count(*) as count')
                  ->groupBy("data")
                  ->orderBy("count", "desc")
                  ->first();
              $analysisCache[$factor] = $top ? (string) $top->data : "";
          }

          return $analysisCache[$factor];
      };

      // Words of the sentence.
      $words = [];
      foreach ($orgData as $word) {
          $real = $realOf($word);
          if ($real !== null) {
              $words[] = $real;
          }
      }

      $result = $this->lookup($words, [], 2);
      $indexed = $this->toIndexed($result);

      foreach ($orgData as $key => $word) {
          $real = $realOf($word);
          if ($real === null || !isset($indexed[$real])) {
              continue;
          }

          $data = $word;
          $case = [];
          $parent = [];
          $factors = [];
          $factorMeaning = [];
          $meaning = [];
          $parent2 = [];

          foreach ($indexed[$real] as $value) {
              $type = (string) $value->type;
              // Rows that are not base entries take priority.
              $increment = strpos($type, 'base') === false ? 10 : 1;

              // Score every candidate value.
              $parent = $this->insertValue([$value->parent], $parent, $increment);
              if ($value->type !== ".cp.") {
                  $case = $this->insertValue([$type . "#" . $value->grammar], $case, $increment);
              }
              $factors = $this->insertValue([$value->factors], $factors, $increment);
              $factorMeaning = $this->insertValue([$value->factormean], $factorMeaning, $increment);
              $meaning = $this->insertValue(explode('$', (string) $value->mean), $meaning, $increment, false);
          }

          if (count($case) > 0) {
              $data['case'] = ['value' => $pickTop($case), 'status' => 3];
          }
          if (count($parent) > 0) {
              $data['parent'] = ['value' => $pickTop($parent), 'status' => 3];
          }
          if (count($factors) > 0) {
              $data['factors'] = ['value' => $pickTop($factors), 'status' => 3];
          }

          // Factor meanings: prefer the analysis data, one entry per factor.
          $analysisMeanings = [];
          $factorsValue = $data['factors']['value'] ?? '';
          if (is_string($factorsValue) && $factorsValue !== '') {
              foreach (explode("+", $factorsValue) as $factor) {
                  $analysisMeanings[] = $factorMeaningOf($factor);
              }
          }
          $fromAnalysis = implode('+', $analysisMeanings);
          $fromDict = count($factorMeaning) > 0 ? $pickTop($factorMeaning) : '';
          // Fall back to the dictionary's factor meaning only when the analysis
          // lookup produced nothing; previously the dictionary value was
          // computed and then always overwritten.
          $useDict = trim($fromAnalysis, '+') === '' && $fromDict !== '';
          $data['factorMeaning'] = ['value' => $useDict ? $fromDict : $fromAnalysis, 'status' => 3];

          // Use the rows of the chosen parent for more meanings and for the
          // second-level parent.
          $parentValue = $data['parent']['value'] ?? '';
          if (is_scalar($parentValue) && isset($indexed[(string) $parentValue])) {
              foreach ($indexed[(string) $parentValue] as $value) {
                  $meaning = $this->insertValue(explode('$', (string) $value->mean), $meaning, 10, false);
                  // Word origin: only base rows that point at a different word.
                  if (
                      !empty($value->parent)
                      && $value->parent !== $value->word
                      && strpos((string) $value->type, "base") !== false
                  ) {
                      $parent2 = $this->insertValue([$value->grammar . "$" . $value->parent], $parent2, 1, false);
                  }
              }
          }

          if (count($meaning) > 0) {
              $data['meaning'] = ['value' => $pickTop($meaning), 'status' => 3];
          }

          if (count($parent2) > 0) {
              arsort($parent2);
              $top = (string) array_keys($parent2)[0];
              // The key is "<grammar>$<parent>". Split on the LAST '$' so a
              // grammar string that itself contains '$' stays intact
              // (assumes the parent word never contains '$').
              $cut = strrpos($top, '$');
              if ($cut === false) {
                  $grammar2 = '';
                  $parent2Value = $top;
              } else {
                  $grammar2 = (string) substr($top, 0, $cut);
                  $parent2Value = (string) substr($top, $cut + 1);
              }
              $data['parent2'] = ['value' => $parent2Value, 'status' => 3];
              $data['grammar2'] = ['value' => $grammar2, 'status' => 3];
          }

          $orgData[$key] = $data;
      }

      return $this->ok($orgData);
  }
  /**
   * Build auto-filled word-by-word data for one sentence.
   *
   * The sentence id has four '-'-separated parts that are used as book,
   * paragraph, first word id and last word id when selecting template rows.
   *
   * @param  \Illuminate\Http\Request  $request
   * @param  string  $sentId  sentence id, "book-paragraph-start-end"
   * @return \Illuminate\Http\Response  NOTE(review): actually whatever $this->ok() returns
   */
  public function show(Request $request, string $sentId)
  {
      $startAt = microtime(true) * 1000;

      $sent = \explode('-', $sentId);
      if (count($sent) !== 4) {
          // Reject malformed ids instead of failing on an undefined offset.
          abort(400, 'Invalid sentence id');
      }

      // Words of the sentence.
      $wbw = WbwTemplate::where('book', $sent[0])
          ->where('paragraph', $sent[1])
          ->whereBetween('wid', [$sent[2], $sent[3]])
          ->orderBy('wid')
          ->get();

      $words = [];
      foreach ($wbw as $row) {
          if ($row->type !== '.ctl.' && !empty($row->real)) {
              $words[] = $row->real;
          }
      }

      $result = $this->lookup($words, [], 2);
      $indexed = $this->toIndexed($result);

      // Highest-scoring key of a score table; '_null' stands for "no value".
      $pickTop = static function (array $scores) {
          arsort($scores);
          $first = (string) array_keys($scores)[0];

          return $first === "_null" ? "" : $first;
      };

      // Build the auto-fill result.
      $wbwContent = [];
      foreach ($wbw as $row) {
          // '?' in the template means "unknown" and is emitted as empty.
          $type = $row->type === '?' ? '' : $row->type;
          // `gramma` is the attribute name read from the template row.
          $grammar = $row->gramma === '?' ? '' : $row->gramma;
          $part = $row->part === '?' ? '' : $row->part;
          if (!empty($type) || !empty($grammar)) {
              $templateCase = "{$type}#$grammar";
          } else {
              $templateCase = "";
          }

          $data = [
              'sn' => [$row->wid],
              'word' => ['value' => $row->word, 'status' => 3],
              'real' => ['value' => $row->real, 'status' => 3],
              // NOTE(review): starts as an array but is replaced by a string
              // when a meaning is found - confirm what clients expect.
              'meaning' => ['value' => [], 'status' => 3],
              'type' => ['value' => $type, 'status' => 3],
              'grammar' => ['value' => $grammar, 'status' => 3],
              'case' => ['value' => $templateCase, 'status' => 3],
              'style' => ['value' => $row->style, 'status' => 3],
              'factors' => ['value' => $part, 'status' => 3],
              'factorMeaning' => ['value' => '', 'status' => 3],
              'confidence' => 0.5,
          ];

          if ($row->type !== '.ctl.' && !empty($row->real) && isset($indexed[$row->real])) {
              $case = [];
              $parent = [];
              $factors = [];
              $factorMeaning = [];
              $meaning = [];

              foreach ($indexed[$row->real] as $value) {
                  $valueType = (string) $value->type;
                  // Rows that are not base entries take priority.
                  $increment = strpos($valueType, 'base') === false ? 10 : 1;

                  // Score every candidate value.
                  // NOTE(review): unlike store(), '.cp.' rows are also counted
                  // for the case here - confirm which behavior is intended.
                  $parent = $this->insertValue([$value->parent], $parent, $increment);
                  $case = $this->insertValue([$valueType . "#" . $value->grammar], $case, $increment);
                  $factors = $this->insertValue([$value->factors], $factors, $increment);
                  $factorMeaning = $this->insertValue([$value->factormean], $factorMeaning, $increment);
                  $meaning = $this->insertValue(explode('$', (string) $value->mean), $meaning, $increment, false);
              }

              if (count($case) > 0) {
                  $data['case'] = ['value' => $pickTop($case), 'status' => 3];
              }
              if (count($parent) > 0) {
                  $data['parent'] = ['value' => $pickTop($parent), 'status' => 3];
              }
              if (count($factors) > 0) {
                  $data['factors'] = ['value' => $pickTop($factors), 'status' => 3];
              }
              if (count($factorMeaning) > 0) {
                  $data['factorMeaning'] = ['value' => $pickTop($factorMeaning), 'status' => 3];
              }

              // Collect more meanings from the rows of the chosen parent.
              if (!empty($data['parent'])) {
                  $parentValue = $data['parent']['value'];
                  if (isset($indexed[$parentValue])) {
                      Log::debug($parentValue . "=" . count($indexed[$parentValue]));
                      foreach ($indexed[$parentValue] as $value) {
                          $meaning = $this->insertValue(explode('$', (string) $value->mean), $meaning, 10, false);
                      }
                  } else {
                      // A parent without dictionary rows (including the empty
                      // parent) is routine, so it is not logged as an error.
                      Log::debug("no set parent" . $parentValue);
                  }
              }

              if (count($meaning) > 0) {
                  arsort($meaning);
                  Log::debug('meanings=');
                  Log::debug(array_keys($meaning));
                  $data['meaning'] = ['value' => $pickTop($meaning), 'status' => 3];
              }
          }

          $wbwContent[] = $data;
      }

      $endAt = microtime(true) * 1000;

      return $this->ok([
          "rows" => $wbwContent,
          "count" => count($wbwContent),
          "time" => (int) ($endAt - $startAt),
      ]);
  }
  /**
   * Group lookup rows by their `word` attribute.
   *
   * @param  iterable  $words  rows exposing a `word` property
   * @return array  word => list of rows for that word, in input order
   */
  private function toIndexed($words)
  {
      $byWord = [];
      foreach ($words as $entry) {
          $bucket = $entry->word;
          $byWord[$bucket][] = $entry;
      }

      return $byWord;
  }
  /**
   * Add $increment to the score of every candidate in $value.
   *
   * Candidates are trimmed and used as keys of $container. An empty candidate
   * is counted under the key '_null', or skipped when $empty is false.
   * Skipping does not stop the loop, so candidates that follow an empty one
   * are still counted.
   *
   * @param  array  $value      candidate strings (null is treated as '')
   * @param  array  $container  scores accumulated so far, keyed by candidate
   * @param  int    $increment  score added per occurrence
   * @param  bool   $empty      whether empty candidates are counted as '_null'
   * @return array  the updated score table
   */
  private function insertValue($value, $container, $increment, $empty = true)
  {
      foreach ($value as $one) {
          // Trim before testing for emptiness so whitespace-only candidates
          // are treated as empty; the cast avoids passing null to trim().
          $key = trim((string) $one);
          if ($key === '') {
              if ($empty === false) {
                  continue;
              }
              $key = '_null';
          }
          $container[$key] = ($container[$key] ?? 0) + $increment;
      }

      return $container;
  }
  /**
   * Placeholder for the resource "update" action.
   *
   * The body is empty: nothing is stored and nothing is returned.
   *
   * @param  \Illuminate\Http\Request  $request
   * @param  \App\Models\UserDict  $userDict
   * @return void
   */
  public function update(Request $request, UserDict $userDict)
  {
  }
  /**
   * Placeholder for the resource "destroy" action.
   *
   * The body is empty: nothing is removed and nothing is returned.
   *
   * @param  \App\Models\UserDict  $userDict
   * @return void
   */
  public function destroy(UserDict $userDict)
  {
  }
  440. }