WbwLookupController.php 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. <?php
  2. namespace App\Http\Controllers;
  3. use App\Models\UserDict;
  4. use App\Models\DictInfo;
  5. use App\Models\WbwTemplate;
  6. use App\Models\Channel;
  7. use Illuminate\Http\Request;
  8. use App\Tools\CaseMan;
  9. use Illuminate\Support\Facades\Log;
  10. use Illuminate\Support\Facades\Cache;
  11. use App\Http\Api\DictApi;
  12. use App\Http\Api\AuthApi;
  13. class WbwLookupController extends Controller
  14. {
  /**
   * IDs of the dictionaries that lookup() searches.
   *
   * The entries below are fixed UUIDs; initSysDict() appends further
   * system dictionary IDs at request time.
   *
   * @var array
   */
  private $dictList = [
      '85dcc61c-c9e1-4ae0-9b44-cd6d9d9f0d01', // community aggregate
      '4d3a0d92-0adc-4052-80f5-512a2603d0e8', // system irregular
      '8359757e-9575-455b-a772-cc6f036caea0', // system sandhi
      '61f23efb-b526-4a8e-999e-076965034e60', // pali myanmar grammar
      'eae9fd6f-7bac-4940-b80d-ad6cd6f433bf', // Concise P-E Dict
      '2f93d0fe-3d68-46ee-a80b-11fa445a29c6', // unity
      'beb45062-7c20-4047-bcd4-1f636ba443d1', // U Hau Sein
      '8833de18-0978-434c-b281-a2e7387f69be', // Pali-Chinese, enlarged edition
      '3acf0c0f-59a7-4d25-a3d9-bf394a266ebd', // Pali dictionary, Chinese translation (Huang Bingrong)
      '9ce6a53b-e28f-4fb7-b69d-b35fd5d76a24', // Myanmar-English dictionary
  ];
  /**
   * Append the system dictionaries to $this->dictList.
   *
   * The IDs are resolved through DictApi::getSysDict() for the keys
   * 'system_regular', 'robot_compound', 'community' and 'community_extract'.
   * Empty results and IDs that are already listed are skipped, so calling
   * this method more than once does not add duplicate entries.
   *
   * @return void
   */
  private function initSysDict()
  {
      $sysDictKeys = [
          'system_regular',
          'robot_compound',
          'community',
          'community_extract',
      ];
      foreach ($sysDictKeys as $sysDictKey) {
          $dictId = DictApi::getSysDict($sysDictKey);
          // NOTE(review): what getSysDict() returns for an unknown key is not
          // visible here; any empty value is kept out of the whereIn() list.
          if (empty($dictId) || in_array($dictId, $this->dictList, true)) {
              continue;
          }
          $this->dictList[] = $dictId;
      }
  }
  /**
   * Look up a list of words in the dictionaries.
   *
   * Query parameters read from the request:
   *  - word: comma-separated words to look up
   *  - base: comma-separated words added to the lookup pool flagged as base
   *  - deep: number of lookup passes (default 2, limited to 0..10)
   *
   * The response carries the matching rows, their count and the elapsed
   * time in milliseconds.
   *
   * @param \Illuminate\Http\Request $request
   *
   * @return \Illuminate\Http\Response
   */
  public function index(Request $request)
  {
      $startAt = microtime(true) * 1000;
      $this->initSysDict();

      // Turn a comma-separated parameter into a clean list: a missing
      // parameter yields [] instead of passing null to explode(), and blank
      // items are dropped so they are never sent to the database.
      $toList = static function ($raw) {
          if (!is_string($raw) || $raw === '') {
              return [];
          }
          $items = array_map('trim', explode(',', $raw));
          return array_values(array_filter($items, static function ($item) {
              return $item !== '';
          }));
      };
      $words = $toList($request->get('word'));
      $bases = $toList($request->get('base'));

      // Lookup depth. The value comes from the client and drives one
      // database query per pass, so it is coerced to an integer and capped.
      $maxDeep = 10;
      $deep = $request->get('deep', 2);
      $deep = is_numeric($deep) ? (int) $deep : 2;
      $deep = max(0, min($deep, $maxDeep));

      $result = $this->lookup($words, $bases, $deep);
      $endAt = microtime(true) * 1000;

      return $this->ok([
          "rows" => $result,
          "count" => count($result),
          "time" => (int)($endAt - $startAt)
      ]);
  }
  /**
   * Query the user dictionaries for a set of words and return every match.
   *
   * Each pass queries all words in the pool that have not been queried yet
   * against the dictionaries in $this->dictList. The `parent` of every row
   * found is added to the pool, so the next pass resolves it as well.
   *
   * NOTE(review): the `factors` of the rows are not followed. An earlier
   * expansion loop read a key the pool never contains and had no effect.
   *
   * @param array $words words to look up
   * @param array $bases words added to the pool flagged as base
   * @param int   $deep  maximum number of passes
   * @return array matching rows, pass by pass, each pass ordered by
   *               confidence descending
   */
  public function lookup($words, $bases, $deep)
  {
      $wordPool = [];
      foreach ($words as $word) {
          $wordPool[$word] = ['base' => false, 'done' => false, 'apply' => false];
      }
      foreach ($bases as $base) {
          $wordPool[$base] = ['base' => true, 'done' => false, 'apply' => false];
      }

      $output = [];
      for ($i = 0; $i < $deep; $i++) {
          // Collect the words that still have to be queried in this pass.
          $newWords = [];
          foreach ($wordPool as $word => $info) {
              if ($info['done'] === false) {
                  $newWords[] = $word;
                  $wordPool[$word]['done'] = true;
              }
          }
          if (count($newWords) === 0) {
              // Nothing is pending: further passes could only run empty queries.
              break;
          }

          $data = UserDict::whereIn('word', $newWords)
              ->whereIn('dict_id', $this->dictList)
              ->leftJoin('dict_infos', 'user_dicts.dict_id', '=', 'dict_infos.id')
              ->orderBy('confidence', 'desc')
              ->get();

          foreach ($data as $row) {
              $output[] = $row;
              if (!empty($row->parent) && !isset($wordPool[$row->parent])) {
                  // Queue the parent so that the next pass looks it up.
                  $wordPool[$row->parent] = ['base' => true, 'done' => false, 'apply' => false];
              }
          }
      }
      return $output;
  }
  /**
   * Decide whether an entry's language satisfies the requested languages.
   *
   * An empty request list accepts every entry. Otherwise the entry matches
   * when its lower-cased code is listed, or when its language family (the
   * part before the first '-') occurs in one of the requested codes.
   * An entry without a language never matches a non-empty request list.
   *
   * @param array       $query requested language codes, expected lower-case
   *                           (store() lower-cases them before calling)
   * @param string|null $lang  language code of the entry
   * @return bool
   */
  private function langCheck($query, $lang)
  {
      if ($query === []) {
          return true;
      }

      // Normalise once so the exact match and the family match agree on case.
      $lang = strtolower(trim((string) $lang));
      if ($lang === '') {
          return false;
      }
      if (in_array($lang, $query, true)) {
          return true;
      }

      $langFamily = explode('-', $lang)[0];
      if ($langFamily === '') {
          // An empty needle would match every requested code.
          return false;
      }
      foreach ($query as $value) {
          if (strpos((string) $value, $langFamily) !== false) {
              return true;
          }
      }
      return false;
  }
  /**
   * Read a cached word-by-word preference for one field of a word.
   *
   * The user's own entry is tried first, then the entry stored under user
   * id 0 (logged below as the community entry) for the same field.
   *
   * @param string     $word   word the preference belongs to
   * @param string     $field  one of: type, grammar, meaning, factors,
   *                           factorMeaning, parent, part, case
   * @param int|string $userId id of the current user
   * @return array|false ['value' => ..., 'status' => 5] on a hit; false when
   *                     nothing is cached or $field is unknown
   */
  private function wbwPreference($word, $field, $userId)
  {
      $fieldMap = [
          'type' => 1,
          'grammar' => 2,
          'meaning' => 3,
          'factors' => 4,
          'factorMeaning' => 5,
          'parent' => 6,
          'part' => 7,
          'case' => 8,
      ];
      if (!isset($fieldMap[$field])) {
          return false;
      }
      $fieldId = $fieldMap[$field];
      $prefix = 'wbw-preference';

      $myPreference = Cache::get("{$prefix}/{$word}/{$fieldId}/{$userId}");
      if (!empty($myPreference)) {
          Log::debug($word . '命中我的wbw-' . $field, ['data' => $myPreference]);
          return ['value' => $myPreference, 'status' => 5];
      }

      // The fallback must read the requested field; a fixed field id would
      // hand back the community *meaning* for every other field.
      $communityPreference = Cache::get("{$prefix}/{$word}/{$fieldId}/0");
      if (!empty($communityPreference)) {
          Log::debug($word . '命中社区wbw-' . $field, ['data' => $communityPreference]);
          return ['value' => $communityPreference, 'status' => 5];
      }
      return false;
  }
  /**
   * Auto-fill word-by-word data for the words submitted by the client.
   *
   * Request input read here:
   *  - data: list of word entries; every field of an entry is an array with
   *          'value' and 'status' keys (e.g. $word['real']['value'])
   *  - lang: optional list of language codes used to filter meanings
   *
   * Entries whose type is '.ctl.' or whose real word is empty are returned
   * untouched. For all others, cached preferences are applied first, then
   * the dictionary rows returned by lookup() are scored to choose case,
   * parent, factors, factor meaning and meaning.
   *
   * @param  \Illuminate\Http\Request  $request
   * @return \Illuminate\Http\Response
   */
  public function store(Request $request)
  {
      $user = AuthApi::current($request);
      if (!$user) {
          // Not logged in.
          return $this->error(__('auth.failed'), 401, 401);
      }
      $userId = $user['user_id'];

      $this->initSysDict();

      // Treat missing or malformed input as an empty list.
      $orgData = $request->get('data');
      if (!is_array($orgData)) {
          $orgData = [];
      }
      $lang = [];
      foreach ((array) $request->get('lang', []) as $value) {
          $lang[] = strtolower($value);
      }

      // Words of the sentence that need a dictionary lookup.
      $words = [];
      foreach ($orgData as $word) {
          if (($word['type']['value'] ?? null) === '.ctl.') {
              continue;
          }
          if (!empty($word['real']['value'])) {
              $words[] = $word['real']['value'];
          }
      }

      $result = $this->lookup($words, [], 2);
      $indexed = $this->toIndexed($result);

      foreach ($orgData as $key => $word) {
          if (($word['type']['value'] ?? null) === '.ctl.') {
              continue;
          }
          if (empty($word['real']['value'])) {
              continue;
          }
          $real = $word['real']['value'];
          $data = $word;

          // Cached preferences take the place of the submitted fields.
          foreach (['meaning', 'factors', 'factorMeaning', 'case', 'parent'] as $field) {
              $preference = $this->wbwPreference($real, $field, $userId);
              if ($preference !== false) {
                  $data[$field] = $preference;
              }
          }

          if (isset($indexed[$real])) {
              // Score tables: candidate => accumulated score.
              $case = [];
              $parent = [];
              $factors = [];
              $factorMeaning = [];
              $meaning = [];
              $parent2 = [];

              foreach ($indexed[$real] as $value) {
                  // Entries that are not base forms weigh ten times more.
                  $increment = strstr((string) $value->type, 'base') === false ? 10 : 1;

                  if ($value->type !== '.cp.') {
                      $parent = $this->insertValue([$value->parent], $parent, $increment);
                  }
                  if (isset($data['case']) && ($data['case']['status'] ?? 0) < 5) {
                      if (!empty($value->type) && $value->type !== '.cp.') {
                          $case = $this->insertValue([$value->type . '#' . $value->grammar], $case, $increment);
                      }
                  }
                  // A missing field is treated like status 0 (still to be filled).
                  if (($data['factors']['status'] ?? 0) < 50) {
                      $factors = $this->insertValue([$value->factors], $factors, $increment);
                  }
                  if (isset($data['factorMeaning']) && ($data['factorMeaning']['status'] ?? 0) < 50) {
                      $factorMeaning = $this->insertValue([$value->factormean], $factorMeaning, $increment, false);
                  }
                  if (($data['meaning']['status'] ?? 0) < 50) {
                      if ($this->langCheck($lang, $value->language)) {
                          $meaning = $this->insertValue(explode('$', (string) $value->mean), $meaning, $increment, false);
                      }
                  }
              }

              if (count($case) > 0) {
                  $data['case'] = ['value' => $this->topCandidate($case), 'status' => 3];
              }
              if (count($parent) > 0) {
                  $data['parent'] = ['value' => $this->topCandidate($parent), 'status' => 3];
              } else {
                  $data['parent'] = ['value' => "", 'status' => 3];
              }
              if (count($factors) > 0 && empty($data['factors']['value'])) {
                  $data['factors'] = ['value' => $this->topCandidate($factors), 'status' => 3];
              }
              if (count($factorMeaning) > 0) {
                  $data['factorMeaning'] = ['value' => $this->topCandidate($factorMeaning), 'status' => 5];
              }

              // Build the factor meaning from the cached meaning of each
              // factor; a factor without one is kept as it is.
              if (isset($data['factorMeaning']) && ($data['factorMeaning']['status'] ?? 0) < 5) {
                  $wbwFactorMeaning = [];
                  if (!empty($data['factors']['value'])) {
                      foreach (explode('+', $data['factors']['value']) as $factor) {
                          $preference = $this->wbwPreference($factor, 'meaning', $userId);
                          $wbwFactorMeaning[] = $preference !== false ? $preference['value'] : $factor;
                      }
                  }
                  $data['factorMeaning'] = ['value' => implode('+', $wbwFactorMeaning), 'status' => 3];
              }

              // No meaning yet: borrow the meanings of the parent word.
              if (empty($data['meaning']['value']) && !empty($data['parent']['value'])) {
                  $parentWord = $data['parent']['value'];
                  if (isset($indexed[$parentWord])) {
                      foreach ($indexed[$parentWord] as $value) {
                          if ($this->langCheck($lang, $value->language)) {
                              $meaning = $this->insertValue(explode('$', (string) $value->mean), $meaning, 10, false);
                          }
                          // Collect the parent's own parent, paired with its grammar.
                          if (
                              !empty($value->parent) &&
                              $value->parent !== $value->word &&
                              strstr((string) $value->type, 'base') !== false
                          ) {
                              $parent2 = $this->insertValue([$value->grammar . '$' . $value->parent], $parent2, 1, false);
                          }
                      }
                  }
              }
              if (count($meaning) > 0) {
                  $data['meaning'] = ['value' => $this->topCandidate($meaning), 'status' => 3];
              }
              if (count($parent2) > 0) {
                  arsort($parent2);
                  // Keys have the form "<grammar>$<parent>".
                  $first = explode('$', array_keys($parent2)[0]);
                  $data['parent2'] = ['value' => $first[1], 'status' => 3];
                  $data['grammar2'] = ['value' => $first[0], 'status' => 3];
              }
          }

          if (
              !isset($data['factorMeaning']['value']) ||
              $this->fmEmpty($data['factorMeaning']['value'])
          ) {
              // Generate the factor meaning from cached preferences: factors
              // are separated by '+', sub-factors by '-'.
              $autoMeaning = '';
              $autoFM = [];
              $currFactors = explode('+', (string) ($data['factors']['value'] ?? ''));
              foreach ($currFactors as $factor) {
                  $autoSubFM = [];
                  foreach (explode('-', $factor) as $subFactor) {
                      $preference = $this->wbwPreference($subFactor, 'factorMeaning', $userId);
                      if ($preference === false) {
                          $preference = $this->wbwPreference($subFactor, 'meaning', $userId);
                      }
                      $autoSubFM[] = $preference !== false ? $preference['value'] : '';
                  }
                  $autoFM[] = implode('-', $autoSubFM);
                  // The word meaning is the plain concatenation of the parts,
                  // appended exactly once per factor.
                  $autoMeaning .= implode('', $autoSubFM);
              }
              if (count($autoFM) > 0) {
                  $data['factorMeaning'] = ['value' => implode('+', $autoFM), 'status' => 3];
                  if (empty($data['meaning']['value']) && !empty($autoMeaning)) {
                      $data['meaning'] = ['value' => $autoMeaning, 'status' => 3];
                  }
              }
          }

          $orgData[$key] = $data;
      }
      return $this->ok($orgData);
  }

  /**
   * Return the highest-scoring key of a non-empty score table, mapping the
   * '_null' placeholder written by insertValue() back to an empty string.
   *
   * @param array $scores candidate => score
   * @return int|string
   */
  private function topCandidate(array $scores)
  {
      arsort($scores);
      $first = array_keys($scores)[0];
      return $first === "_null" ? "" : $first;
  }
  /**
   * Tell whether a factor-meaning string carries no content, i.e. nothing
   * is left once every '+', '-' and space character has been removed.
   *
   * @param string $value
   * @return bool
   */
  private function fmEmpty($value)
  {
      $remainder = str_replace(['+', '-', ' '], '', $value);
      return $remainder === '';
  }
  /**
   * Auto-fill word-by-word data for a stored sentence.
   *
   * The sentence is read from the WbwTemplate rows selected by $sentId and
   * every word is completed from the dictionary rows returned by lookup().
   * The response carries the rows, their count and the elapsed time in
   * milliseconds.
   *
   * @param  \Illuminate\Http\Request  $request
   * @param  string  $sentId  "book-paragraph-firstWid-lastWid"
   * @return \Illuminate\Http\Response
   */
  public function show(Request $request, string $sentId)
  {
      $startAt = microtime(true) * 1000;

      $sent = \explode('-', $sentId);
      if (count($sent) < 4) {
          // All four parts are required by the query below.
          return $this->error('invalid sentence id', 400, 400);
      }

      // Search the same dictionary set as index() and store().
      $this->initSysDict();

      // Words of the sentence.
      $wbw = WbwTemplate::where('book', $sent[0])
          ->where('paragraph', $sent[1])
          ->whereBetween('wid', [$sent[2], $sent[3]])
          ->orderBy('wid')
          ->get();

      $words = [];
      foreach ($wbw as $row) {
          if ($row->type !== '.ctl.' && !empty($row->real)) {
              $words[] = $row->real;
          }
      }
      $result = $this->lookup($words, [], 2);
      $indexed = $this->toIndexed($result);

      // Highest-scoring key of a non-empty score table; the '_null'
      // placeholder written by insertValue() becomes an empty string.
      $top = static function (array $scores) {
          arsort($scores);
          $first = array_keys($scores)[0];
          return $first === "_null" ? "" : $first;
      };

      // Build the auto-filled result.
      $wbwContent = [];
      foreach ($wbw as $row) {
          // '?' in the template stands for "unknown".
          $type = $row->type === '?' ? '' : $row->type;
          $grammar = $row->gramma === '?' ? '' : $row->gramma;
          $part = $row->part === '?' ? '' : $row->part;
          $caseText = (!empty($type) || !empty($grammar)) ? "{$type}#{$grammar}" : "";

          $data = [
              'sn' => [$row->wid],
              'word' => ['value' => $row->word, 'status' => 3],
              'real' => ['value' => $row->real, 'status' => 3],
              'meaning' => ['value' => [], 'status' => 3],
              'type' => ['value' => $type, 'status' => 3],
              'grammar' => ['value' => $grammar, 'status' => 3],
              'case' => ['value' => $caseText, 'status' => 3],
              'style' => ['value' => $row->style, 'status' => 3],
              'factors' => ['value' => $part, 'status' => 3],
              'factorMeaning' => ['value' => '', 'status' => 3],
              'confidence' => 0.5
          ];

          if ($row->type !== '.ctl.' && !empty($row->real) && isset($indexed[$row->real])) {
              // Score tables: candidate => accumulated score.
              $caseScores = [];
              $parentScores = [];
              $factorScores = [];
              $factorMeaningScores = [];
              $meaningScores = [];

              foreach ($indexed[$row->real] as $value) {
                  // Entries that are not base forms weigh ten times more.
                  $increment = strstr((string) $value->type, 'base') === false ? 10 : 1;

                  $parentScores = $this->insertValue([$value->parent], $parentScores, $increment);
                  $caseScores = $this->insertValue([$value->type . "#" . $value->grammar], $caseScores, $increment);
                  $factorScores = $this->insertValue([$value->factors], $factorScores, $increment);
                  $factorMeaningScores = $this->insertValue([$value->factormean], $factorMeaningScores, $increment);
                  $meaningScores = $this->insertValue(explode('$', (string) $value->mean), $meaningScores, $increment, false);
              }

              if (count($caseScores) > 0) {
                  $data['case'] = ['value' => $top($caseScores), 'status' => 3];
              }
              if (count($parentScores) > 0) {
                  $data['parent'] = ['value' => $top($parentScores), 'status' => 3];
              }
              if (count($factorScores) > 0) {
                  $data['factors'] = ['value' => $top($factorScores), 'status' => 3];
              }
              if (count($factorMeaningScores) > 0) {
                  $data['factorMeaning'] = ['value' => $top($factorMeaningScores), 'status' => 3];
              }

              // Add the meanings of the parent word to the candidates.
              if (!empty($data['parent']) && isset($indexed[$data['parent']['value']])) {
                  foreach ($indexed[$data['parent']['value']] as $value) {
                      $meaningScores = $this->insertValue(explode('$', (string) $value->mean), $meaningScores, 10, false);
                  }
              }
              if (count($meaningScores) > 0) {
                  $data['meaning'] = ['value' => $top($meaningScores), 'status' => 3];
              }
          }

          $wbwContent[] = $data;
      }

      $endAt = microtime(true) * 1000;
      return $this->ok([
          "rows" => $wbwContent,
          "count" => count($wbwContent),
          "time" => (int)($endAt - $startAt)
      ]);
  }
  /**
   * Group lookup rows by their `word` property.
   *
   * @param iterable $words rows exposing a `word` property
   * @return array map of word => list of rows, in input order
   */
  private function toIndexed($words)
  {
      $grouped = [];
      foreach ($words as $entry) {
          $bucket = $entry->word;
          if (!isset($grouped[$bucket])) {
              $grouped[$bucket] = [];
          }
          $grouped[$bucket][] = $entry;
      }
      return $grouped;
  }
  /**
   * Add candidates to a score table.
   *
   * Every candidate is trimmed and its score raised by $increment. An empty
   * candidate is counted under the '_null' key when $empty is true; when
   * $empty is false, candidates without content (see fmEmpty()) are skipped.
   *
   * @param array $value     candidates to add
   * @param array $container score table, candidate => score
   * @param int   $increment score added for each candidate
   * @param bool  $empty     whether empty candidates are allowed
   * @return array the updated score table
   */
  private function insertValue($value, $container, $increment, $empty = true)
  {
      foreach ($value as $one) {
          $one = trim((string) $one);
          if ($empty === false && $this->fmEmpty($one)) {
              // Skip this candidate only; the ones after it must still be
              // counted (e.g. "a$$b" contributes both "a" and "b").
              continue;
          }
          $key = empty($one) ? '_null' : $one;
          if (isset($container[$key])) {
              $container[$key] += $increment;
          } else {
              $container[$key] = $increment;
          }
      }
      return $container;
  }
  /**
   * Update endpoint of the resource controller.
   *
   * No behaviour is implemented: the body is empty and nothing is returned.
   *
   * @param  \Illuminate\Http\Request  $request
   * @param  \App\Models\UserDict  $userDict
   * @return void
   */
  public function update(Request $request, UserDict $userDict)
  {
      // Not implemented.
  }
  /**
   * Delete endpoint of the resource controller.
   *
   * No behaviour is implemented: the body is empty and nothing is returned.
   *
   * @param  \App\Models\UserDict  $userDict
   * @return void
   */
  public function destroy(UserDict $userDict)
  {
      // Not implemented.
  }
  560. }