ending as $ending) { # code... if($ending[4]<$confidence){ continue; } switch ($type) { case '.n:base.': if($ending[2] !== '.n.' || strpos($ending[3],$grammar)!==0){continue 2;} break; case '.ti:base.': if($ending[2] !== '.ti.' || strpos($ending[3],$grammar)!==0){continue 2;} break; case '.adj:base.': if($ending[2] !== '.ti.' || strpos($ending[3],$grammar)!==0){continue 2;} break; case '.v:base.': if($ending[2] !== '.v.'){continue 2;} break; default: continue 2; break; } $endingLen = mb_strlen($ending[0], "UTF-8"); $wordEnd = mb_substr($base, 0 - $endingLen, null, "UTF-8"); if ($wordEnd === $ending[0]) { //匹配成功 $word = mb_substr($base, 0, mb_strlen($base, "UTF-8") - $endingLen, "UTF-8") . $ending[1]; $count = WordIndex::where('word',$word)->select(['count','bold'])->first(); if($count){ $newWord[] = ['word'=>$word, 'ending'=>$ending[1], 'grammar'=>$ending[3], 'count'=>$count->count, 'bold'=>$count->bold ]; } } } /* $result = []; foreach ($newWord as $key => $value) { $result[] = ['word'=>$key,'ending',"count"=>$value["count"],"bold"=>$value["bold"]]; } */ return $newWord; } /** * 从词干到单词的变化 * * @return void */ public function BaseToWord($base,$confidence=0.5){ $newWord = array(); $case = new CaseEnding(); foreach ($case->ending as $ending) { # code... if($ending[4]<$confidence){ continue; } $endingLen = mb_strlen($ending[0], "UTF-8"); $wordEnd = mb_substr($base, 0 - $endingLen, null, "UTF-8"); if ($wordEnd === $ending[0]) { //匹配成功 $word = mb_substr($base, 0, mb_strlen($base, "UTF-8") - $endingLen, "UTF-8") . $ending[1]; if(!isset($newWord[$word])){ $count = WordIndex::where('word',$word)->select(['count','bold'])->first(); if($count){ $newWord[$word] = ["count"=>$count->count,"bold"=>$count->bold]; }else{ $newWord[$word] = false; } } } } $result = []; foreach ($newWord as $key => $value) { # code... if($value !== false){ $result[] = ['word'=>$key,'ending',"count"=>$value["count"],"bold"=>$value["bold"]]; } } return $result; } /** * 从单词到词干的变化 * 小蝌蚪找妈妈 * @return void */ public function WordToBase($word,$deep=1,$verify=true){ $newWords = array(); $newBase = array(); $input[$word] = true; $case = new CaseEnding(); for ($i=0; $i < $deep; $i++) { # code... foreach ($input as $currWord => $status) { # code... if($status){ $input[$currWord] = false; foreach ($case->ending as $ending) { # code... if($ending[1] < 0.5){ continue; } $endingLen = mb_strlen($ending[1], "UTF-8"); $wordEnd = mb_substr($currWord, 0 - $endingLen, null, "UTF-8"); if ($wordEnd === $ending[1]) { //匹配成功 $base = mb_substr($currWord, 0, mb_strlen($currWord, "UTF-8") - $endingLen, "UTF-8") . $ending[0]; if(!isset($newBase[$base])){ $newBase[$base] = array(); } array_push($newBase[$base],[ 'word'=>$currWord, 'type'=>$ending[2], 'grammar'=>$ending[3], 'parent'=>$base, 'factors'=>"{$base}+[{$ending[1]}]", 'confidence'=>$ending[4], ]); } } } } foreach ($newBase as $currWord => $value) { # 把新词加入列表 if(!isset($input[$currWord])){ $input[$currWord] = true; } } } if($verify){ $output = array(); foreach ($newBase as $base => $rows) { # code... if(($verify = $this->VerifyBase($base,$rows)) !== false){ if(count($verify)>0){ $output[$base] = $verify; } } } if(count($output)==0){ //如果验证失败 输出最可能的结果 $short = 10000; $shortBase = ""; foreach ($newBase as $base => $rows) { if(mb_strlen($base,"UTF-8") < $short){ $short = mb_strlen($base,"UTF-8"); $shortBase = $base; } } foreach ($newBase as $base => $rows) { if($base == $shortBase){ $output[$base] = $rows; } } } return $output; }else{ return $newBase; } } /** * 验证base在字典中是否存在 */ public function VerifyBase($base,$rows){ # $output = array(); $dictWords = UserDict::where('word',$base)->select(['type','grammar'])->groupBy(['type','grammar'])->get(); if(count($dictWords)>0){ $newBase[$base] = 1; $case = array(); //字典中这个拼写的单词的语法信息 foreach ($dictWords as $value) { # code... $case["{$value->type}{$value->grammar}"] = 1; } foreach ($rows as $value) { //根据输入的猜测的type,grammar拼接合理的 parent 语法信息 switch ($value['type']) { case '.n.': $parentType = '.n:base.'; break; case '.ti.': $parentType = '.ti:base.'; break; case '.v.': $parentType = '.v:base.'; break; default: $parentType = ''; break; } if(!empty($value['grammar']) && $value['type'] !== ".v."){ $arrGrammar = explode('$',$value['grammar']); $parentType .= $arrGrammar[0]; } # 只保存语法信息合理的数据 if(isset($case[$parentType])){ array_push($output,$value); } } return $output; }else{ return false; } } }