пре 1 година · 5ab5143438
--- a/api-v8/app/Tools/CaseMan.php
+++ b/api-v8/app/Tools/CaseMan.php
@@ -1,4 +1,5 @@
 
				 <?php
			
 
				+
			
 
				 namespace App\Tools;
			
 
				 
			
 
				 use Illuminate\Support\Facades\Cache;
			
@@ -9,7 +10,7 @@ use App\Models\WordIndex;
 
				 
			
 
				 class CaseMan
			
 
				 {
			
 
				-	/**
			
 
				+    /**
			
 
				      * Create a new class instance.
			
 
				      *
			
 
				      * @return void
			
@@ -19,32 +20,41 @@ class CaseMan
 
				         return;
			
 
				     }
			
 
				 
			
 
				-    	/**
			
 
				+    /**
			
 
				      * 从词干到单词的变化
			
 
				      *
			
 
				      * @return void
			
 
				      */
			
 
				-	public function Declension($base,$type=null,$grammar='',$confidence=0.5){
			
 
				+    public function Declension($base, $type = null, $grammar = '', $confidence = 0.5)
			
 
				+    {
			
 
				         $newWord = array();
			
 
				         $case = new CaseEnding();
			
 
				         foreach ($case->ending as  $ending) {
			
 
				             # code...
			
 
				-            if($ending[4]<$confidence){
			
 
				+            if ($ending[4] < $confidence) {
			
 
				                 continue;
			
 
				             }
			
 
				 
			
 
				             switch ($type) {
			
 
				                 case '.n:base.':
			
 
				-                    if($ending[2] !== '.n.' || strpos($ending[3],$grammar)!==0){continue 2;}
			
 
				+                    if ($ending[2] !== '.n.' || strpos($ending[3], $grammar) !== 0) {
			
 
				+                        continue 2;
			
 
				+                    }
			
 
				                     break;
			
 
				                 case '.ti:base.':
			
 
				-                    if($ending[2] !== '.ti.' && $ending[2] !== '.n.' ){continue 2;}
			
 
				+                    if ($ending[2] !== '.ti.' && $ending[2] !== '.n.') {
			
 
				+                        continue 2;
			
 
				+                    }
			
 
				                     break;
			
 
				                 case '.adj:base.':
			
 
				-                    if($ending[2] !== '.ti.' && $ending[2] !== '.n.' ){continue 2;}
			
 
				+                    if ($ending[2] !== '.ti.' && $ending[2] !== '.n.') {
			
 
				+                        continue 2;
			
 
				+                    }
			
 
				                     break;
			
 
				                 case '.v:base.':
			
 
				-                    if($ending[2] !== '.v.'){continue 2;}
			
 
				+                    if ($ending[2] !== '.v.') {
			
 
				+                        continue 2;
			
 
				+                    }
			
 
				                     break;
			
 
				                 default:
			
 
				                     continue 2;
			
@@ -60,51 +70,55 @@ class CaseMan
 
				                 //TODO 加两个sandhi
			
 
				                 $hasSandhi = false;
			
 
				                 foreach ($case->union as $sandhi) {
			
 
				-                    $sandhiLen = mb_strlen($sandhi[0],'UTF-8');
			
 
				+                    $sandhiLen = mb_strlen($sandhi[0], 'UTF-8');
			
 
				                     $sandhiEnd = mb_substr($word, 0 - $sandhiLen, null, "UTF-8");
			
 
				                     if ($sandhiEnd === $sandhi[0]) {
			
 
				                         $sandhiWord = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $sandhiLen, "UTF-8") . $sandhi[1];
			
 
				-                        $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
			
 
				-                        if($count){
			
 
				+                        $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
			
 
				+                        if ($count) {
			
 
				                             $hasSandhi = true;
			
 
				-                            $newWord[] = ['word'=>$sandhiWord,
			
 
				-                                'ending'=>$ending[1],
			
 
				-                                'type'=>'.un.',
			
 
				-                                'grammar'=>'',
			
 
				-                                'factors'=>"{$word}+{$sandhi[2]}",
			
 
				-                                'count'=>$count->count,
			
 
				-                                'bold'=>$count->bold
			
 
				-                                ];
			
 
				-                                //添加一个去掉ti的数据
			
 
				-                            if($sandhi[2] === 'iti'){
			
 
				-                                $newWord[] = ['word'=>mb_substr($sandhiWord,0,-2,'UTF-8'),
			
 
				-                                    'ending'=>$ending[1],
			
 
				-                                    'grammar'=>$ending[3],
			
 
				-                                    'factors'=>"{$base}+[{$ending[1]}]",
			
 
				-                                    'count'=>$count->count,
			
 
				-                                    'bold'=>$count->bold
			
 
				+                            $newWord[] = [
			
 
				+                                'word' => $sandhiWord,
			
 
				+                                'ending' => $ending[1],
			
 
				+                                'type' => '.un.',
			
 
				+                                'grammar' => '',
			
 
				+                                'factors' => "{$word}+{$sandhi[2]}",
			
 
				+                                'count' => $count->count,
			
 
				+                                'bold' => $count->bold
			
 
				+                            ];
			
 
				+                            //添加一个去掉ti的数据
			
 
				+                            if ($sandhi[2] === 'iti') {
			
 
				+                                $newWord[] = [
			
 
				+                                    'word' => mb_substr($sandhiWord, 0, -2, 'UTF-8'),
			
 
				+                                    'ending' => $ending[1],
			
 
				+                                    'grammar' => $ending[3],
			
 
				+                                    'factors' => "{$base}+[{$ending[1]}]",
			
 
				+                                    'count' => $count->count,
			
 
				+                                    'bold' => $count->bold
			
 
				                                 ];
			
 
				                             }
			
 
				                         }
			
 
				                     }
			
 
				                 }
			
 
				-                $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
			
 
				-                if($count || $hasSandhi){
			
 
				-                    $newWord[] = ['word'=>$word,
			
 
				-                                  'ending'=>$ending[1],
			
 
				-                                  'grammar'=>$ending[3],
			
 
				-                                  'factors'=>"{$base}+[{$ending[1]}]",
			
 
				-                                  'count'=>$count?$count->count:0,
			
 
				-                                  'bold'=>$count?$count->bold:0
			
 
				-                                ];
			
 
				+                $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
			
 
				+                if ($count || $hasSandhi) {
			
 
				+                    $newWord[] = [
			
 
				+                        'word' => $word,
			
 
				+                        'ending' => $ending[1],
			
 
				+                        'grammar' => $ending[3],
			
 
				+                        'factors' => "{$base}+[{$ending[1]}]",
			
 
				+                        'count' => $count ? $count->count : 0,
			
 
				+                        'bold' => $count ? $count->bold : 0
			
 
				+                    ];
			
 
				                 }
			
 
				             }
			
 
				         }
			
 
				 
			
 
				         return $newWord;
			
 
				-	}
			
 
				+    }
			
 
				 
			
 
				-    private function endingMatch($base,$ending,$array=null){
			
 
				+    private function endingMatch($base, $ending, $array = null)
			
 
				+    {
			
 
				         $case = new CaseEnding();
			
 
				         $output = array();
			
 
				         $endingLen = mb_strlen($ending[0], "UTF-8");
			
@@ -112,16 +126,16 @@ class CaseMan
 
				         if ($wordEnd === $ending[0]) {
			
 
				             //匹配成功
			
 
				             $word = mb_substr($base, 0, mb_strlen($base, "UTF-8") - $endingLen, "UTF-8") . $ending[1];
			
 
				-            if(is_array($array)){
			
 
				-                if(!isset($array[$word])){
			
 
				-                    $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
			
 
				+            if (is_array($array)) {
			
 
				+                if (!isset($array[$word])) {
			
 
				+                    $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
			
 
				                 }
			
 
				-            }else{
			
 
				-                $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
			
 
				+            } else {
			
 
				+                $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
			
 
				             }
			
 
				-            if(isset($count) && $count){
			
 
				-                $output[$word] = ["count"=>$count->count,"bold"=>$count->bold];
			
 
				-            }else{
			
 
				+            if (isset($count) && $count) {
			
 
				+                $output[$word] = ["count" => $count->count, "bold" => $count->bold];
			
 
				+            } else {
			
 
				                 $output[$word] = false;
			
 
				             }
			
 
				 
			
@@ -132,16 +146,16 @@ class CaseMan
 
				                 $sandhiEnd = mb_substr($word, 0 - $sandhiLen, null, "UTF-8");
			
 
				                 if ($sandhiEnd === $sandhi[0]) {
			
 
				                     $sandhiWord = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $sandhiLen, "UTF-8") . $sandhi[1];
			
 
				-                    if(is_array($array)){
			
 
				-                        if(!isset($array[$sandhiWord])){
			
 
				-                            $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
			
 
				+                    if (is_array($array)) {
			
 
				+                        if (!isset($array[$sandhiWord])) {
			
 
				+                            $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
			
 
				                         }
			
 
				-                    }else{
			
 
				-                        $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
			
 
				+                    } else {
			
 
				+                        $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
			
 
				                     }
			
 
				-                    if(isset($count) && $count){
			
 
				-                        $output[$sandhiWord] = ["count"=>$count->count,"bold"=>$count->bold];
			
 
				-                    }else{
			
 
				+                    if (isset($count) && $count) {
			
 
				+                        $output[$sandhiWord] = ["count" => $count->count, "bold" => $count->bold];
			
 
				+                    } else {
			
 
				                         $output[$sandhiWord] = false;
			
 
				                     }
			
 
				                 }
			
@@ -149,17 +163,18 @@ class CaseMan
 
				         }
			
 
				         return $output;
			
 
				     }
			
 
				-	/**
			
 
				+    /**
			
 
				      * 从词干到单词的变化
			
 
				      *
			
 
				      * @return void
			
 
				      */
			
 
				-	public function BaseToWord($base,$confidence=0.5){
			
 
				+    public function BaseToWord($base, $confidence = 0.5)
			
 
				+    {
			
 
				         $newWord = array();
			
 
				         $case = new CaseEnding();
			
 
				         foreach ($case->ending as  $ending) {
			
 
				             # code...
			
 
				-            if($ending[4]<$confidence){
			
 
				+            if ($ending[4] < $confidence) {
			
 
				                 continue;
			
 
				             }
			
 
				             /*
			
@@ -174,171 +189,168 @@ class CaseMan
 
				             if ($wordEnd === $ending[0]) {
			
 
				                 //匹配成功
			
 
				                 $word = mb_substr($base, 0, mb_strlen($base, "UTF-8") - $endingLen, "UTF-8") . $ending[1];
			
 
				-                if(!isset($newWord[$word])){
			
 
				-                    $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
			
 
				-                    if($count){
			
 
				-                        $newWord[$word] = ["count"=>$count->count,"bold"=>$count->bold];
			
 
				-                    }else{
			
 
				+                if (!isset($newWord[$word])) {
			
 
				+                    $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
			
 
				+                    if ($count) {
			
 
				+                        $newWord[$word] = ["count" => $count->count, "bold" => $count->bold];
			
 
				+                    } else {
			
 
				                         $newWord[$word] = false;
			
 
				                     }
			
 
				                 }
			
 
				                 //尝试sandhi
			
 
				                 //TODO 加两个sandhi
			
 
				                 foreach ($case->union as $sandhi) {
			
 
				-                    $sandhiLen = mb_strlen($sandhi[0],'UTF-8');
			
 
				+                    $sandhiLen = mb_strlen($sandhi[0], 'UTF-8');
			
 
				                     $sandhiEnd = mb_substr($word, 0 - $sandhiLen, null, "UTF-8");
			
 
				                     if ($sandhiEnd === $sandhi[0]) {
			
 
				                         $sandhiWord = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $sandhiLen, "UTF-8") . $sandhi[1];
			
 
				-                        if(!isset($newWord[$sandhiWord])){
			
 
				-                            $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
			
 
				-                            if($count){
			
 
				-                                $newWord[$sandhiWord] = ["count"=>$count->count,"bold"=>$count->bold];
			
 
				-                            }else{
			
 
				+                        if (!isset($newWord[$sandhiWord])) {
			
 
				+                            $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
			
 
				+                            if ($count) {
			
 
				+                                $newWord[$sandhiWord] = ["count" => $count->count, "bold" => $count->bold];
			
 
				+                            } else {
			
 
				                                 $newWord[$sandhiWord] = false;
			
 
				                             }
			
 
				                         }
			
 
				                     }
			
 
				                 }
			
 
				             }
			
 
				-
			
 
				         }
			
 
				         $result = [];
			
 
				         foreach ($newWord as $key => $value) {
			
 
				             # code...
			
 
				-            if($value !== false){
			
 
				-                $result[] = ['word'=>$key,'ending',"count"=>$value["count"],"bold"=>$value["bold"]];
			
 
				+            if ($value !== false) {
			
 
				+                $result[] = ['word' => $key, 'ending', "count" => $value["count"], "bold" => $value["bold"]];
			
 
				             }
			
 
				         }
			
 
				         return $result;
			
 
				-	}
			
 
				+    }
			
 
				 
			
 
				-	/**
			
 
				+    /**
			
 
				      * 从单词到词干的变化
			
 
				      * 小蝌蚪找妈妈
			
 
				-     * @return void
			
 
				+     * @return array
			
 
				      */
			
 
				-	public function WordToBase($word,$deep=1,$verify=true){
			
 
				-		$newWords = array();
			
 
				-		$newBase = array();
			
 
				-		$input[$word] = true;
			
 
				-		$case = new CaseEnding();
			
 
				-		for ($i=0; $i < $deep; $i++) {
			
 
				-			# code...
			
 
				-			foreach ($input as $currWord => $status) {
			
 
				-				# code...
			
 
				-				if($status){
			
 
				-					$input[$currWord] = false;
			
 
				-					foreach ($case->ending as  $ending) {
			
 
				-						# code...
			
 
				-                        if($ending[4] < 0.5){
			
 
				+    public function WordToBase($word, $deep = 1, $verify = true)
			
 
				+    {
			
 
				+        $newWords = array();
			
 
				+        $newBase = array();
			
 
				+        $input[$word] = true;
			
 
				+        $case = new CaseEnding();
			
 
				+        for ($i = 0; $i < $deep; $i++) {
			
 
				+            # code...
			
 
				+            foreach ($input as $currWord => $status) {
			
 
				+                # code...
			
 
				+                if ($status) {
			
 
				+                    $input[$currWord] = false;
			
 
				+                    foreach ($case->ending as  $ending) {
			
 
				+                        # code...
			
 
				+                        if ($ending[4] < 0.5) {
			
 
				                             continue;
			
 
				                         }
			
 
				-						$endingLen = mb_strlen($ending[1], "UTF-8");
			
 
				-						$wordEnd = mb_substr($currWord, 0 - $endingLen, null, "UTF-8");
			
 
				-						if ($wordEnd === $ending[1]) {
			
 
				-							//匹配成功
			
 
				-							$base = mb_substr($currWord, 0, mb_strlen($currWord, "UTF-8") - $endingLen, "UTF-8") . $ending[0];
			
 
				-							if(!isset($newBase[$base])){
			
 
				-								$newBase[$base] = array();
			
 
				-							}
			
 
				-							array_push($newBase[$base],[
			
 
				-								'word'=>$currWord,
			
 
				-								'type'=>$ending[2],
			
 
				-								'grammar'=>$ending[3],
			
 
				-								'parent'=>$base,
			
 
				-								'factors'=>"{$base}+[{$ending[1]}]",
			
 
				-								'confidence'=>$ending[4],
			
 
				-							]);
			
 
				-						}
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-			foreach ($newBase as $currWord => $value) {
			
 
				-				# 把新词加入列表
			
 
				-				if(!isset($input[$currWord])){
			
 
				-					$input[$currWord] = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		if($verify){
			
 
				-			$output = array();
			
 
				-			foreach ($newBase as $base => $rows) {
			
 
				-				# code...
			
 
				-				if(($verify = $this->VerifyBase($base,$rows)) !== false){
			
 
				-					if(count($verify)>0){
			
 
				-						$output[$base] = $verify;
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-			if(count($output)==0){
			
 
				-				//如果验证失败 输出最可能的结果
			
 
				-				$short = 10000;
			
 
				-				$shortBase = "";
			
 
				-				foreach ($newBase as $base => $rows) {
			
 
				-					if(mb_strlen($base,"UTF-8") < $short){
			
 
				-						$short = mb_strlen($base,"UTF-8");
			
 
				-						$shortBase = $base;
			
 
				-					}
			
 
				-				}
			
 
				-				foreach ($newBase as $base => $rows) {
			
 
				-					if($base == $shortBase){
			
 
				-						$output[$base] = $rows;
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-			return $output;
			
 
				-		}else{
			
 
				-			return $newBase;
			
 
				-		}
			
 
				-
			
 
				+                        $endingLen = mb_strlen($ending[1], "UTF-8");
			
 
				+                        $wordEnd = mb_substr($currWord, 0 - $endingLen, null, "UTF-8");
			
 
				+                        if ($wordEnd === $ending[1]) {
			
 
				+                            //匹配成功
			
 
				+                            $base = mb_substr($currWord, 0, mb_strlen($currWord, "UTF-8") - $endingLen, "UTF-8") . $ending[0];
			
 
				+                            if (!isset($newBase[$base])) {
			
 
				+                                $newBase[$base] = array();
			
 
				+                            }
			
 
				+                            array_push($newBase[$base], [
			
 
				+                                'word' => $currWord,
			
 
				+                                'type' => $ending[2],
			
 
				+                                'grammar' => $ending[3],
			
 
				+                                'parent' => $base,
			
 
				+                                'factors' => "{$base}+[{$ending[1]}]",
			
 
				+                                'confidence' => $ending[4],
			
 
				+                            ]);
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+            foreach ($newBase as $currWord => $value) {
			
 
				+                # 把新词加入列表
			
 
				+                if (!isset($input[$currWord])) {
			
 
				+                    $input[$currWord] = true;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				 
			
 
				-	}
			
 
				-	/**
			
 
				-	 * 验证base在字典中是否存在
			
 
				-	 */
			
 
				-	public function VerifyBase($base,$rows){
			
 
				-		#
			
 
				-		$output = array();
			
 
				-		$dictWords = UserDict::where('word',$base)->select(['type','grammar'])->groupBy(['type','grammar'])->get();
			
 
				-		if(count($dictWords)>0){
			
 
				-			$newBase[$base] = 1;
			
 
				-			$case = array();
			
 
				-			//字典中这个拼写的单词的语法信息
			
 
				-			foreach ($dictWords as $value) {
			
 
				-				# code...
			
 
				-				$case["{$value->type}{$value->grammar}"] = 1;
			
 
				-			}
			
 
				-			foreach ($rows as $value) {
			
 
				-				//根据输入的猜测的type,grammar拼接合理的 parent 语法信息
			
 
				-				switch ($value['type']) {
			
 
				-					case '.n.':
			
 
				-						$parentType = '.n:base.';
			
 
				-						break;
			
 
				-					case '.ti.':
			
 
				-						$parentType = '.ti:base.';
			
 
				-						break;
			
 
				-					case '.v.':
			
 
				-						$parentType = '.v:base.';
			
 
				-						break;
			
 
				-					default:
			
 
				-						$parentType = '';
			
 
				-						break;
			
 
				-				}
			
 
				-				if(!empty($value['grammar']) && $value['type'] !== ".v."){
			
 
				-					$arrGrammar = explode('$',$value['grammar']);
			
 
				-					$parentType .=  $arrGrammar[0];
			
 
				-				}
			
 
				-				# 只保存语法信息合理的数据
			
 
				-				if(isset($case[$parentType])){
			
 
				-					array_push($output,$value);
			
 
				-				}
			
 
				-			}
			
 
				-			return $output;
			
 
				-		}else{
			
 
				-			return false;
			
 
				-		}
			
 
				-	}
			
 
				+        if ($verify) {
			
 
				+            $output = array();
			
 
				+            foreach ($newBase as $base => $rows) {
			
 
				+                # code...
			
 
				+                if (($verify = $this->VerifyBase($base, $rows)) !== false) {
			
 
				+                    if (count($verify) > 0) {
			
 
				+                        $output[$base] = $verify;
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+            if (count($output) == 0) {
			
 
				+                //如果验证失败 输出最可能的结果
			
 
				+                $short = 10000;
			
 
				+                $shortBase = "";
			
 
				+                foreach ($newBase as $base => $rows) {
			
 
				+                    if (mb_strlen($base, "UTF-8") < $short) {
			
 
				+                        $short = mb_strlen($base, "UTF-8");
			
 
				+                        $shortBase = $base;
			
 
				+                    }
			
 
				+                }
			
 
				+                foreach ($newBase as $base => $rows) {
			
 
				+                    if ($base == $shortBase) {
			
 
				+                        $output[$base] = $rows;
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+            return $output;
			
 
				+        } else {
			
 
				+            return $newBase;
			
 
				+        }
			
 
				+    }
			
 
				+    /**
			
 
				+     * 验证base在字典中是否存在
			
 
				+     */
			
 
				+    public function VerifyBase($base, $rows)
			
 
				+    {
			
 
				+        #
			
 
				+        $output = array();
			
 
				+        $dictWords = UserDict::where('word', $base)->select(['type', 'grammar'])->groupBy(['type', 'grammar'])->get();
			
 
				+        if (count($dictWords) > 0) {
			
 
				+            $newBase[$base] = 1;
			
 
				+            $case = array();
			
 
				+            //字典中这个拼写的单词的语法信息
			
 
				+            foreach ($dictWords as $value) {
			
 
				+                # code...
			
 
				+                $case["{$value->type}{$value->grammar}"] = 1;
			
 
				+            }
			
 
				+            foreach ($rows as $value) {
			
 
				+                //根据输入的猜测的type,grammar拼接合理的 parent 语法信息
			
 
				+                switch ($value['type']) {
			
 
				+                    case '.n.':
			
 
				+                        $parentType = '.n:base.';
			
 
				+                        break;
			
 
				+                    case '.ti.':
			
 
				+                        $parentType = '.ti:base.';
			
 
				+                        break;
			
 
				+                    case '.v.':
			
 
				+                        $parentType = '.v:base.';
			
 
				+                        break;
			
 
				+                    default:
			
 
				+                        $parentType = '';
			
 
				+                        break;
			
 
				+                }
			
 
				+                if (!empty($value['grammar']) && $value['type'] !== ".v.") {
			
 
				+                    $arrGrammar = explode('$', $value['grammar']);
			
 
				+                    $parentType .=  $arrGrammar[0];
			
 
				+                }
			
 
				+                # 只保存语法信息合理的数据
			
 
				+                if (isset($case[$parentType])) {
			
 
				+                    array_push($output, $value);
			
 
				+                }
			
 
				+            }
			
 
				+            return $output;
			
 
				+        } else {
			
 
				+            return false;
			
 
				+        }
			
 
				+    }
			
 
				 }
			
 
				-
			
 
				-