Browse Source

:art: formatting

visuddhinanda 1 year ago
parent
commit
5ab5143438
1 changed file with 207 additions and 195 deletions
  1. 207 195
      api-v8/app/Tools/CaseMan.php

+ 207 - 195
api-v8/app/Tools/CaseMan.php

@@ -1,4 +1,5 @@
 <?php
+
 namespace App\Tools;
 
 use Illuminate\Support\Facades\Cache;
@@ -9,7 +10,7 @@ use App\Models\WordIndex;
 
 class CaseMan
 {
-	/**
+    /**
      * Create a new class instance.
      *
      * @return void
@@ -19,32 +20,41 @@ class CaseMan
         return;
     }
 
-    	/**
+    /**
      * 从词干到单词的变化
      *
      * @return void
      */
-	public function Declension($base,$type=null,$grammar='',$confidence=0.5){
+    public function Declension($base, $type = null, $grammar = '', $confidence = 0.5)
+    {
         $newWord = array();
         $case = new CaseEnding();
         foreach ($case->ending as  $ending) {
             # code...
-            if($ending[4]<$confidence){
+            if ($ending[4] < $confidence) {
                 continue;
             }
 
             switch ($type) {
                 case '.n:base.':
-                    if($ending[2] !== '.n.' || strpos($ending[3],$grammar)!==0){continue 2;}
+                    if ($ending[2] !== '.n.' || strpos($ending[3], $grammar) !== 0) {
+                        continue 2;
+                    }
                     break;
                 case '.ti:base.':
-                    if($ending[2] !== '.ti.' && $ending[2] !== '.n.' ){continue 2;}
+                    if ($ending[2] !== '.ti.' && $ending[2] !== '.n.') {
+                        continue 2;
+                    }
                     break;
                 case '.adj:base.':
-                    if($ending[2] !== '.ti.' && $ending[2] !== '.n.' ){continue 2;}
+                    if ($ending[2] !== '.ti.' && $ending[2] !== '.n.') {
+                        continue 2;
+                    }
                     break;
                 case '.v:base.':
-                    if($ending[2] !== '.v.'){continue 2;}
+                    if ($ending[2] !== '.v.') {
+                        continue 2;
+                    }
                     break;
                 default:
                     continue 2;
@@ -60,51 +70,55 @@ class CaseMan
                 //TODO 加两个sandhi
                 $hasSandhi = false;
                 foreach ($case->union as $sandhi) {
-                    $sandhiLen = mb_strlen($sandhi[0],'UTF-8');
+                    $sandhiLen = mb_strlen($sandhi[0], 'UTF-8');
                     $sandhiEnd = mb_substr($word, 0 - $sandhiLen, null, "UTF-8");
                     if ($sandhiEnd === $sandhi[0]) {
                         $sandhiWord = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $sandhiLen, "UTF-8") . $sandhi[1];
-                        $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
-                        if($count){
+                        $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
+                        if ($count) {
                             $hasSandhi = true;
-                            $newWord[] = ['word'=>$sandhiWord,
-                                'ending'=>$ending[1],
-                                'type'=>'.un.',
-                                'grammar'=>'',
-                                'factors'=>"{$word}+{$sandhi[2]}",
-                                'count'=>$count->count,
-                                'bold'=>$count->bold
-                                ];
-                                //添加一个去掉ti的数据
-                            if($sandhi[2] === 'iti'){
-                                $newWord[] = ['word'=>mb_substr($sandhiWord,0,-2,'UTF-8'),
-                                    'ending'=>$ending[1],
-                                    'grammar'=>$ending[3],
-                                    'factors'=>"{$base}+[{$ending[1]}]",
-                                    'count'=>$count->count,
-                                    'bold'=>$count->bold
+                            $newWord[] = [
+                                'word' => $sandhiWord,
+                                'ending' => $ending[1],
+                                'type' => '.un.',
+                                'grammar' => '',
+                                'factors' => "{$word}+{$sandhi[2]}",
+                                'count' => $count->count,
+                                'bold' => $count->bold
+                            ];
+                            //添加一个去掉ti的数据
+                            if ($sandhi[2] === 'iti') {
+                                $newWord[] = [
+                                    'word' => mb_substr($sandhiWord, 0, -2, 'UTF-8'),
+                                    'ending' => $ending[1],
+                                    'grammar' => $ending[3],
+                                    'factors' => "{$base}+[{$ending[1]}]",
+                                    'count' => $count->count,
+                                    'bold' => $count->bold
                                 ];
                             }
                         }
                     }
                 }
-                $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
-                if($count || $hasSandhi){
-                    $newWord[] = ['word'=>$word,
-                                  'ending'=>$ending[1],
-                                  'grammar'=>$ending[3],
-                                  'factors'=>"{$base}+[{$ending[1]}]",
-                                  'count'=>$count?$count->count:0,
-                                  'bold'=>$count?$count->bold:0
-                                ];
+                $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
+                if ($count || $hasSandhi) {
+                    $newWord[] = [
+                        'word' => $word,
+                        'ending' => $ending[1],
+                        'grammar' => $ending[3],
+                        'factors' => "{$base}+[{$ending[1]}]",
+                        'count' => $count ? $count->count : 0,
+                        'bold' => $count ? $count->bold : 0
+                    ];
                 }
             }
         }
 
         return $newWord;
-	}
+    }
 
-    private function endingMatch($base,$ending,$array=null){
+    private function endingMatch($base, $ending, $array = null)
+    {
         $case = new CaseEnding();
         $output = array();
         $endingLen = mb_strlen($ending[0], "UTF-8");
@@ -112,16 +126,16 @@ class CaseMan
         if ($wordEnd === $ending[0]) {
             //匹配成功
             $word = mb_substr($base, 0, mb_strlen($base, "UTF-8") - $endingLen, "UTF-8") . $ending[1];
-            if(is_array($array)){
-                if(!isset($array[$word])){
-                    $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
+            if (is_array($array)) {
+                if (!isset($array[$word])) {
+                    $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
                 }
-            }else{
-                $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
+            } else {
+                $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
             }
-            if(isset($count) && $count){
-                $output[$word] = ["count"=>$count->count,"bold"=>$count->bold];
-            }else{
+            if (isset($count) && $count) {
+                $output[$word] = ["count" => $count->count, "bold" => $count->bold];
+            } else {
                 $output[$word] = false;
             }
 
@@ -132,16 +146,16 @@ class CaseMan
                 $sandhiEnd = mb_substr($word, 0 - $sandhiLen, null, "UTF-8");
                 if ($sandhiEnd === $sandhi[0]) {
                     $sandhiWord = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $sandhiLen, "UTF-8") . $sandhi[1];
-                    if(is_array($array)){
-                        if(!isset($array[$sandhiWord])){
-                            $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
+                    if (is_array($array)) {
+                        if (!isset($array[$sandhiWord])) {
+                            $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
                         }
-                    }else{
-                        $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
+                    } else {
+                        $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
                     }
-                    if(isset($count) && $count){
-                        $output[$sandhiWord] = ["count"=>$count->count,"bold"=>$count->bold];
-                    }else{
+                    if (isset($count) && $count) {
+                        $output[$sandhiWord] = ["count" => $count->count, "bold" => $count->bold];
+                    } else {
                         $output[$sandhiWord] = false;
                     }
                 }
@@ -149,17 +163,18 @@ class CaseMan
         }
         return $output;
     }
-	/**
+    /**
      * 从词干到单词的变化
      *
      * @return void
      */
-	public function BaseToWord($base,$confidence=0.5){
+    public function BaseToWord($base, $confidence = 0.5)
+    {
         $newWord = array();
         $case = new CaseEnding();
         foreach ($case->ending as  $ending) {
             # code...
-            if($ending[4]<$confidence){
+            if ($ending[4] < $confidence) {
                 continue;
             }
             /*
@@ -174,171 +189,168 @@ class CaseMan
             if ($wordEnd === $ending[0]) {
                 //匹配成功
                 $word = mb_substr($base, 0, mb_strlen($base, "UTF-8") - $endingLen, "UTF-8") . $ending[1];
-                if(!isset($newWord[$word])){
-                    $count = WordIndex::where('word',$word)->select(['count','bold'])->first();
-                    if($count){
-                        $newWord[$word] = ["count"=>$count->count,"bold"=>$count->bold];
-                    }else{
+                if (!isset($newWord[$word])) {
+                    $count = WordIndex::where('word', $word)->select(['count', 'bold'])->first();
+                    if ($count) {
+                        $newWord[$word] = ["count" => $count->count, "bold" => $count->bold];
+                    } else {
                         $newWord[$word] = false;
                     }
                 }
                 //尝试sandhi
                 //TODO 加两个sandhi
                 foreach ($case->union as $sandhi) {
-                    $sandhiLen = mb_strlen($sandhi[0],'UTF-8');
+                    $sandhiLen = mb_strlen($sandhi[0], 'UTF-8');
                     $sandhiEnd = mb_substr($word, 0 - $sandhiLen, null, "UTF-8");
                     if ($sandhiEnd === $sandhi[0]) {
                         $sandhiWord = mb_substr($word, 0, mb_strlen($word, "UTF-8") - $sandhiLen, "UTF-8") . $sandhi[1];
-                        if(!isset($newWord[$sandhiWord])){
-                            $count = WordIndex::where('word',$sandhiWord)->select(['count','bold'])->first();
-                            if($count){
-                                $newWord[$sandhiWord] = ["count"=>$count->count,"bold"=>$count->bold];
-                            }else{
+                        if (!isset($newWord[$sandhiWord])) {
+                            $count = WordIndex::where('word', $sandhiWord)->select(['count', 'bold'])->first();
+                            if ($count) {
+                                $newWord[$sandhiWord] = ["count" => $count->count, "bold" => $count->bold];
+                            } else {
                                 $newWord[$sandhiWord] = false;
                             }
                         }
                     }
                 }
             }
-
         }
         $result = [];
         foreach ($newWord as $key => $value) {
             # code...
-            if($value !== false){
-                $result[] = ['word'=>$key,'ending',"count"=>$value["count"],"bold"=>$value["bold"]];
+            if ($value !== false) {
+                $result[] = ['word' => $key, 'ending', "count" => $value["count"], "bold" => $value["bold"]];
             }
         }
         return $result;
-	}
+    }
 
-	/**
+    /**
      * 从单词到词干的变化
      * 小蝌蚪找妈妈
-     * @return void
+     * @return array
      */
-	public function WordToBase($word,$deep=1,$verify=true){
-		$newWords = array();
-		$newBase = array();
-		$input[$word] = true;
-		$case = new CaseEnding();
-		for ($i=0; $i < $deep; $i++) {
-			# code...
-			foreach ($input as $currWord => $status) {
-				# code...
-				if($status){
-					$input[$currWord] = false;
-					foreach ($case->ending as  $ending) {
-						# code...
-                        if($ending[4] < 0.5){
+    public function WordToBase($word, $deep = 1, $verify = true)
+    {
+        $newWords = array();
+        $newBase = array();
+        $input[$word] = true;
+        $case = new CaseEnding();
+        for ($i = 0; $i < $deep; $i++) {
+            # code...
+            foreach ($input as $currWord => $status) {
+                # code...
+                if ($status) {
+                    $input[$currWord] = false;
+                    foreach ($case->ending as  $ending) {
+                        # code...
+                        if ($ending[4] < 0.5) {
                             continue;
                         }
-						$endingLen = mb_strlen($ending[1], "UTF-8");
-						$wordEnd = mb_substr($currWord, 0 - $endingLen, null, "UTF-8");
-						if ($wordEnd === $ending[1]) {
-							//匹配成功
-							$base = mb_substr($currWord, 0, mb_strlen($currWord, "UTF-8") - $endingLen, "UTF-8") . $ending[0];
-							if(!isset($newBase[$base])){
-								$newBase[$base] = array();
-							}
-							array_push($newBase[$base],[
-								'word'=>$currWord,
-								'type'=>$ending[2],
-								'grammar'=>$ending[3],
-								'parent'=>$base,
-								'factors'=>"{$base}+[{$ending[1]}]",
-								'confidence'=>$ending[4],
-							]);
-						}
-					}
-				}
-			}
-			foreach ($newBase as $currWord => $value) {
-				# 把新词加入列表
-				if(!isset($input[$currWord])){
-					$input[$currWord] = true;
-				}
-			}
-		}
-
-		if($verify){
-			$output = array();
-			foreach ($newBase as $base => $rows) {
-				# code...
-				if(($verify = $this->VerifyBase($base,$rows)) !== false){
-					if(count($verify)>0){
-						$output[$base] = $verify;
-					}
-				}
-			}
-			if(count($output)==0){
-				//如果验证失败 输出最可能的结果
-				$short = 10000;
-				$shortBase = "";
-				foreach ($newBase as $base => $rows) {
-					if(mb_strlen($base,"UTF-8") < $short){
-						$short = mb_strlen($base,"UTF-8");
-						$shortBase = $base;
-					}
-				}
-				foreach ($newBase as $base => $rows) {
-					if($base == $shortBase){
-						$output[$base] = $rows;
-					}
-				}
-			}
-			return $output;
-		}else{
-			return $newBase;
-		}
-
+                        $endingLen = mb_strlen($ending[1], "UTF-8");
+                        $wordEnd = mb_substr($currWord, 0 - $endingLen, null, "UTF-8");
+                        if ($wordEnd === $ending[1]) {
+                            //匹配成功
+                            $base = mb_substr($currWord, 0, mb_strlen($currWord, "UTF-8") - $endingLen, "UTF-8") . $ending[0];
+                            if (!isset($newBase[$base])) {
+                                $newBase[$base] = array();
+                            }
+                            array_push($newBase[$base], [
+                                'word' => $currWord,
+                                'type' => $ending[2],
+                                'grammar' => $ending[3],
+                                'parent' => $base,
+                                'factors' => "{$base}+[{$ending[1]}]",
+                                'confidence' => $ending[4],
+                            ]);
+                        }
+                    }
+                }
+            }
+            foreach ($newBase as $currWord => $value) {
+                # 把新词加入列表
+                if (!isset($input[$currWord])) {
+                    $input[$currWord] = true;
+                }
+            }
+        }
 
-	}
-	/**
-	 * 验证base在字典中是否存在
-	 */
-	public function VerifyBase($base,$rows){
-		#
-		$output = array();
-		$dictWords = UserDict::where('word',$base)->select(['type','grammar'])->groupBy(['type','grammar'])->get();
-		if(count($dictWords)>0){
-			$newBase[$base] = 1;
-			$case = array();
-			//字典中这个拼写的单词的语法信息
-			foreach ($dictWords as $value) {
-				# code...
-				$case["{$value->type}{$value->grammar}"] = 1;
-			}
-			foreach ($rows as $value) {
-				//根据输入的猜测的type,grammar拼接合理的 parent 语法信息
-				switch ($value['type']) {
-					case '.n.':
-						$parentType = '.n:base.';
-						break;
-					case '.ti.':
-						$parentType = '.ti:base.';
-						break;
-					case '.v.':
-						$parentType = '.v:base.';
-						break;
-					default:
-						$parentType = '';
-						break;
-				}
-				if(!empty($value['grammar']) && $value['type'] !== ".v."){
-					$arrGrammar = explode('$',$value['grammar']);
-					$parentType .=  $arrGrammar[0];
-				}
-				# 只保存语法信息合理的数据
-				if(isset($case[$parentType])){
-					array_push($output,$value);
-				}
-			}
-			return $output;
-		}else{
-			return false;
-		}
-	}
+        if ($verify) {
+            $output = array();
+            foreach ($newBase as $base => $rows) {
+                # code...
+                if (($verify = $this->VerifyBase($base, $rows)) !== false) {
+                    if (count($verify) > 0) {
+                        $output[$base] = $verify;
+                    }
+                }
+            }
+            if (count($output) == 0) {
+                //如果验证失败 输出最可能的结果
+                $short = 10000;
+                $shortBase = "";
+                foreach ($newBase as $base => $rows) {
+                    if (mb_strlen($base, "UTF-8") < $short) {
+                        $short = mb_strlen($base, "UTF-8");
+                        $shortBase = $base;
+                    }
+                }
+                foreach ($newBase as $base => $rows) {
+                    if ($base == $shortBase) {
+                        $output[$base] = $rows;
+                    }
+                }
+            }
+            return $output;
+        } else {
+            return $newBase;
+        }
+    }
+    /**
+     * 验证base在字典中是否存在
+     */
+    public function VerifyBase($base, $rows)
+    {
+        #
+        $output = array();
+        $dictWords = UserDict::where('word', $base)->select(['type', 'grammar'])->groupBy(['type', 'grammar'])->get();
+        if (count($dictWords) > 0) {
+            $newBase[$base] = 1;
+            $case = array();
+            //字典中这个拼写的单词的语法信息
+            foreach ($dictWords as $value) {
+                # code...
+                $case["{$value->type}{$value->grammar}"] = 1;
+            }
+            foreach ($rows as $value) {
+                //根据输入的猜测的type,grammar拼接合理的 parent 语法信息
+                switch ($value['type']) {
+                    case '.n.':
+                        $parentType = '.n:base.';
+                        break;
+                    case '.ti.':
+                        $parentType = '.ti:base.';
+                        break;
+                    case '.v.':
+                        $parentType = '.v:base.';
+                        break;
+                    default:
+                        $parentType = '';
+                        break;
+                }
+                if (!empty($value['grammar']) && $value['type'] !== ".v.") {
+                    $arrGrammar = explode('$', $value['grammar']);
+                    $parentType .=  $arrGrammar[0];
+                }
+                # 只保存语法信息合理的数据
+                if (isset($case[$parentType])) {
+                    array_push($output, $value);
+                }
+            }
+            return $output;
+        } else {
+            return false;
+        }
+    }
 }
-
-