Просмотр исходного кода

拆分失败的,找可能的base

visuddhinanda 2 года назад
Родитель
Коммит
78fb543728
1 изменённый файл: 20 добавлено и 1 удалено
  1. 20 1
      app/Tools/TurboSplit.php

+ 20 - 1
app/Tools/TurboSplit.php

@@ -567,6 +567,7 @@ class TurboSplit
 	}
 
 	public function splitA($word){
+        $caseman = new CaseMan();
 		$output = array();
 		//预处理连音词
 		$word1 = $this->splitSandhi($word);
@@ -578,6 +579,9 @@ class TurboSplit
 		}
 
 		foreach ($arrword as $oneword) {
+            if(mb_strlen($oneword)<5){
+                continue;
+            }
 			$this->result = array(); //清空递归程序的输出容器
             $node = ['word'=>"",'remain'=>$oneword,'children'=>[]];
 			if(mb_strlen($oneword)>35){
@@ -635,7 +639,7 @@ class TurboSplit
                             }
                         }
 						$this->log("结尾词:".$endOfFactor);
-						$caseman = new CaseMan();
+
 						//猜测单词的base
 						$parents = $caseman->WordToBase($oneword,1,false);
 						//找到结尾单词的base
@@ -700,6 +704,21 @@ class TurboSplit
 				}
 			} else {
                 $this->log("{$oneword} 切分失败");
+                $this->log("猜测可能的格位");
+                //猜测单词的base
+				$wordWithType = ['word'=>$oneword,'type'=>'','grammar'=>'','parent'=>'','factors'=>'','confidence'=>0];
+                $parents = $caseman->WordToBase($oneword,1,false);
+                foreach ($parents as $base=>$case) {
+                    foreach ($case as $value) {
+                        $wordWithType['type'] = $value['type'];
+                        $wordWithType['grammar'] = $value['grammar'];
+                        $wordWithType['factors'] = $value['factors'];
+                        $wordWithType['parent'] = $base;
+                        $wordWithType['confidence'] = $value['confidence'];
+                        $this->log("word:{$wordWithType['word']} ; type:{$wordWithType['type']}; grammar:{$wordWithType['grammar']};parent:{$wordWithType['parent']}");
+                        array_push($output,$wordWithType);
+                    }
+                }
 			}
 		}
 		return $output;