Преглед изворног кода

格位自动计算增加有效性判定

visuddhinanda пре 3 година
родитељ
комит
8b411b2e88
3 измењена фајла са 39 додавања и 16 брисања
  1. 2 5
      app/Http/Controllers/WbwLookupController.php
  2. 19 1
      app/Tools/CaseMan.php
  3. 18 10
      app/Tools/TurboSplit.php

+ 2 - 5
app/Http/Controllers/WbwLookupController.php

@@ -73,11 +73,8 @@ class WbwLookupController extends Controller
 						$newBase = array();
 						$parents = $caseman->WordToBase($word);
 						foreach ($parents as $base => $rows) {
-							# 只保存语法信息合理的数据
-							if(count($rows)>0){
-								Log::info("found:{$value['type']}-{$value['grammar']}-{$value['parent']}");
-								array_push($output,$rows);
-							}
+							Log::info("found:{$value['type']}-{$value['grammar']}-{$value['parent']}");
+							array_push($output,$rows);
 						}
 						Log::info("去尾查结束");
 					}

+ 19 - 1
app/Tools/CaseMan.php

@@ -78,7 +78,25 @@ class CaseMan
 			foreach ($newBase as $base => $rows) {
 				# code...
 				if(($verify = $this->VerifyBase($base,$rows)) !== false){
-					$output[$base] = $verify;
+					if(count($verify)>0){
+						$output[$base] = $verify;
+					}
+				}
+			}
+			if(count($output)==0){
+				//如果验证失败 输出最可能的结果
+				$short = 10000;
+				$shortBase = "";
+				foreach ($newBase as $base => $rows) {
+					if(mb_strlen($base,"UTF-8") < $short){
+						$short = mb_strlen($base,"UTF-8");
+						$shortBase = $base;
+					}
+				}
+				foreach ($newBase as $base => $rows) {
+					if($base == $shortBase){
+						$output[$base] = $rows;
+					}
 				}
 			}
 			return $output;

+ 18 - 10
app/Tools/TurboSplit.php

@@ -17,8 +17,9 @@ class TurboSplit
 	protected $currPathCf;
 	//结果数组
 	protected $result = array();
-	//最大结果数量
+	//过程中最大结果数量
 	protected $MAX_RESULT = 100;
+	//返回值最大结果数量
 	protected $MAX_RESULT2 = 5;
 	//最大递归深度
 	protected $MAX_DEEP = 16;
@@ -56,12 +57,12 @@ class TurboSplit
 		["a" => "[ṃ]", "b" => "iti", "c" => "nti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
 		["a" => "[ṃ]", "b" => "a", "c" => "ma", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
 		["a" => "ṃ", "b" => "a", "c" => "m", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
-		["a" => "[ṃ]", "b" => "ā", "c" => "mā", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
-		["a" => "[ṃ]", "b" => "u", "c" => "mu", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
-		["a" => "[ṃ]", "b" => "h", "c" => "ñh", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
-		["a" => "ā", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
-		["a" => "a", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
-		["a" => "ī", "b" => "[ṃ]", "c" => "im", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
+		["a" => "[ṃ]", "b" => "ā", "c" => "mā", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
+		["a" => "[ṃ]", "b" => "u", "c" => "mu", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
+		["a" => "[ṃ]", "b" => "h", "c" => "ñh", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
+		["a" => "ā", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
+		["a" => "a", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
+		["a" => "ī", "b" => "[ṃ]", "c" => "im", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
 		["a" => "ati", "b" => "tabba", "c" => "atabba", "len" => 6, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
 		["a" => "ati", "b" => "tabba", "c" => "itabba", "len" => 6, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
 		["a" => "iti", "b" => "a", "c" => "icca", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
@@ -614,7 +615,14 @@ class TurboSplit
 				arsort($this->result); //按信心指数排序
 				$iCount = 0;
 				foreach ($this->result as $row => $value) {
-					$newword = ['word'=>$oneword,'type'=>'','grammar'=>'','parent'=>'','factors'=>$row,'confidence'=>$value];
+					$factors = $row;
+					if(strpos($row,'[') !== FALSE){
+						$type = '.un.';
+						$factors = \str_replace(['+[ṃ]+','[ṃ]+'],'ṃ+',$row);
+					}else{
+						$type = '.cp.';
+					}
+					$newword = ['word'=>$oneword,'type'=>$type,'grammar'=>'','parent'=>'','factors'=>$factors,'confidence'=>$value];
 
 					if($iCount==0){
 						//后处理 找到base
@@ -669,8 +677,8 @@ class TurboSplit
 					//后处理 进一步切分没有意思的长词
 					Log::info("后处理 进一步切分没有意思的长词");
 					$new = $this->split2($row);
-					if($new!==$row){
-						$newword = ['word'=>$oneword,'type'=>'','grammar'=>'','parent'=>'','factors'=>$row,'confidence'=>$value];
+					if($new !== $row){
+						$newword = ['word'=>$oneword,'type'=>$type,'grammar'=>'','parent'=>'','factors'=>$new,'confidence'=>$value];
 						array_push($output,$newword);
 						#再处理一次
 						$new2 = split2($new);