3 jaren geleden · 79e40f5aa5
--- a/app/Tools/TurboSplit.php
+++ b/app/Tools/TurboSplit.php
@@ -15,11 +15,8 @@ class TurboSplit
 
				 	protected $isDebug = false;
			
 
				 	#当前搜索路径信心指数，如果过低，马上终止这个路径的搜索
			
 
				 	protected $currPathCf;
			
 
				-	#内存信心指数表
			
 
				-	protected $confidence = array();
			
 
				 	//结果数组
			
 
				 	protected $result = array();
			
 
				-	protected $part = array();
			
 
				 	//最大结果数量
			
 
				 	protected $MAX_RESULT = 100;
			
 
				 	protected $MAX_RESULT2 = 5;
			
@@ -224,41 +221,27 @@ class TurboSplit
 
				 		$isFound = false;
			
 
				 		$count = 0;
			
 
				 		$cacheKey = "turbosplit/part/";
			
 
				-		if (isset($this->part["{$word}"])) {
			
 
				-			$word_count = $this->part["{$word}"][0];
			
 
				-			$case_len = $this->part["{$word}"][1];
			
 
				-			if ($word_count > 0) {
			
 
				-				$this->log("查到：{$word}:{$word_count}个");
			
 
				-				$isFound = true;
			
 
				-				$count = $word_count + 1;
			
 
				-			}
			
 
				-		} else {
			
 
				-			$db = $this->dict_lookup($word);
			
 
				-			$word_count = $db[0];
			
 
				-			$case_len = $db[1];
			
 
				-			//加入查询缓存
			
 
				-			$this->part["{$word}"] = $db;
			
 
				-			if ($word_count > 0) {
			
 
				-				Log::info("查到：{$word}:{$word_count}个");
			
 
				-				$isFound = true;
			
 
				-				$count = $word_count + 1;
			
 
				-			}
			
 
				-		}
			
 
				+		$wordPart  = Cache::remember($cacheKey.$word,1000,function() use($word){
			
 
				+			return implode(',',$this->dict_lookup($word));
			
 
				+		});
			
 
				+		$arrWordPart = explode(',',$wordPart);
			
 
				+		$word_count = $arrWordPart[0];
			
 
				+		$case_len = $arrWordPart[1];
			
 
				+		if ($word_count > 0) {
			
 
				+			$this->log("查到：{$word}:{$word_count}个");
			
 
				+			$isFound = true;
			
 
				+			$count = $word_count + 1;
			
 
				+		}		
			
 
				+
			
 
				 		//formula of confidence value 信心值计算公式
			
 
				 		if ($isFound) {
			
 
				-			if (isset($this->confidence["{$word}"])) {
			
 
				-				$cf = $this->confidence["{$word}"];
			
 
				-			} else {
			
 
				+			$cf  = Cache::remember("turbosplit/confidence/".$word,1000,function() use($word,$count,$case_len){
			
 
				 				$len = mb_strlen($word, "UTF-8") - $case_len;
			
 
				 				$len_correct = 1.2;
			
 
				 				$count2 = 1.1 + pow($count, 1.18);
			
 
				 				$conf_num = pow(1 / $count2, pow(($len - 0.5), $len_correct));
			
 
				-				$cf = round(1 / (1 + 640 * $conf_num), 9);
			
 
				-				//$cf = round((1-0.02*$case_len) / (1 + 640 * $conf_num), 9);
			
 
				-				$this->confidence["{$word}"] = $cf;
			
 
				-				Log::info("信心指数：{$word}:{$cf}");
			
 
				-
			
 
				-			}
			
 
				+				return round(1 / (1 + 640 * $conf_num), 9);
			
 
				+			});
			
 
				 			return ($cf);
			
 
				 		} else {
			
 
				 			return (-1);
			
@@ -314,12 +297,12 @@ class TurboSplit
 
				 		//直接找到
			
 
				 		$confidence = isExsit($strWord, $adj_len);
			
 
				 		if ($confidence > $c_threshhold) {
			
 
				-			$output[] = array($strWord, "", $confidence);
			
 
				+			array_push($output, array($strWord, "", $confidence));
			
 
				 		} 
			
 
				 		else {
			
 
				 			$confidence = isExsit("[" . $strWord . "]");
			
 
				 			if ($confidence > $c_threshhold) {
			
 
				-				$output[] = array("[" . $strWord . "]", "", $confidence);
			
 
				+				array_push($output, array("[" . $strWord . "]", "", $confidence));
			
 
				 			}
			
 
				 		}
			
 
				 
			
@@ -349,7 +332,7 @@ class TurboSplit
 
				 							$confidence = isExsit($str1, $adj_len)*$row["cf"];
			
 
				 							if ($confidence > $c_threshhold) {
			
 
				 								//信心指数大于预设的阈值，插入
			
 
				-								$output[] = array($str1, $str2, $confidence, $row["adj_len"]);
			
 
				+								array_push($output, array($str1, $str2, $confidence, $row["adj_len"]));
			
 
				 								$this->log("插入结构数组：{$str1} 剩余{$str2} 应用：{$row["a"]}-{$row["b"]}-{$row["c"]}");
			
 
				 								if ($express) {
			
 
				 									break;
			
@@ -370,7 +353,7 @@ class TurboSplit
 
				 							$str2 = $row["b"] . mb_substr($strWord, $i + $row["len"], null, "UTF-8");
			
 
				 							$confidence = isExsit($str2, $adj_len)*$row["cf"];
			
 
				 							if ($confidence > $c_threshhold) {
			
 
				-								$output[] = array($str2, $str1, $confidence, $row["adj_len"]);
			
 
				+								array_push($output, array($str2, $str1, $confidence, $row["adj_len"]));
			
 
				 								$this->log("将此次结果插入结果数组：剩余={$str2}");
			
 
				 								if ($express) {
			
 
				 									break;
			
@@ -516,11 +499,11 @@ class TurboSplit
 
				 			}
			
 
				 			if(mb_strlen($word,"UTF-8")>4){
			
 
				 				# 先看有没有中文意思
			
 
				-				Log::info("先看有没有中文意思");
			
 
				+				//Log::info("先看有没有中文意思");
			
 
				 				if(UserDict::where('word',$word)->where('mean','<>','')->where('language','<>','my')->exists()){
			
 
				 					$newword[]=$word;
			
 
				 				}else{
			
 
				-					Log::info("如果没有查巴缅替换拆分");
			
 
				+					//Log::info("如果没有查巴缅替换拆分");
			
 
				 					#如果没有查巴缅替换拆分
			
 
				 					if(UserDict::where('word',$word)->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->exists()){
			
 
				 						$pmPart = explode("+",UserDict::where('word',$word)->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->value('factors')) ;
			
@@ -530,12 +513,12 @@ class TurboSplit
 
				 						}
			
 
				 					}
			
 
				 					else{
			
 
				-						Log::info("如果没有查规则变形");
			
 
				+						//Log::info("如果没有查规则变形");
			
 
				 						#如果没有查规则变形
			
 
				 						if(UserDict::where('word',$word)->where('source','_SYS_REGULAR_')->exists()){
			
 
				 							$rglPart = explode("+",UserDict::where('word',$word)->where('source','_SYS_REGULAR_')->value('factors')) ;
			
 
				 							#看巴缅有没有第一部分
			
 
				-							Log::info("看巴缅有没有第一部分");
			
 
				+							//Log::info("看巴缅有没有第一部分");
			
 
				 							if(UserDict::where('word',$rglPart[0])->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->exists()){
			
 
				 								$pmPart = explode("+",UserDict::where('word',$rglPart[0])->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->value('factors')) ;
			
 
				 								foreach ($pmPart as  $pm) {
			
@@ -551,7 +534,7 @@ class TurboSplit
 
				 						}
			
 
				 						else{
			
 
				 							#还没有就认命了
			
 
				-							Log::info("还没有就认命了");
			
 
				+							//Log::info("还没有就认命了");
			
 
				 							$newword[]=$word;
			
 
				 						}
			
 
				 					}
			
@@ -584,7 +567,7 @@ class TurboSplit
 
				 					$newWord = $word2 . "-" .$newWord;
			
 
				 					$firstWord = $word1;
			
 
				 					$isFound=true;
			
 
				-				break;
			
 
				+					break;
			
 
				 				}
			
 
				 			}
			
 
				 		} while ($isFound);
			
@@ -595,13 +578,12 @@ class TurboSplit
 
				 	public function splitA($word){
			
 
				 		$output = array();
			
 
				 		//预处理连音词
			
 
				-		$word = $this->splitSandhi($word);
			
 
				-
			
 
				+		$word1 = $this->splitSandhi($word);
			
 
				 		# 处理双元音
			
 
				 		Log::info("处理双元音");
			
 
				-		$arrword = $this->splitDiphthong($word);
			
 
				+		$arrword = $this->splitDiphthong($word1);
			
 
				 		if (count($arrword) > 1) {
			
 
				-			array_push($output,['word'=>$word,'factors'=>implode("+", $arrword),'confidence'=>0.9999]);
			
 
				+			array_push($output,['word'=>$word,'type'=>'.un.','factors'=>implode("+", $arrword),'confidence'=>0.9999]);
			
 
				 		}
			
 
				 
			
 
				 		foreach ($arrword as $oneword) {