пре 3 година · 6c75cf283a
--- a/app/Console/Commands/UpgradeCompound.php
+++ b/app/Console/Commands/UpgradeCompound.php
@@ -16,7 +16,7 @@ class UpgradeCompound extends Command
 
															      *
														
 
															      * @var string
														
 
															      */
														
 
															-    protected $signature = 'upgrade:compound {word?} {--test}';
														
 
															+    protected $signature = 'upgrade:compound {word?} {--debug} {--test}';
														
 
															     /**
														
 
															      * The console command description.
														
@@ -55,11 +55,16 @@ class UpgradeCompound extends Command
 
															 		$_word = $this->argument('word');
														
 
															 		if(!empty($_word)){
														
 
															 			$ts = new TurboSplit();
														
 
															+            if($this->option('debug')){
														
 
															+                $ts->debug(true);
														
 
															+            }
														
 
															 			$results = $ts->splitA($_word);
														
 
															 			Storage::disk('local')->put("tmp/compound1.csv", "word,type,grammar,parent,factors");
														
 
															 			foreach ($results as $key => $value) {
														
 
															 				# code...
														
 
															-				Storage::disk('local')->append("tmp/compound1.csv", "{$value['word']},{$value['type']},{$value['grammar']},{$value['parent']},{$value['factors']}");
														
 
															+                $output = "{$value['word']},{$value['type']},{$value['grammar']},{$value['parent']},{$value['factors']}";
														
 
															+                $this->info($output);
														
 
															+				Storage::disk('local')->append("tmp/compound1.csv", $output);
														
 
															 			}
														
 
															 			return 0;
														
 
															 		}
														
@@ -85,8 +90,9 @@ class UpgradeCompound extends Command
 
															 					$parts = $ts->splitA($word->word);
														
 
															 					foreach ($parts as $part) {
														
 
															 						# code...
														
 
															-						$this->info("{$part['word']},{$part['factors']},{$part['confidence']}");
														
 
															-						Storage::disk('local')->append("tmp/compound.md", "- `{$part['word']}`,{$part['factors']},{$part['confidence']}");
														
 
															+                        $info = "`{$part['word']}`,{$part['factors']},{$part['confidence']}";
														
 
															+						$this->info($info);
														
 
															+						Storage::disk('local')->append("tmp/compound.md", "- {$info}");
														
 
															 					}
														
 
															 				}
														
 
															 			}
														
--- a/app/Tools/TurboSplit.php
+++ b/app/Tools/TurboSplit.php
@@ -11,6 +11,7 @@ use Illuminate\Support\Arr;
 
															 class TurboSplit
														
 
															 {
														
 
															+    protected $node = [];
														
 
															 	protected $path = array();
														
 
															 	protected $isDebug = false;
														
 
															 	#当前搜索路径信心指数，如果过低，马上终止这个路径的搜索
														
@@ -58,6 +59,7 @@ class TurboSplit
 
															 		["a" => "[ṃ]", "b" => "a", "c" => "ma", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
														
 
															 		["a" => "ṃ", "b" => "a", "c" => "m", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>0.9999],
														
 
															 		["a" => "[ṃ]", "b" => "ā", "c" => "mā", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
														
 
															+		["a" => "ṃ", "b" => "ā", "c" => "mā", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.9],
														
 
															 		["a" => "[ṃ]", "b" => "u", "c" => "mu", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
														
 
															 		["a" => "[ṃ]", "b" => "h", "c" => "ñh", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
														
 
															 		["a" => "ā", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>0.8],
														
@@ -130,7 +132,7 @@ class TurboSplit
 
															 		for($i=0;$i<$this->MAX_DEEP;$i++ ){
														
 
															 			array_push($this->path, array("", 0));
														
 
															 		}
														
 
															-		
														
 
															+
														
 
															         return;
														
 
															     }
														
@@ -163,16 +165,20 @@ class TurboSplit
 
															 		if (strlen($word) <= 1) {
														
 
															 			return array(0,0);
														
 
															 		}
														
 
															+        $search = $word;
														
 
															 		//去掉单词首尾的 []
														
 
															-		if(mb_substr($word,0,1) !== "["){
														
 
															+		/*
														
 
															+        if(mb_substr($word,0,1) !== "[")
														
 
															+        {
														
 
															 			$search = $word;
														
 
															 		}
														
 
															 		else{
														
 
															 			$search = str_replace("[","",$word);
														
 
															-			$search = str_replace("]","",$search);		
														
 
															+			$search = str_replace("]","",$search);
														
 
															 		}
														
 
															+        */
														
 
															 		//获取单词权重
														
 
															-		$row = Cache::remember('palicanon/wordpart/weight/'.$search, 100 , function() use($search) {
														
 
															+		$row = Cache::remember('palicanon/wordpart/weight/'.$search, 10 , function() use($search) {
														
 
															 			return WordPart::where('word',$search)->value('weight');
														
 
															 		});
														
 
															 		if ($row) {
														
@@ -195,7 +201,7 @@ class TurboSplit
 
															 			$base_weight = 0;
														
 
															 			$len = 0;
														
 
															 			foreach ($newWord as $x => $x_value) {
														
 
															-				$row = Cache::remember('palicanon/wordpart/weight/'.$search, 100 , function() use($x) {
														
 
															+				$row = Cache::remember('palicanon/wordpart/weight/'.$search, 10 , function() use($x) {
														
 
															 					return WordPart::where('word',$x)->value('weight');
														
 
															 				});
														
 
															 				if ($row) {
														
@@ -214,17 +220,16 @@ class TurboSplit
 
															 	 * 返回信心指数
														
 
															 	 * look up single word in dictionary vocabulary
														
 
															 	 * return the confidence value
														
 
															-	 * 
														
 
															-	 * 
														
 
															-	 * 
														
 
															+	 *
														
 
															+	 *
														
 
															+	 *
														
 
															 	 */
														
 
															 	public function isExsit($word, $adj_len = 0){
														
 
															 		$this->log("正在查询：{$word}");
														
 
															 		$isFound = false;
														
 
															 		$count = 0;
														
 
															-		$cacheKey = "turbosplit/part/";
														
 
															-		$wordPart  = Cache::remember($cacheKey.$word,1000,function() use($word){
														
 
															+		$wordPart  = Cache::remember("turbosplit/part/{$word}",10,function() use($word){
														
 
															 			return implode(',',$this->dict_lookup($word));
														
 
															 		});
														
 
															 		$arrWordPart = explode(',',$wordPart);
														
@@ -234,7 +239,7 @@ class TurboSplit
 
															 			$this->log("查到：{$word}:{$word_count}个");
														
 
															 			$isFound = true;
														
 
															 			$count = $word_count + 1;
														
 
															-		}		
														
 
															+		}
														
 
															 		//fomular of confidence value 信心值计算公式
														
 
															 		if ($isFound) {
														
@@ -251,27 +256,38 @@ class TurboSplit
 
															 		}
														
 
															 	}
														
 
															+    /**
														
 
															+     * word
														
 
															+     * cf
														
 
															+     * children[]
														
 
															+     */
														
 
															 	/**
														
 
															 	 * 核心拆分函数
														
 
															 	 * $strWord, word to be look up 要查询的词
														
 
															 	 * $deep, 当前递归深度
														
 
															+     * $forward 搜索方向
														
 
															+     *   true 正向
														
 
															+     *   false 反向
														
 
															 	 * $express=true, 快速查询
														
 
															 	 * $adj_len=0 长度校正系数
														
 
															 	 * $c_threshhold 信心指数阈值
														
 
															-	 * 
														
 
															-	 * 
														
 
															-	 * 
														
 
															+	 *
														
 
															+	 *
														
 
															+	 *
														
 
															 	 */
														
 
															-	function split($strWord, $deep = 0, $express = false, $adj_len = 0, $c_threshhold = 0.8, $w_threshhold = 0.8, $forward = true, $sandhi_advance = false)
														
 
															+	function split(&$node, $deep = 0, $express = false, $adj_len = 0, $c_threshhold = 0.8, $w_threshhold = 0.8, $forward = true, $sandhi_advance = false)
														
 
															 	{
														
 
															+        $strWord = $node["remain"];
														
 
															 		$this->log("spliting word={$strWord} deep={$deep}");
														
 
															 		$output = array();
														
 
															-		#当前搜索路径信心指数，如果过低，马上终止这个路径的搜索
														
 
															+		#currPathCf是当前搜索路径信心指数，如果过低，马上终止这个路径的搜索
														
 
															 		if($deep == 0){
														
 
															 			$this->currPathCf = 1;
														
 
															 		}
														
 
															 		//达到最大搜索深度，返回
														
 
															 		if ($deep >= $this->MAX_DEEP) {
														
 
															+            return ;
														
 
															+            /*
														
 
															 			$word = "";
														
 
															 			$cf = 1.0;
														
 
															 			for ($i = 0; $i < $deep; $i++) {
														
@@ -295,17 +311,37 @@ class TurboSplit
 
															 				$this->result[$reverseWord] = $cf;
														
 
															 				return 0;
														
 
															 			}
														
 
															-			
														
 
															+*/
														
 
															 		}
														
 
															 		//直接找到
														
 
															+
														
 
															 		$confidence = $this->isExsit($strWord, $adj_len);
														
 
															 		if ($confidence > $c_threshhold) {
														
 
															 			array_push($output, array($strWord, "", $confidence));
														
 
															-		} 
														
 
															-		else {
														
 
															-			$confidence = $this->isExsit("[" . $strWord . "]");
														
 
															+            if(isset($node['sum_cf'])){
														
 
															+                $parent_sum_cf = $node['sum_cf'];
														
 
															+            }else{
														
 
															+                $parent_sum_cf = 1;
														
 
															+            }
														
 
															+            $sum_cf = $parent_sum_cf * $confidence;
														
 
															+            $node['children'][] = ['word'=>$strWord,'remain'=>"",'cf'=>$confidence,"sum_cf"=>$sum_cf];
														
 
															+            $this->log("直接找到{$strWord}-{$confidence}");
														
 
															+		}
														
 
															+		else if(mb_strlen($strWord,"UTF-8")<6){
														
 
															+            //按照语尾查询
														
 
															+            $search = "[{$strWord}]";
														
 
															+			$confidence = $this->isExsit($search);
														
 
															+            $this->log("查询:{$search}-信心指数{$confidence}");
														
 
															 			if ($confidence > $c_threshhold) {
														
 
															-				array_push($output, array("[" . $strWord . "]", "", $confidence));
														
 
															+				array_push($output, array($search, "", $confidence));
														
 
															+                if(isset($node['sum_cf'])){
														
 
															+                    $parent_sum_cf = $node['sum_cf'];
														
 
															+                }else{
														
 
															+                    $parent_sum_cf = 1;
														
 
															+                }
														
 
															+                $sum_cf = $parent_sum_cf * $confidence;
														
 
															+                $node['children'][] = ['word'=>$search,'remain'=>"",'cf'=>$confidence,"sum_cf"=>$sum_cf];
														
 
															+                $this->log("直接找到{$strWord}-{$confidence}");
														
 
															 			}
														
 
															 		}
														
@@ -324,8 +360,8 @@ class TurboSplit
 
															 				#正向切
														
 
															 				$this->log("正向切");
														
 
															 				for ($i = $len; $i > 1; $i--) {
														
 
															+                    //应用连音规则切分单词
														
 
															 					foreach ($this->sandhi as $key => $row) {
														
 
															-						//应用连音规则切分单词
														
 
															 						if ($sandhi_advance == false && $row["advance"] == true) {
														
 
															 							//continue;
														
 
															 						}
														
@@ -336,10 +372,27 @@ class TurboSplit
 
															 							if ($confidence > $c_threshhold) {
														
 
															 								//信心指数大于预设的阈值，插入
														
 
															 								array_push($output, array($str1, $str2, $confidence, $row["adj_len"]));
														
 
															+                                if(isset($node['sum_cf'])){
														
 
															+                                    $parent_sum_cf = $node['sum_cf'];
														
 
															+                                }else{
														
 
															+                                    $parent_sum_cf = 1;
														
 
															+                                }
														
 
															+                                $sum_cf = $parent_sum_cf * $confidence;
														
 
															+                                if($sum_cf>$c_threshhold){
														
 
															+                                    $node['children'][] = [
														
 
															+                                        'word'=>$str1,
														
 
															+                                        'remain'=>$str2,
														
 
															+                                        'cf'=>$confidence,
														
 
															+                                        'sum_cf'=>$sum_cf,
														
 
															+                                        'children'=>[]
														
 
															+                                    ];
														
 
															+                                }
														
 
															+
														
 
															 								$this->log("插入结构数组：{$str1} 剩余{$str2} 应用：{$row["a"]}-{$row["b"]}-{$row["c"]}");
														
 
															 								if ($express) {
														
 
															 									break;
														
 
															 								}
														
 
															+
														
 
															 							}
														
 
															 						}
														
 
															 					}
														
@@ -357,6 +410,21 @@ class TurboSplit
 
															 							$confidence = $this->isExsit($str2, $adj_len)*$row["cf"];
														
 
															 							if ($confidence > $c_threshhold) {
														
 
															 								array_push($output, array($str2, $str1, $confidence, $row["adj_len"]));
														
 
															+                                if(isset($node['sum_cf'])){
														
 
															+                                    $parent_sum_cf = $node['sum_cf'];
														
 
															+                                }else{
														
 
															+                                    $parent_sum_cf = 1;
														
 
															+                                }
														
 
															+                                $sum_cf = $parent_sum_cf * $confidence;
														
 
															+                                if($sum_cf>$c_threshhold){
														
 
															+                                    $node['children'][] = [
														
 
															+                                        'word'=>$str2,
														
 
															+                                        'remain'=>$str1,
														
 
															+                                        'cf'=>$confidence,
														
 
															+                                        'sum_cf'=>$sum_cf,
														
 
															+                                        'children'=>[],
														
 
															+                                    ];
														
 
															+                                }
														
 
															 								$this->log("将此次结果插入结果数组：剩余={$str2}");
														
 
															 								if ($express) {
														
 
															 									break;
														
@@ -367,9 +435,18 @@ class TurboSplit
 
															 				}
														
 
															 			}
														
 
															 		}
														
 
															-		
														
 
															+
														
 
															 		$word = "";
														
 
															 		$this->log("结果数组个数：".count($output));
														
 
															+        //print_r($node);
														
 
															+        //遍历children
														
 
															+        foreach ($node['children'] as $key => $child) {
														
 
															+            # code...
														
 
															+            if(isset($child) && !empty($child['remain'])){
														
 
															+                $this->split($node['children'][$key], ($deep + 1), $express, $adj_len, $c_threshhold, $w_threshhold, $forward, $sandhi_advance);
														
 
															+            }
														
 
															+        }
														
 
															+        /*
														
 
															 		if (count($output) > 0) {
														
 
															 			foreach ($output as $part) {
														
 
															 				$checked = $part[0];
														
@@ -409,6 +486,7 @@ class TurboSplit
 
															 						}
														
 
															 					}
														
 
															 				} else {
														
 
															+                    //还有剩余部分
														
 
															 					#计算当前信心指数
														
 
															 					$cf = 1.0;
														
 
															 					for ($i = 0; $i < $deep; $i++) {
														
@@ -421,7 +499,7 @@ class TurboSplit
 
															 					}else{
														
 
															 						#接着切
														
 
															 						$this->log("接着切:{$remainder}");
														
 
															-						$this->split($remainder, ($deep + 1), $express, $adj_len, $c_threshhold, $w_threshhold, $forward, $sandhi_advance);					
														
 
															+						$this->split($remainder, ($deep + 1), $express, $adj_len, $c_threshhold, $w_threshhold, $forward, $sandhi_advance);
														
 
															 					}
														
 
															 				}
														
 
															 			}
														
@@ -438,15 +516,15 @@ class TurboSplit
 
															 				$word .= "+";
														
 
															 				$cf = $cf * $this->path[$i][1];
														
 
															 			}
														
 
															-			
														
 
															+
														
 
															 			$len = pow(mb_strlen($strWord, "UTF-8"), 3);
														
 
															-			
														
 
															+
														
 
															 			if ($forward) {
														
 
															 				$cf =(1-$cf) * $len / ($len + 150);
														
 
															 			} else {
														
 
															 				$cf =(1-$cf) * $len / ($len + 5);
														
 
															 			}
														
 
															-			
														
 
															+
														
 
															 			if ($this->isDebug) {
														
 
															 				$word = $word.$strWord . "(0)";
														
 
															 			} else {
														
@@ -457,7 +535,7 @@ class TurboSplit
 
															 				if ($forward == true) {
														
 
															 					$this->result[$word] = $cf;
														
 
															 					return 0;
														
 
															-				} 
														
 
															+				}
														
 
															 				else {
														
 
															 					$reverseWord = $this->word_reverse($word);
														
 
															 					$this->result[$reverseWord] = $cf;
														
@@ -465,9 +543,30 @@ class TurboSplit
 
															 				}
														
 
															 			}
														
 
															 		}
														
 
															-
														
 
															+*/
														
 
															 	}
														
 
															+    /**
														
 
															+     * 遍历树状结构，获取结果
														
 
															+     */
														
 
															+    private function get_result($node,&$path){
														
 
															+
														
 
															+            # code...
														
 
															+            $path[] = $node['word'];
														
 
															+            if(isset($node['children']) && count($node['children'])>0){
														
 
															+                foreach ($node['children'] as $key => $value) {
														
 
															+                    $this->get_result($value,$path);
														
 
															+                }
														
 
															+            }else{
														
 
															+                if(empty($node['remain'])){
														
 
															+                    $factors = trim(implode("+",$path),'+');
														
 
															+                    $this->result[$factors] = $node['sum_cf'];
														
 
															+                }else{
														
 
															+
														
 
															+                }
														
 
															+            }
														
 
															+            array_pop($path);
														
 
															+    }
														
 
															 	/**
														
 
															 	 * 颠倒词序
														
 
															 	 */
														
@@ -502,11 +601,11 @@ class TurboSplit
 
															 			}
														
 
															 			if(mb_strlen($word,"UTF-8")>4){
														
 
															 				# 先看有没有中文意思
														
 
															-				//Log::info("先看有没有中文意思");
														
 
															+				//$this->log("先看有没有中文意思");
														
 
															 				if(UserDict::where('word',$word)->where('mean','<>','')->where('language','<>','my')->exists()){
														
 
															 					$newword[]=$word;
														
 
															 				}else{
														
 
															-					//Log::info("如果没有查巴缅替换拆分");
														
 
															+					//$this->log("如果没有查巴缅替换拆分");
														
 
															 					#如果没有查巴缅替换拆分
														
 
															 					if(UserDict::where('word',$word)->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->exists()){
														
 
															 						$pmPart = explode("+",UserDict::where('word',$word)->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->value('factors')) ;
														
@@ -516,12 +615,12 @@ class TurboSplit
 
															 						}
														
 
															 					}
														
 
															 					else{
														
 
															-						//Log::info("如果没有查规则变形");
														
 
															+						//$this->log("如果没有查规则变形");
														
 
															 						#如果没有查规则变形
														
 
															 						if(UserDict::where('word',$word)->where('source','_SYS_REGULAR_')->exists()){
														
 
															 							$rglPart = explode("+",UserDict::where('word',$word)->where('source','_SYS_REGULAR_')->value('factors')) ;
														
 
															 							#看巴缅有没有第一部分
														
 
															-							//Log::info("看巴缅有没有第一部分");
														
 
															+							//$this->log("看巴缅有没有第一部分");
														
 
															 							if(UserDict::where('word',$rglPart[0])->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->exists()){
														
 
															 								$pmPart = explode("+",UserDict::where('word',$rglPart[0])->where('dict_id','61f23efb-b526-4a8e-999e-076965034e60')->value('factors')) ;
														
 
															 								foreach ($pmPart as  $pm) {
														
@@ -537,7 +636,7 @@ class TurboSplit
 
															 						}
														
 
															 						else{
														
 
															 							#还没有就认命了
														
 
															-							//Log::info("还没有就认命了");
														
 
															+							//$this->log("还没有就认命了");
														
 
															 							$newword[]=$word;
														
 
															 						}
														
 
															 					}
														
@@ -583,7 +682,7 @@ class TurboSplit
 
															 		//预处理连音词
														
 
															 		$word1 = $this->splitSandhi($word);
														
 
															 		# 处理双元音
														
 
															-		Log::info("处理双元音");
														
 
															+		$this->log("处理双元音");
														
 
															 		$arrword = $this->splitDiphthong($word1);
														
 
															 		if (count($arrword) > 1) {
														
 
															 			array_push($output,['word'=>$word,'type'=>'.un.','grammar'=>'','parent'=>'','factors'=>implode("+", $arrword),'confidence'=>0.9999]);
														
@@ -591,34 +690,40 @@ class TurboSplit
 
															 		foreach ($arrword as $oneword) {
														
 
															 			$this->result = array(); //清空递归程序的输出容器
														
 
															-			
														
 
															+            $node = ['word'=>"",'remain'=>$oneword,'children'=>[]];
														
 
															 			if(mb_strlen($oneword)>35){
														
 
															 				//长词使用快速切分 正向切分 不使用少见sandi规则
														
 
															-				$this->split($oneword, 0, true, 0.8, 0.9, 0, true, false);
														
 
															+				$this->split($node, 0, true, 0.8, 0.9, 0, true, false);
														
 
															 				$min_result = 1;
														
 
															 			}else{
														
 
															-				$this->split($oneword, 0, false, 0.8, 0.9, 0, true, false);
														
 
															-				$min_result=3;
														
 
															+				$this->split($node, 0, false, 0.8, 0.9, 0, true, false);
														
 
															+				$min_result=2;
														
 
															 			}
														
 
															-			Log::info("正向切分结束 结果数量".count($this->result));
														
 
															-			
														
 
															+            $path = [];
														
 
															+            $this->log($node);
														
 
															+            $this->get_result($node,$path);
														
 
															+
														
 
															+			$this->log("正向切分结束 结果数量".count($this->result));
														
 
															+
														
 
															 			if(count($this->result)<$min_result){
														
 
															 				//有效结果过少
														
 
															-				$this->split($oneword, 0, false, 0.2, 0.8, 0, true, true);
														
 
															-				Log::info("有效结果过少 再次正切".count($this->result) );
														
 
															+                $node = ['word'=>"",'remain'=>$oneword,'children'=>[]];
														
 
															+				$this->split($node, 0, false, 0.2, 0.8, 0, true, true);
														
 
															+				$this->log("有效结果过少 再次正切".count($this->result) );
														
 
															 				if(count($this->result)<2){
														
 
															-					$this->split($oneword, 0, false, 0.2, 0.8, 0, false, true);
														
 
															-					Log::info("有效结果过少 再次反切：结果数量" . count($this->result));
														
 
															+                    $node = ['word'=>"",'remain'=>$oneword,'children'=>[]];
														
 
															+					$this->split($node, 0, false, 0.2, 0.8, 0, false, true);
														
 
															+					$this->log("有效结果过少 再次反切：结果数量" . count($this->result));
														
 
															 				}
														
 
															 			}
														
 
															-			Log::info("{$oneword}:" . count($this->result));
														
 
															+			$this->log("{$oneword}:" . count($this->result));
														
 
															 			if (count($this->result) > 0) {
														
 
															-				arsort($this->result); //按信心指数排序
														
 
															+				arsort($this->result); //按信心指数升序排序
														
 
															 				$iCount = 0;
														
 
															 				foreach ($this->result as $row => $value) {
														
 
															 					$factors = $row;
														
 
															-					if(strpos($row,'[') !== FALSE){
														
 
															+					if(strpos($row,']+') !== FALSE){
														
 
															 						$type = '.un.';
														
 
															 						$factors = \str_replace(['+[ṃ]+','[ṃ]+'],'ṃ+',$row);
														
 
															 					}else{
														
@@ -630,13 +735,17 @@ class TurboSplit
 
															 					if($iCount==0){
														
 
															 						//对于最优结果进行处理 找到base
														
 
															 						$wordWithType = ['word'=>$oneword,'type'=>'','grammar'=>'','parent'=>'','factors'=>$factors,'confidence'=>$value];
														
 
															-						
														
 
															-						Log::info("查找base");
														
 
															-						
														
 
															+
														
 
															+						$this->log("查找base");
														
 
															+
														
 
															 						$factors = explode('+',$row);
														
 
															 						$endOfFactor = end($factors);
														
 
															-
														
 
															-						Log::info("结尾词：".$endOfFactor);
														
 
															+                        if(strpos($endOfFactor,"[") !== FALSE){
														
 
															+                            if(count($factors)>=2){
														
 
															+                                $endOfFactor = $factors[count($factors)-2];
														
 
															+                            }
														
 
															+                        }
														
 
															+						$this->log("结尾词：".$endOfFactor);
														
 
															 						$caseman = new CaseMan();
														
 
															 						//猜测单词的base
														
 
															 						$parents = $caseman->WordToBase($oneword,1,false);
														
@@ -649,19 +758,21 @@ class TurboSplit
 
															 								if(count($end_parents)>0){
														
 
															 									foreach ($end_parents as $base2=>$case2) {
														
 
															 										if(\mb_substr($base2,-2)===\mb_substr($base,-2)){
														
 
															-											Log::info("{$base} ok");
														
 
															+											$this->log("{$base} ok");
														
 
															 											foreach ($case as $value) {
														
 
															 												# code...
														
 
															 												foreach ($case2 as $value2) {
														
 
															 													//验证语法信息是否正确
														
 
															-													if($value['type'] == $value2['type'] && $value['grammar'] == $value2['grammar']){
														
 
															+													if($value['type'] == $value2['type'] &&
														
 
															+                                                        substr($value['grammar'],0,3) === substr($value2['grammar'],0,3) &&
														
 
															+                                                        $value['confidence']>0.5){
														
 
															 														$wordWithType['type'] = $value['type'];
														
 
															 														$wordWithType['grammar'] = $value['grammar'];
														
 
															 														$wordWithType['factors'] = $value['factors'];
														
 
															 														$wordWithType['parent'] = $base;
														
 
															 														$wordWithType['confidence'] = $value2['confidence'];
														
 
															-														Log::info("word:{$wordWithType['word']} ; type:{$wordWithType['type']}; grammar:{$wordWithType['grammar']};parent:{$wordWithType['parent']}");
														
 
															-														array_push($output,$wordWithType);	
														
 
															+														$this->log("word:{$wordWithType['word']} ; type:{$wordWithType['type']}; grammar:{$wordWithType['grammar']};parent:{$wordWithType['parent']}");
														
 
															+														array_push($output,$wordWithType);
														
 
															 													}
														
 
															 												}
														
 
															 											}
														
@@ -674,14 +785,14 @@ class TurboSplit
 
															 										$wordWithType['factors'] = $value['factors'];
														
 
															 										$wordWithType['parent'] = $base;
														
 
															 										$wordWithType['confidence'] = 0.1;
														
 
															-										array_push($output,$wordWithType);	
														
 
															+										array_push($output,$wordWithType);
														
 
															 									}
														
 
															 								}
														
 
															 							}
														
 
															 						}
														
 
															 					}
														
 
															 					//后处理 进一步切分没有意思的长词
														
 
															-					Log::info("后处理 进一步切分没有意思的长词");
														
 
															+					$this->log("后处理 进一步切分没有意思的长词");
														
 
															 					$new = $this->split2($row);
														
 
															 					if($new !== $row){
														
 
															 						$newword['factors'] = $new;
														
@@ -713,6 +824,10 @@ class TurboSplit
 
															 		return $this->result;
														
 
															 	}
														
 
															+	public function debug($debug){
														
 
															+        $this->isDebug = $debug;
														
 
															+    }
														
 
															+
														
 
															 	private function log($message){
														
 
															 		if ($this->isDebug) {
														
 
															 			Log::info($message);