ソースを参照

:bug: 拆分复合词增加aa+a=aa规则

visuddhinanda 5 年 前
コミット
a452a8c7eb
1 ファイル変更、40 行追加、11 行削除
  1. 40 11
      app/dict/split.php

+ 40 - 11
app/dict/split.php

@@ -19,13 +19,28 @@ else{
 <!--debug only-->
 <!--debug only-->
 <form action="split.php" method="post">
 <form action="split.php" method="post">
 Words: <textarea type="text" name="word"></textarea>
 Words: <textarea type="text" name="word"></textarea>
-<input name="debug" />
+<input name="debug" type="hidden" />批量查询,单词之间用英文逗号分隔。
+<div>
+<input type="checkbox" name = "express" checked /> 快速搜索(遇到第一个连音规则成功就返回)
+</div>
 <input type="submit">
 <input type="submit">
 </form>
 </form>
 
 
 <?php
 <?php
 	return;
 	return;
 }
 }
+
+if(isset($_POST["express"])){
+	if($_POST["express"]==="on"){
+		$_express = true;
+	}
+	else{
+		$_express = false;
+	}
+}
+else{
+	$_express = false;
+}
 global $dbh;
 global $dbh;
 $dns = "sqlite:"._FILE_DB_PART_;
 $dns = "sqlite:"._FILE_DB_PART_;
 $dbh = new PDO($dns, "", "",array(PDO::ATTR_PERSISTENT=>true));
 $dbh = new PDO($dns, "", "",array(PDO::ATTR_PERSISTENT=>true));
@@ -61,6 +76,7 @@ global $sandhi ;
 	$sandhi[]=array("a"=>"a","b"=>"a","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"a","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"ā","b"=>"ā","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"ā","b"=>"ā","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"ā","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"ā","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
+	$sandhi[]=array("a"=>"ā","b"=>"a","c"=>"ā","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"e","c"=>"e","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"e","c"=>"e","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"i","c"=>"i","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"i","c"=>"i","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"o","c"=>"o","len"=>1,"adj_len"=>0,"advance"=>false);
 	$sandhi[]=array("a"=>"a","b"=>"o","c"=>"o","len"=>1,"adj_len"=>0,"advance"=>false);
@@ -165,20 +181,26 @@ foreach($arrWords as $oneword){
 		$result = array();//全局变量,递归程序的输出容器
 		$result = array();//全局变量,递归程序的输出容器
 
 
 		if(mb_strlen($oneword,"UTF-8")<30){
 		if(mb_strlen($oneword,"UTF-8")<30){
-			mySplit2($oneword,0,true);
+			mySplit2($oneword,0,$_express);
 		}
 		}
 		else{
 		else{
-			mySplit2($oneword,0,true);
+			mySplit2($oneword,0,$_express);
 		}
 		}
 		
 		
-		
 		arsort($result);//按信心指数排序
 		arsort($result);//按信心指数排序
 		$wordlist = array();
 		$wordlist = array();
+		$iMax = 5;
+		$iCount = 0;
 		foreach($result as $row=>$value){
 		foreach($result as $row=>$value){
+			$iCount++;
 			$word_part  = array();
 			$word_part  = array();
 			$word_part["word"] = $row;
 			$word_part["word"] = $row;
 			$word_part["confidence"] = $value;
 			$word_part["confidence"] = $value;
 			$wordlist[] = $word_part;
 			$wordlist[] = $word_part;
+			if($iCount>=$iMax){
+			break;
+			}
+
 		}
 		}
 		$output[] = $wordlist;
 		$output[] = $wordlist;
 
 
@@ -196,9 +218,6 @@ foreach($arrWords as $oneword){
 			if(isset($_POST["debug"])){
 			if(isset($_POST["debug"])){
 				echo $row."-[".$value."]<br>";
 				echo $row."-[".$value."]<br>";
 			}
 			}
-			else{	
-				//echo $row."-[".round($level)."] ";
-			}
 		}
 		}
 		
 		
 		/*
 		/*
@@ -265,7 +284,9 @@ function isExsit($word,$adj_len=0){
 	global $confidence;
 	global $confidence;
 	$auto_split_times++;
 	$auto_split_times++;
 	
 	
-	//echo "<div>正在查询:{$word}</div>";
+	if(isset($_POST["debug"])){
+		echo "<div>正在查询:{$word}</div>";
+	}
 	$isFound=false;
 	$isFound=false;
 	if(isset($part["{$word}"]))
 	if(isset($part["{$word}"]))
 	{
 	{
@@ -282,6 +303,9 @@ function isExsit($word,$adj_len=0){
 			$isFound=true;
 			$isFound=true;
 			$count=$db+1;
 			$count=$db+1;
 		}
 		}
+		else{
+			
+		}
 	} 
 	} 
 
 
 	if($isFound)
 	if($isFound)
@@ -311,8 +335,9 @@ function isExsit($word,$adj_len=0){
 
 
 $strWord, 要查询的词
 $strWord, 要查询的词
 $deep, 当前递归深度
 $deep, 当前递归深度
-$turbo=false, 简洁查询
+$express=false, 快速查询(默认关闭;为 true 时遇到第一个连音规则成功就返回)
 $adj_len=0 长度校正系数
 $adj_len=0 长度校正系数
+$c_threshhold=0.8 信心指数阈值(信心指数低于该值的拆分结果不写入 $result)
 */
 */
 
 
 function mySplit2($strWord,$deep,$express=false,$adj_len=0,$c_threshhold=0.8){
 function mySplit2($strWord,$deep,$express=false,$adj_len=0,$c_threshhold=0.8){
@@ -404,7 +429,9 @@ function mySplit2($strWord,$deep,$express=false,$adj_len=0,$c_threshhold=0.8){
 					$word .= "({$part[2]})";
 					$word .= "({$part[2]})";
 				}
 				}
 				$cf=$cf+$part[2]*0.1;
 				$cf=$cf+$part[2]*0.1;
-				$result[$word]=$cf;
+				if($cf >= $c_threshhold){
+					$result[$word]=$cf;
+				}
 			}
 			}
 		}
 		}
 	}
 	}
@@ -427,7 +454,9 @@ function mySplit2($strWord,$deep,$express=false,$adj_len=0,$c_threshhold=0.8){
 			$word .= $strWord;
 			$word .= $strWord;
 		}
 		}
 		
 		
-		$result[$word]=$cf;
+		if($cf >= $c_threshhold){
+			$result[$word]=$cf;
+		}
 	}
 	}
 }
 }