Procházet zdrojové kódy

词典优化拆分显示。默认显示拆分

visuddhinanda před 5 roky
rodič
revize
9ed0c20d8c

+ 23 - 8
app/dict/comp_csv.php

@@ -19,8 +19,8 @@ else{
 }
 
 global $result;
-$myfile = fopen(_DIR_TEMP_ . "/comp.csv", "a");
-$filefail = fopen(_DIR_TEMP_ . "/comp_fail.txt", "a");
+$myfile = fopen(_DIR_TEMP_DICT_TEXT_ . "/comp.csv", "a");
+$filefail = fopen(_DIR_TEMP_DICT_TEXT_ . "/comp_fail.txt", "a");
 $iMax = 2;//输出前三个结果
 /*
 $dns = "" . _FILE_DB_WORD_INDEX_;
@@ -54,25 +54,26 @@ while ($words = $redis->sscan("pali_word", $i)) {
         # code...
         $arrword = split_diphthong($word);
         if (count($arrword) > 1) {
-            fputcsv($myfile, array($word, implode("+", $arrword), 0.99));
+			$data = array($counter,$word,'.comp.','','','','',implode("+", $arrword),'',1,50,6,'comp','en');
+            fputcsv($myfile, $data);
         }
 
         foreach ($arrword as $oneword) {
             $counter++;
 			$result = array(); //全局变量,递归程序的输出容器
 			mySplit2($oneword, 0, true, 0.5, 0.9, 0, true, false);
-			if(count($result)<2){
-				mySplit2($oneword, 0, $_express, 0.2, 0.9, 0, true, true);
+			mySplit2($oneword, 0, true, 0.5, 0.9, 0, false, false);
+			if(count($result)<5){
+				mySplit2($oneword, 0, false, 0.2, 0.8, 0, true, true);
 				if (isset($_POST["debug"])) {
 					echo "正切:" . count($result) . "\n";
 				}
 				if(count($result)<2){
-					mySplit2($oneword, 0, $_express, 0.2, 0.8, 0, false, true);
+					mySplit2($oneword, 0, false, 0.2, 0.8, 0, false, true);
 					if (isset($_POST["debug"])) {
 						echo "反切:" . count($result) . "\n";
 					}					
 				}
-				
 			}
 
             /*
@@ -88,7 +89,21 @@ while ($words = $redis->sscan("pali_word", $i)) {
                 arsort($result); //按信心指数排序
                 $iCount = 0;
                 foreach ($result as $row => $value) {
-					fputcsv($myfile, array($oneword, $row, $value));
+					$data = array($counter,$oneword,'.comp.','','','','',$row,'',1,round($value*70),6,'comp','en');
+					fputcsv($myfile, $data);
+
+								//后处理 进一步切分没有意思的长词
+					$new = split2($row);
+					if($new!==$row){
+						$data = array($counter,$oneword,'.comp.','','','','',$new,'',1,round($value*70),6,'comp','en');
+						fputcsv($myfile, $data);
+						#再处理一次
+						$new2 = split2($new);
+						if($new2!==$new){
+							$data = array($counter,$oneword,'.comp.','','','','',$new2,'',1,round($value*70),6,'comp','en');
+							fputcsv($myfile, $data);				
+						}				
+					}
 					$iCount++;
                     if ($iCount > $iMax) {
                         break;

+ 4 - 3
app/dict/css/style.css

@@ -30,9 +30,10 @@ textarea {
 
 #dict_ref_search_input {
 	margin-left: 0.5em;
-	width: 100%;
-	font-size: 140%;
-	padding: 0.6em;
+	max-width: 500px;
+	font-size: 130%;
+	font-weight: 500;
+	padding: 0.3em 0.6em;
 	color: var(--btn-hover-bg-color);
 	background-color: var(--btn-color);
 }

+ 15 - 0
app/dict/css/style_mobile.css

@@ -130,4 +130,19 @@ body {
 
 #trubo_split {
 	font-size: unset;
+}
+
+#right_bar{
+	display:none;
+}
+.search_toolbar{
+	padding:0.5em 0;
+}
+
+guide{
+	display:none;
+}
+
+#search_info {
+    display: block;
 }

+ 63 - 9
app/dict/dict.js

@@ -1,8 +1,10 @@
 var dict_pre_searching = false;
 var dict_pre_search_curr_word = "";
 var dict_search_xml_http = null;
+var _autoSplit = true;
 
-function dict_search(word) {
+function dict_search(word, autoSplit = true) {
+	_autoSplit = autoSplit;
 	$("#pre_search_result").hide();
 	if (!localStorage.searchword) {
 		localStorage.searchword = "";
@@ -19,6 +21,7 @@ function dict_search(word) {
 		localStorage.searchword = word + "," + oldHistory;
 	}
 	word = standardize(word);
+	word = com_getPaliReal(word);
 
 	$.get(
 		"dict_lookup.php",
@@ -28,10 +31,16 @@ function dict_search(word) {
 		function (data, status) {
 			$("#dict_search_result").html(data);
 			$("#dict_list").append($("#dictlist"));
+
+			$("#right_bar").html("");
+			$("#right_bar").append($("#dict_user"));
+
+			$("#search_result_shell").html("");
 			$("#search_result_shell").append($("#search_summary"));
+
 			guide_init();
 			let word_count = parseInt($("#word_count").val());
-			if (word_count < 3) {
+			if (_autoSplit == true && word_count < 3) {
 				trubo_split();
 			}
 		}
@@ -131,6 +140,7 @@ function dict_input_keyup(e, obj) {
 		}
 	}
 }
+var t;
 
 function dict_input_split(word) {
 	if (word.indexOf("+") >= 0) {
@@ -143,9 +153,12 @@ function dict_input_split(word) {
 			"点击查词<div class='dropdown_ctl'><div class='content'><div class='main_view' >" +
 			strParts +
 			"</div></div></div>";
-		$("#input_parts").html(html);
+		$("#manual_split").html(html);
+		clearTimeout(t);
+		t = setTimeout("getPartMeaning()", 1000);
 	} else {
-		$("#input_parts").html("");
+		$("#manual_split").html("");
+		$("#part_mean_shell").slideUp();
 	}
 }
 
@@ -173,8 +186,12 @@ function cls_word_search_history() {
 }
 
 function trubo_split() {
+	let strSpliting = "正在自动切分复合词……";
+	if ($("#input_parts").html() == strSpliting) {
+		return;
+	}
 	$("#pre_search_result").hide();
-	$("#input_parts").html("正在自动切分复合词……");
+	$("#input_parts").html(strSpliting);
 	$.post(
 		"split.php",
 		{
@@ -184,9 +201,13 @@ function trubo_split() {
 			try {
 				let result = JSON.parse(data);
 				let html = "<div>";
+				let firstWord = new Array();
 				if (result.length > 0) {
+					html += "拆分";
+					let level1Count = 0;
 					for (const part of result[0]["data"]) {
-						html += '自动拆分结果<div class="dropdown_ctl">';
+						firstWord.push(part[0].word);
+						html += '<div class="dropdown_ctl">';
 						html += '<div class="content">';
 						html +=
 							'<div class="main_view">' +
@@ -202,12 +223,14 @@ function trubo_split() {
 						}
 						html += "</div>";
 						html += "</div>";
+						level1Count++;
 					}
 				} else {
 					html += "无法拆分";
 				}
 				html += "</div>";
 				$("#input_parts").html(html);
+				getPartMeaning(firstWord.join("+"));
 
 				$(".more_button").click(function () {
 					$(this).parent().siblings(".menu").toggle();
@@ -217,19 +240,50 @@ function trubo_split() {
 					let html = "<part>" + $(this).text().replace(/\+/g, "</part><part>") + "</part>";
 					$(this).parent().parent().find(".main_view").html(html);
 					$(this).parent().hide();
+					getPartMeaning($(this).text());
 					$("part").click(function () {
-						dict_search($(this).text());
+						dict_search($(this).text(), false);
 					});
 				});
 
 				$("part").click(function () {
-					dict_search($(this).text());
+					dict_search($(this).text(), false);
 				});
 			} catch (e) {}
 		}
 	);
 }
-
+function getPartMeaning(word = "") {
+	let sWord = word;
+	if (word == "") {
+		sWord = $("#dict_ref_search_input").val();
+	}
+	$.get(
+		"../dict/get_first_mean.php",
+		{
+			word: sWord,
+		},
+		function (data, status) {
+			try {
+				let result = JSON.parse(data);
+				let html = "<div>";
+				if (result.length > 0) {
+					for (const part of result) {
+						html +=
+							"<div class='auto_mean'><span class='spell'>" +
+							part.word +
+							"</span><span class='meaning'>" +
+							part.mean +
+							"</span></div>";
+					}
+				}
+				html += "</div>";
+				$("#part_mean").html(html);
+				$("#part_mean_shell").slideDown();
+			} catch (error) {}
+		}
+	);
+}
 function setNaviVisibility(strObjId = "") {
 	var objNave = document.getElementById("dict_list");
 	var objblack = document.getElementById("dict_list_shell");

+ 11 - 8
app/dict/dict_lookup.php

@@ -21,11 +21,8 @@ $dict_list = array();
 $right_word_list = "";
 
         add_edit_event(_DICT_LOOKUP_, $word);
-		echo "<div id='dict_list_shell' onclick='setNaviVisibility()'>";
-		echo "<div id='dict_list' class='dict_list_off'></div>";
-		echo "</div>";
+
 		echo "<div id='dict_ref'>";
-		echo "<div id='search_result_shell'></div>";
 		echo "<div class='pali_spell'><a name='{word_$word}'></a>" . $word . "</div>";
         $dict_list_a = [];
         //社区字典开始
@@ -321,7 +318,9 @@ $right_word_list = "";
         echo "<div><a href='word_statistics.php?word={$word}'>";
         echo "<svg t='1596783175334' class='icon' style='font-size: xxx-large; fill: var(--link-hover-color); margin: 5px;' viewBox='0 0 1024 1024' version='1.1' xmlns='http://www.w3.org/2000/svg' p-id='7755' width='200' height='200'><path d='M1019.904 450.56L536.576 557.056l417.792 208.896C999.424 692.224 1024 606.208 1024 512c0-20.48 0-40.96-4.096-61.44z m-12.288-61.44C958.464 184.32 786.432 28.672 573.44 4.096L446.464 512l561.152-122.88zM737.28 970.752c73.728-36.864 139.264-90.112 188.416-159.744L507.904 602.112l229.376 368.64zM512 0C229.376 0 0 229.376 0 512s229.376 512 512 512c61.44 0 118.784-12.288 172.032-28.672L385.024 512 512 0z' p-id='7756'></path></svg>";
         echo "<span>{$_local->gui->click_to_chart}</span></a></div>";
-        echo $right_word_list;
+		echo $right_word_list;
+		
+		/*
         echo "<div class='dict_word' ><b>{$_local->gui->undone_function}</b>";
         echo "<div class='' onclick=\"dict_show_edit()\">{$_local->gui->edit}</div>";
         echo "<div class='pali'>{$word}</div>";
@@ -345,9 +344,13 @@ $right_word_list = "";
         echo "<fieldset class='broder-1 broder-r'><legend>{$_local->gui->factor}</legend><input type='input' value=''/></fieldset>";
         echo "<fieldset class='broder-1 broder-r'><legend>{$_local->gui->f_mean}</legend><input type='input' value=''/></fieldset>";
         echo "<div class=''><button>{$_local->gui->add_to} {$_local->gui->my_dictionary}</button></div>";
-        echo "</div>";
-        echo "</div>";
-        echo "</div>";
+		echo "</div>";
+		
+
+		echo "</div>";
+		*/
+		echo "</div>";
+
         //查用户词典结束
 
 

+ 1 - 1
app/dict/dict_lookup_pre.php

@@ -35,7 +35,7 @@ if($redis!==false){
 	}
 	$arrResult = json_decode($arrWordIdx,true);
 	foreach ($arrResult as $key => $value) {
-		# code...
+		# 获取字典里的第一个意思
 		$arrResult[$key]["mean"]=getRefFirstMeaning($arrResult[$key]["word"],$currLanguage,$redis);
 	}
 	echo json_encode($arrResult, JSON_UNESCAPED_UNICODE);

+ 8 - 3
app/dict/function.php

@@ -10,9 +10,14 @@ function getRefFirstMeaning($word,$lang,$redis){
 			PDO_Connect(_FILE_DB_REF_, _DB_USERNAME_, _DB_PASSWORD_);
 			$query = "SELECT mean,language as lang from " . _TABLE_DICT_REF_ . " where word = ?  group by language";
 			$Fetch = PDO_FetchAll($query, array($word));
-			foreach ($Fetch as $key => $value) {
-				# code...
-				$redis->hset("ref_first_mean_".$word,$value["lang"],$value["mean"]);
+			if(count($Fetch)){
+				foreach ($Fetch as $key => $value) {
+					# code...
+					$redis->hset("ref_first_mean_".$word,$value["lang"],$value["mean"]);
+				}				
+			}
+			else{
+				
 			}
 		}
 		$mean = $redis->hGet("ref_first_mean_".$word,$lang);

+ 37 - 0
app/dict/get_first_mean.php

@@ -0,0 +1,37 @@
+<?php // Replies with a JSON list of {word, mean}: one entry per "+"-separated part of $_GET["word"].
+require_once '../path.php';
+require_once '../public/_pdo.php';
+require_once '../redis/function.php';
+require_once '../dict/function.php';
+
+if (isset($_GET["language"])) { // language priority: query parameter, then cookie, then "en"
+    $currLanguage = $_GET["language"];
+} else {
+    if (isset($_COOKIE["language"])) {
+        $currLanguage = $_COOKIE["language"];
+    } else {
+        $currLanguage = "en";
+    }
+}
+$currLanguage = explode("-", $currLanguage)[0]; // keep only the text before the first "-"
+
+$output=array();
+if(isset($_GET["word"])){
+	$arrWords = explode("+",$_GET["word"]); // one lookup per "+"-separated part
+}
+else{
+	echo json_encode($output, JSON_UNESCAPED_UNICODE); // no word given: reply with an empty list
+	exit;
+}
+$redis = redis_connect();
+
+if($redis!==false){ // when redis_connect() returned false the reply stays an empty list
+	foreach ($arrWords as $key => $word) {
+		# Pair each part with whatever getRefFirstMeaning() returns for the chosen language.
+		$output[]=array("word"=>$word,"mean"=>getRefFirstMeaning($word,$currLanguage,$redis));
+	}
+}
+
+echo json_encode($output, JSON_UNESCAPED_UNICODE);
+
+?>

+ 77 - 16
app/dict/index.php

@@ -141,7 +141,7 @@ if (!(isset($_GET["builtin"]) && $_GET["builtin"] == 'true')) {
 
 		.pre_serach_block {
 			border-bottom: 1px solid var(--shadow-color);
-			padding: 5px 8px;
+			padding: 5px 0;
 		}
 
 		.pre_serach_block_title {
@@ -150,7 +150,7 @@ if (!(isset($_GET["builtin"]) && $_GET["builtin"] == 'true')) {
 		}
 
 		.pre_serach_content {
-			padding: 4px 4px 4px 15px;
+
 		}
 
 		#footer_nav {
@@ -220,7 +220,7 @@ if (!(isset($_GET["builtin"]) && $_GET["builtin"] == 'true')) {
 		}
 
 		#pre_search_result{
-			background-color: var(--btn-color);
+			background-color: var(--bg-color);
 			z-index: 50;
 			display:none;
 		}
@@ -236,26 +236,72 @@ if (!(isset($_GET["builtin"]) && $_GET["builtin"] == 'true')) {
 			-webkit-box-orient: vertical;
 			-webkit-line-clamp: 1;
 			padding-left: 1em;
+			color: var(--main-color1);
+		}
+		.dict_word_list{
+			padding: 2px 10px 5px 10px;
+		}
+		.dict_word_list:hover{
+			background-color: var(--link-color);
+			color: var(--btn-hover-color);
 		}
-
 		.section_inner{
 			max-width:1024px;
 			margin: 0 auto;
 		}
 		.spell{
-			font-size: 110%;
-    		font-weight: 700;
-		}
-		.dict_word_list:hover{
-			color: var(--link-hover-color);
+			font-size: 100%;
+    		font-weight: 500;
 		}
 
+
 		.pali_spell{
 			font-size:200%;
 			font-weight:700;
 			margin-top:15px;
 			padding-bottom:0
 		}
+		#main_view{
+			display:flex;
+		}
+		#main_result{
+			flex:7;
+		}
+		#right_bar{
+			flex:3;
+		}
+		.auto_mean{
+			display:flex;
+		}
+		.auto_mean>.spell{
+			font-weight: 700;
+			margin-right: 1em;
+		}
+		.auto_mean>.meaning{
+			overflow: hidden;
+			text-overflow: ellipsis;
+			display: -webkit-box;
+			-webkit-box-orient: vertical;
+			-webkit-line-clamp: 2;
+			color: var(--main-color1);
+		}
+		#word_parts{
+
+		}
+		#search_info{
+			display:flex;
+			justify-content: space-between;
+		}
+		#part_mean{
+			margin: 1em;
+			padding: 1em;
+			border: 1px solid var(--border-line-color);
+			background-color: var(--bg-color);
+			box-shadow: 0 5px 7px rgb(0 0 0 / 5%);
+		}
+		#part_mean_shell{
+			display:none;
+		}
 	</style>
 	<link type="text/css" rel="stylesheet" href="./css/style.css" >
 	<link type="text/css" rel="stylesheet" href="./css/style_mobile.css" media="screen and (max-width:800px)">
@@ -279,12 +325,10 @@ if (!(isset($_GET["builtin"]) && $_GET["builtin"] == 'true')) {
 			<div style="flex:6;">
 				<div>
 					<div>
-						<input id="dict_ref_search_input" type="text" autocomplete="off" placeholder="<?php echo $_local->gui->search; ?> 单词里面添加+ 预览拆词结果" onkeyup="dict_input_keyup(event,this)" style="" onfocus="dict_input_onfocus()" />
+						<input id="dict_ref_search_input" type="text" autocomplete="off" placeholder="<?php echo $_local->gui->search; ?> 单词里面添加 '+' 预览拆词结果" onkeyup="dict_input_keyup(event,this)" style="" onfocus="dict_input_onfocus()" />
 					</div>
 					<div id="result_msg"></div>
-					<div id="word_parts">
-						<div id="input_parts" style="font-size: 1.1em;padding: 2px 1em;"></div>
-					</div>
+					<div id="manual_split"></div>
 				</div>
 
 				<div id="pre_search_result" >
@@ -351,10 +395,27 @@ if (!(isset($_GET["builtin"]) && $_GET["builtin"] == 'true')) {
 		</button>
 	</div>
 
-<div>
-	<div class='section_inner' id="dict_search_result" style="background-color:white;color:black;">
+	<div>
+		<div id="main_view" class='section_inner'  style="background-color:white;color:black;">
+			<div id='dict_list_shell' style="display:none" onclick='setNaviVisibility()'>
+				<div id='dict_list' class='dict_list_off'></div>
+			</div>
+			<div id="main_result">
+				<div id="search_info">
+					<div id='search_result_shell'></div>
+					<div id="word_parts">
+						<div id="input_parts" style="font-size: 1.1em;padding: 2px 1em;"></div>
+					</div>
+				</div>
+				
+				<div id="part_mean_shell">
+					<div id="part_mean"></div>
+				</div>
+				<div id="dict_search_result"></div>
+			</div>
+			<div id="right_bar"></div>
+		</div>
 	</div>
-</div>
 	<script>
 <?php
 if (isset($_GET["key"]) && !empty($_GET["key"])) {

+ 0 - 0
app/dict/pali_word_list_to_redis.php → app/dict/redis_pali_word_list.php


+ 24 - 0
app/dict/redis_pm_part.php

@@ -0,0 +1,24 @@
+<?php // Command-line script: copies the "parts" column of the PM dictionary table into the redis hash "dict_pm_part".
+require_once "../path.php";
+require_once "../redis/function.php";
+
+if (PHP_SAPI == "cli") { // does nothing when not run from the command line
+	$redis = redis_connect();
+	if ($redis != false) {
+		$dbh = new PDO(_DICT_DB_PM_, "", "", array(PDO::ATTR_PERSISTENT => true));
+		$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+		
+		$query = "SELECT pali,parts from "._TABLE_DICT_PM_." where 1 group by pali"; // one row per distinct pali value
+		$stmt = $dbh->query($query);
+		while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
+			# Rows whose parts column is empty are skipped; the hash field is the pali word.
+			if(!empty($row["parts"])){
+				$redis->hSet("dict_pm_part",$row["pali"],$row["parts"]);
+			}
+			
+		}
+	}
+	echo "all done"; // printed even when the redis connection failed
+}
+
+?>

+ 22 - 0
app/dict/redis_ref_with_mean.php

@@ -0,0 +1,22 @@
+<?php // Command-line script: flags every word found in the listed reference dictionaries in the redis hash "dict_ref_with_mean".
+require_once "../path.php";
+require_once "../redis/function.php";
+
+if (PHP_SAPI == "cli") { // does nothing when not run from the command line
+	$redis = redis_connect();
+	if ($redis != false) {
+		$dns =  _FILE_DB_REF_;
+		$dbh = new PDO($dns, "", "", array(PDO::ATTR_PERSISTENT => true));
+		$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+		
+		$query = "SELECT word from "._TABLE_DICT_REF_." where dict_id in (3,4,6,7,8,10,12,13,15,18,19,21,22,23,24) group by word"; // NOTE(review): what the hard-coded dict_id list selects is not visible here
+		$stmt = $dbh->query($query);
+		while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
+			# The stored value is always "1"; only the presence of the field carries information.
+			$redis->hSet("dict_ref_with_mean",$row["word"],"1");
+		}
+	}
+	echo "all done"; // printed even when the redis connection failed
+}
+
+?>

+ 21 - 0
app/dict/redis_sys_rgl_part.php

@@ -0,0 +1,21 @@
+<?php // Command-line script: copies the "parts" column of the regular dictionary table into the redis hash "dict_regular_part".
+require_once "../path.php";
+require_once "../redis/function.php";
+
+if (PHP_SAPI == "cli") { // does nothing when not run from the command line
+	$redis = redis_connect();
+	if ($redis != false) {
+		$dbh = new PDO(_DICT_DB_REGULAR_, "", "", array(PDO::ATTR_PERSISTENT => true));
+		$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+		
+		$query = "SELECT pali,parent,parts from "._TABLE_DICT_REGULAR_." where 1 group by pali"; // "parent" is selected but never read below
+		$stmt = $dbh->query($query);
+		while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
+			# Every row is stored, including rows whose parts column is empty.
+			$redis->hSet("dict_regular_part",$row["pali"],$row["parts"]);
+		}
+	}
+	echo "all done"; // printed even when the redis connection failed
+}
+
+?>

+ 52 - 0
app/dict/sandhi.php

@@ -0,0 +1,52 @@
+<?php // Appends sandhi rule rows to $sandhi; presumably "a" + "b" fuse into "c" — confirm against the code that reads the table.
+$sandhi[] = array("a" => "ṃ", "b" => "ca", "c" => "ñca", "len" => 3, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "ṃ", "b" => "hi", "c" => "ñhi", "len" => 3, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "a", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ā", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "e", "b" => "iti", "c" => "eti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "i", "b" => "iti", "c" => "īti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ī", "b" => "iti", "c" => "īti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "o", "b" => "iti", "c" => "oti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "u", "b" => "iti", "c" => "ūti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ū", "b" => "iti", "c" => "ūti", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ṃ", "b" => "iti", "c" => "nti", "len" => 3, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "ṃ", "b" => "cet", "c" => "ñcet", "len" => 4, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "a", "b" => "eva", "c" => "eva", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ā", "b" => "eva", "c" => "āyeva", "len" => 5, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "e", "b" => "eva", "c" => "eva", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "i", "b" => "eva", "c" => "yeva", "len" => 4, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "iyeva", "len" => 5, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "īyeva", "len" => 5, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "o", "b" => "eva", "c" => "ova", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "u", "b" => "eva", "c" => "veva", "len" => 3, "adj_len" => 0, "advance" => false); // NOTE(review): "veva" has 4 characters but len is 3; in most rows len equals the character count of "c" — confirm
+$sandhi[] = array("a" => "ṃ", "b" => "eva", "c" => "meva", "len" => 4, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "a", "b" => "api", "c" => "āpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ā", "b" => "api", "c" => "āpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "e", "b" => "api", "c" => "epi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "i", "b" => "api", "c" => "īpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ī", "b" => "api", "c" => "īpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "o", "b" => "api", "c" => "opi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "u", "b" => "api", "c" => "ūpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "u", "b" => "api", "c" => "upi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ū", "b" => "api", "c" => "ūpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ṃ", "b" => "api", "c" => "mpi", "len" => 3, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "a", "b" => "ahaṃ", "c" => "āhaṃ", "len" => 3, "adj_len" => 0, "advance" => false); // NOTE(review): every "ahaṃ" row has a 4-character "c" but len 3 — confirm whether 4 was intended
+$sandhi[] = array("a" => "ā", "b" => "ahaṃ", "c" => "āhaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "e", "b" => "ahaṃ", "c" => "ehaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "i", "b" => "ahaṃ", "c" => "ihaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ī", "b" => "ahaṃ", "c" => "īhaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "o", "b" => "ahaṃ", "c" => "ohaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "u", "b" => "ahaṃ", "c" => "ūhaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "u", "b" => "ahaṃ", "c" => "uhaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ū", "b" => "ahaṃ", "c" => "ūhaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ṃ", "b" => "ahaṃ", "c" => "mhaṃ", "len" => 3, "adj_len" => 0, "advance" => false);
+
+$sandhi[] = array("a" => "o", "b" => "asi", "c" => "osi", "len" => 3, "adj_len" => 0, "advance" => false);
+
+?>

+ 34 - 13
app/dict/split.php

@@ -78,22 +78,24 @@ foreach ($arrWords as $currword) {
     //将双元音拆开
     //step 1 : split at diphthong . ~aa~ -> ~a-a~
     //按连字符拆开处理
-    $arrword = split_diphthong($currword);
+	$arrword = split_diphthong($currword);
+	
     foreach ($arrword as $oneword) {
-        $result = array(); //全局变量,递归程序的输出容器
+		$result = array(); //全局变量,递归程序的输出容器
+		//$noSandhi = removeSandhi($oneword);
 
-        mySplit2($oneword, 0, false, 0, 0.5, 0.8, true, false);
-		if(count($result) < 3){
-			mySplit2($oneword, 0, $_express, 0, 0.2, 0.8, true, true);
+        mySplit2($oneword, 0, false, 0, 0.2, 0.1, true, false);
+		if(count($result) < 2){
+			//mySplit2($oneword, 0, $_express, 0, 0.2, 0.8, true, true);
 		}
         if (isset($_POST["debug"])) {
-            echo "正切:" . count($result) . "\n";
+            echo "正切:" . count($result) . "<br>\n";
 		}
-		if(count($result) < 3){
-			mySplit2($oneword, 0, $_express, 0, 0.2, 0.8, false, true);
+		if(count($result) < 2){
+			//mySplit2($oneword, 0, $_express, 0, 0.2, 0.8, false, true);
 		}
         if (isset($_POST["debug"])) {
-            echo "反切:" . count($result) . "\n";
+            echo "反切:" . count($result) . "<br>\n";
         }
         /*
         if (count($result) < 5) {
@@ -124,10 +126,29 @@ foreach ($arrWords as $currword) {
         $iCount = 0;
         foreach ($result as $row => $value) {
             $iCount++;
-            $word_part = array();
+			$word_part = array();
+			
             $word_part["word"] = $row;
-            $word_part["confidence"] = $value;
-            $wordlist[] = $word_part;
+			$word_part["confidence"] = $value;
+			$wordlist[] = $word_part;
+
+			//后处理 进一步切分没有意思的长词
+			$new = split2($row);
+			if($new!==$row){
+				$word_part["word"] = $new;
+				$word_part["confidence"] = $value;
+				$wordlist[] = $word_part;	
+				#再处理一次
+				$new2 = split2($new);
+				if($new2!==$new){
+					$word_part["word"] = $new2;
+					$word_part["confidence"] = $value;
+					$wordlist[] = $word_part;					
+				}				
+			}
+
+
+
             if ($iCount >= $iMax) {
                 break;
             }
@@ -141,7 +162,7 @@ foreach ($arrWords as $currword) {
         }
         $iCount = 0;
         foreach ($result as $row => $value) {
-            if ($iCount > 10) {
+            if ($iCount > 100) {
                 break;
             }
             $iCount++;

+ 167 - 95
app/dict/turbo_split.php

@@ -4,6 +4,11 @@ require_once '../public/casesuf.inc';
 //require_once '../studio/sandhi.php';
 require_once "../path.php";
 require_once "../public/_pdo.php";
+
+require_once "../redis/function.php";
+global $redis;
+$redis = redis_connect();
+
 // open word part db
 global $dbh;
 $dns = "" . _FILE_DB_PART_;
@@ -38,97 +43,99 @@ $path[] = array("", 0);
 
 global $sandhi;
 //sandhi rules table 语尾表
-$sandhi[] = array("a" => "", "b" => "", "c" => "", "len" => 0, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "a", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ā", "b" => "ā", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "ā", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ā", "b" => "a", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "e", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "i", "c" => "i", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "o", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "u", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u", "b" => "a", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u", "b" => "u", "c" => "ū", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "u", "c" => "u", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "ī", "c" => "ī", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "ū", "c" => "ū", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "i", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "e", "b" => "a", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "i", "c" => "ī", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "e", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "a", "c" => "ya", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "atth", "c" => "atth", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "taṃ", "b" => "n", "c" => "tann", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "api", "c" => "mpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "eva", "c" => "meva", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[o]", "b" => "iva", "c" => "ova", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "o", "b" => "a", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "ādi", "c" => "ādi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a[ānaṃ]", "b" => "a", "c" => "ānama", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "ca", "c" => "ñca", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "iti", "c" => "nti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "a", "c" => "ma", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ṃ", "b" => "a", "c" => "m", "len" => 1, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "ā", "c" => "mā", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "u", "c" => "mu", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "[ṃ]", "b" => "h", "c" => "ñh", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ā", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "[ṃ]", "c" => "im", "len" => 2, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ati", "b" => "tabba", "c" => "atabba", "len" => 6, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ati", "b" => "tabba", "c" => "itabba", "len" => 6, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "iti", "b" => "a", "c" => "icca", "len" => 4, "adj_len" => 0, "advance" => false);
-
-$sandhi[] = array("a" => "uṃ", "b" => "a", "c" => "uma", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u[ūnaṃ]", "b" => "a", "c" => "ūnama", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī[īnaṃ]", "b" => "a", "c" => "īnama", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "su", "b" => "a", "c" => "sva", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "", "b" => "", "c" => "", "len" => 0, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "a", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ā", "b" => "ā", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "ā", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ā", "b" => "a", "c" => "ā", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "e", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "i", "c" => "i", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "o", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "u", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u", "b" => "a", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u", "b" => "u", "c" => "ū", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "u", "c" => "u", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "ī", "c" => "ī", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "ū", "c" => "ū", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "i", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "e", "b" => "a", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "i", "c" => "ī", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "e", "c" => "e", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "a", "c" => "ya", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "atth", "c" => "atth", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "taṃ", "b" => "n", "c" => "tann", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "api", "c" => "mpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "eva", "c" => "meva", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[o]", "b" => "iva", "c" => "ova", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "o", "b" => "a", "c" => "o", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "ādi", "c" => "ādi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a[ānaṃ]", "b" => "a", "c" => "ānama", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "ca", "c" => "ñca", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "iti", "c" => "nti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "a", "c" => "ma", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ṃ", "b" => "a", "c" => "m", "len" => 1, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "ā", "c" => "mā", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "u", "c" => "mu", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "[ṃ]", "b" => "h", "c" => "ñh", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ā", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "[ṃ]", "c" => "am", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "[ṃ]", "c" => "im", "len" => 2, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ati", "b" => "tabba", "c" => "atabba", "len" => 6, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ati", "b" => "tabba", "c" => "itabba", "len" => 6, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "iti", "b" => "a", "c" => "icca", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+
+$sandhi[] = array("a" => "uṃ", "b" => "a", "c" => "uma", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u[ūnaṃ]", "b" => "a", "c" => "ūnama", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī[īnaṃ]", "b" => "a", "c" => "īnama", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "su", "b" => "a", "c" => "sva", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
 
 #Other sandhi rules. They can be used, but the program will slow down.
 #其他连音规则,如果使用则会让程序运行变慢
 
-$sandhi[] = array("a" => "ā", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "e", "b" => "iti", "c" => "eti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "iti", "c" => "īti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "iti", "c" => "īti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "o", "b" => "iti", "c" => "oti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ū", "b" => "iti", "c" => "ūti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u", "b" => "iti", "c" => "ūti", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ṃ", "b" => "iti", "c" => "nti", "len" => 3, "adj_len" => 0, "advance" => false);
-
-$sandhi[] = array("a" => "ṃ", "b" => "ca", "c" => "ñca", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ṃ", "b" => "cāti", "c" => "ñcāti", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ṃ", "b" => "cet", "c" => "ñcet", "len" => 4, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "ā", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "iti", "c" => "āti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "e", "b" => "iti", "c" => "eti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "iti", "c" => "īti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "iti", "c" => "īti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "o", "b" => "iti", "c" => "oti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ū", "b" => "iti", "c" => "ūti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u", "b" => "iti", "c" => "ūti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ṃ", "b" => "iti", "c" => "nti", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+
+$sandhi[] = array("a" => "ṃ", "b" => "ca", "c" => "ñca", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ṃ", "b" => "cāti", "c" => "ñcāti", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ṃ", "b" => "cet", "c" => "ñcet", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ṃ", "b" => "ev", "c" => "mev", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
 
 /*
-$sandhi[] = array("a" => "a", "b" => "eva", "c" => "eva", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ā", "b" => "eva", "c" => "āyeva", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "e", "b" => "eva", "c" => "eva", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "eva", "c" => "yeva", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "iyeva", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "īyeva", "len" => 5, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "o", "b" => "eva", "c" => "ova", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u", "b" => "eva", "c" => "veva", "len" => 3, "adj_len" => 0, "advance" => false);
-
-$sandhi[] = array("a" => "a", "b" => "eva", "c" => "evā", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "e", "b" => "eva", "c" => "evā", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "eva", "c" => "yevā", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "yevā", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "iyevā", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "īyevā", "len" => 4, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "o", "b" => "eva", "c" => "ovā", "len" => 4, "adj_len" => 0, "advance" => false);
-
-$sandhi[] = array("a" => "ā", "b" => "api", "c" => "āpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "a", "b" => "api", "c" => "āpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "e", "b" => "api", "c" => "epi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ī", "b" => "api", "c" => "īpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "i", "b" => "api", "c" => "īpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "o", "b" => "api", "c" => "opi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ū", "b" => "api", "c" => "ūpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u", "b" => "api", "c" => "ūpi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "u", "b" => "api", "c" => "upi", "len" => 3, "adj_len" => 0, "advance" => false);
-$sandhi[] = array("a" => "ṃ", "b" => "api", "c" => "mpi", "len" => 3, "adj_len" => 0, "advance" => false);
+$sandhi[] = array("a" => "a", "b" => "eva", "c" => "eva", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ā", "b" => "eva", "c" => "āyeva", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "e", "b" => "eva", "c" => "eva", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "eva", "c" => "yeva", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "iyeva", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "īyeva", "len" => 5, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "o", "b" => "eva", "c" => "ova", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u", "b" => "eva", "c" => "veva", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+
+$sandhi[] = array("a" => "a", "b" => "eva", "c" => "evā", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "e", "b" => "eva", "c" => "evā", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "eva", "c" => "yevā", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "yevā", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "iyevā", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "eva", "c" => "īyevā", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "o", "b" => "eva", "c" => "ovā", "len" => 4, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+
+$sandhi[] = array("a" => "ā", "b" => "api", "c" => "āpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "a", "b" => "api", "c" => "āpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "e", "b" => "api", "c" => "epi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ī", "b" => "api", "c" => "īpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "i", "b" => "api", "c" => "īpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "o", "b" => "api", "c" => "opi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ū", "b" => "api", "c" => "ūpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u", "b" => "api", "c" => "ūpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "u", "b" => "api", "c" => "upi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
+$sandhi[] = array("a" => "ṃ", "b" => "api", "c" => "mpi", "len" => 3, "adj_len" => 0, "advance" => false,"cf"=>1.0);
  */
 $sandhi[] = array("a" => "a", "b" => "a", "c" => "a", "len" => 1, "adj_len" => -1, "advance" => true);
 $sandhi[] = array("a" => "ī", "b" => "", "c" => "i", "len" => 1, "adj_len" => 0, "advance" => true);
@@ -321,7 +328,8 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
     $confidence = isExsit($strWord, $adj_len);
     if ($confidence >= 0) {
         $output[] = array($strWord, "", $confidence);
-    } else {
+	} 
+	else {
         $confidence = isExsit("[" . $strWord . "]");
         if ($confidence >= 0) {
             $output[] = array("[" . $strWord . "]", "", $confidence);
@@ -356,7 +364,7 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
                         if ($confidence > $c_threshhold) {
                             $output[] = array($str1, $str2, $confidence, $row["adj_len"]);
                             if (isset($_POST["debug"])) {
-                                echo "插入:{$str1}\n";
+                                echo "插入:{$str1} 剩余{$str2} 应用:{$row["a"]}-{$row["b"]}-{$row["c"]}\n";
                             }
                             if ($express) {
                                 break;
@@ -394,8 +402,8 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
                 }
             }
         }
-
-    }
+	}
+	
 	$word = "";
     if (count($output) > 0) {
         foreach ($output as $part) {
@@ -438,7 +446,8 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
                 mySplit2($remainder, ($deep + 1), $express, $adj_len, $c_threshhold, $w_threshhold, $forward, $sandhi_advance);
             }
         }
-    } else {
+	} 
+	else {
         #尾巴查不到了
         $word = "";
         $cf = 1.0;
@@ -449,13 +458,16 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
             }
             $word .= "+";
             $cf = $cf * $path[$i][1];
-        }
-        $len = pow(mb_strlen($strWord, "UTF-8"), 3);
+		}
+		
+		$len = pow(mb_strlen($strWord, "UTF-8"), 3);
+		
         if ($forward) {
             $cf =(1-$cf) * $len / ($len + 150);
-        } else 
+        } else {
 			$cf =(1-$cf) * $len / ($len + 5);
-        }
+		}
+		
         if (isset($_POST["debug"])) {
             $word = $word.$strWord . "(0)";
         } else {
@@ -466,7 +478,8 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
             if ($forward == true) {
 				$result[$word] = $cf;
 				return;
-            } else {
+			} 
+			else {
                 $reverseWord = word_reverse($word);
 				$result[$reverseWord] = $cf;
 				return;
@@ -474,6 +487,7 @@ function mySplit2($strWord, $deep = 0, $express = false, $adj_len = 0, $c_thresh
         }
     }
 
+}
 
 function word_reverse($word)
 {
@@ -491,3 +505,61 @@ function word_reverse($word)
         return $word;
     }
 }
+
+/**
+ * Post-processing: further split the long parts of a "+"-joined split result.
+ *
+ * Each "+"-separated part is first cut at its first "(" (if any). A part is
+ * then handled as follows:
+ *   - 4 characters or fewer (UTF-8): kept unchanged.
+ *   - present in the Redis hash "dict_ref_with_mean" (has a meaning): kept.
+ *   - present in "dict_pm_part" (Pali-Myanmar split): replaced by that split.
+ *   - present in "dict_regular_part" (regular inflection split): the first
+ *     piece is replaced by its "dict_pm_part" split when one exists, and all
+ *     remaining pieces are appended unchanged.
+ *   - otherwise: kept unchanged.
+ *
+ * Uses the global $redis connection.
+ *
+ * @param string $word "+"-joined parts, e.g. "aaa+bbb(0)"
+ * @return string the refined parts joined by "+"
+ */
+function split2($word){
+	global $redis;
+
+	$newword = array();
+	foreach (explode("+", $word) as $value) {
+		// Keep only the text before the first "(". strstr() returns false when
+		// there is no "(", and "" when the value starts with "("; in both
+		// cases fall back to the whole value. A strict check is used so that a
+		// legitimate prefix such as "0" is not mistaken for a failure.
+		$part = strstr($value, "(", true);
+		if ($part === false || $part === "") {
+			$part = $value;
+		}
+
+		// Short parts are never split further.
+		if (mb_strlen($part, "UTF-8") <= 4) {
+			$newword[] = $part;
+			continue;
+		}
+
+		// First check whether the part already has a meaning.
+		if ($redis->hExists("dict_ref_with_mean", $part) === TRUE) {
+			$newword[] = $part;
+			continue;
+		}
+
+		// No meaning: try the Pali-Myanmar split. A single hGet replaces the
+		// original hExists + hGet pair; a missing field yields a non-string.
+		$pmSplit = $redis->hGet("dict_pm_part", $part);
+		if (is_string($pmSplit)) {
+			$newword = array_merge($newword, explode("+", $pmSplit));
+			continue;
+		}
+
+		// Still nothing: try the regular-inflection split.
+		$regularSplit = $redis->hGet("dict_regular_part", $part);
+		if (!is_string($regularSplit)) {
+			// Nothing found anywhere; keep the part as it is.
+			$newword[] = $part;
+			continue;
+		}
+
+		$rglPart = explode("+", $regularSplit);
+
+		// See whether the Pali-Myanmar data can split the first piece.
+		$pmFirst = $redis->hGet("dict_pm_part", $rglPart[0]);
+		if (is_string($pmFirst)) {
+			$newword = array_merge($newword, explode("+", $pmFirst));
+		}
+		else {
+			$newword[] = $rglPart[0];
+		}
+
+		// Append every remaining piece. This avoids reading a non-existent
+		// index when the stored value has no "+", and no longer drops pieces
+		// when it has more than two.
+		foreach (array_slice($rglPart, 1) as $rest) {
+			$newword[] = $rest;
+		}
+	}
+	return implode("+", $newword);
+}

+ 9 - 3
app/install/step3.php

@@ -124,7 +124,7 @@ function run_rich_dict(index){
     }
 }
 
-function run_sys_dict(index){
+function run_sys_dict(index,onlyOne=false){
     if(index >= sys_file_list.length){
         $("#response").html($("#response").html()+"All Down");
     }
@@ -138,8 +138,14 @@ function run_sys_dict(index){
         },
         function(data,status){
             $("#response").html($("#response").html()+data+"<br>");
-            iCurrSysDictIndex++;
-            run_sys_dict(iCurrSysDictIndex);
+			if(onlyOne){
+				$("#response").html($("#response").html()+"all done<br>");
+			}
+			else{
+				iCurrSysDictIndex++;
+				run_sys_dict(iCurrSysDictIndex);				
+			}
+
         });
     }
 }

+ 18 - 17
app/install/step3_run.php

@@ -4,7 +4,9 @@ require_once '../path.php';
 
 $filename = $_GET["filename"];
 $dbname = $_GET["dbname"];
-$table = $_GET["table"];
+if(isset($_GET["table"])){
+	$table = $_GET["table"];
+}
 switch ($_GET["dbtype"]) {
     case "rich":
     case "system":
@@ -15,7 +17,7 @@ switch ($_GET["dbtype"]) {
             $sDescDbFile = _DIR_DICT_SYSTEM_ . "/" . $dbname;
             $csvfile = _DIR_DICT_TEXT_ . "/system/{$filename}";
         }
-        $dns = "" . $sDescDbFile;
+        $dns = "sqlite:" . $sDescDbFile;
         $dbh = new PDO($dns, "", "", array(PDO::ATTR_PERSISTENT => true));
         $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
 
@@ -28,8 +30,10 @@ switch ($_GET["dbtype"]) {
                 $dbh->query($_value . ';');
             }
             echo $dns . "建立数据库成功<br>";
-        }
-
+		}
+		
+		$query = "DELETE from dict where 1";
+		$dbh->query($query);
         // 开始一个事务,关闭自动提交
         $dbh->beginTransaction();
 
@@ -62,7 +66,7 @@ switch ($_GET["dbtype"]) {
 
         break;
     case "thin":
-        echo "doing filename: $filename dbname: $dbname table:$table<br>";
+        echo "doing filename: $filename dbname: $dbname <br>";
         $sDescDbFile = _DIR_DICT_SYSTEM_ . "/" . $dbname;
         $csvfile = _DIR_DICT_TEXT_ . "/thin/{$filename}";
         $dns = "" . $sDescDbFile;
@@ -76,7 +80,7 @@ switch ($_GET["dbtype"]) {
             } else if ($table === "info") {
                 $query = "INSERT INTO info ('language' , 'id' ,  'shortname' , 'name') VALUES (  ? ,  ? ,  ? ,  ? )";
             } else {
-                echo "table name $table unkow.";
+                echo "table name  unkow.";
             }
         } else if ($dbname === "ref1.db") {
             $query = "INSERT INTO dict ('id','eword', 'word', 'length', 'count') VALUES (  ? ,  ? ,  ? ,  ? ,  ? )";
@@ -114,17 +118,14 @@ switch ($_GET["dbtype"]) {
         $dns = "" . _FILE_DB_PART_;
         $dbh = new PDO($dns, "", "", array(PDO::ATTR_PERSISTENT => true));
         $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
-
-        {
-            //建立数据库
-            $_sql = file_get_contents('part.sql');
-            $_arr = explode(';', $_sql);
-            //执行sql语句
-            foreach ($_arr as $_value) {
-                $dbh->query($_value . ';');
-            }
-            echo $dns . "建立数据库成功<br>";
-        }
+		//建立数据库
+		$_sql = file_get_contents('part.sql');
+		$_arr = explode(';', $_sql);
+		//执行sql语句
+		foreach ($_arr as $_value) {
+			$dbh->query($_value . ';');
+		}
+		echo $dns . "建立数据库成功<br>";
 
         // 开始一个事务,关闭自动提交
         $dbh->beginTransaction();

+ 9 - 0
app/path.php

@@ -47,6 +47,7 @@ define("_DIR_CSV_PALI_CANON_WORD_INDEX_", __DIR__ . "/../paliword/index");
 define("_DIR_PALI_CSV_", __DIR__ . "/../tmp/palicsv");
 define("_DIR_LOG_", __DIR__ . "/../tmp/log");
 define("_DIR_TEMP_", __DIR__ . "/../tmp/temp");
+define("_DIR_TEMP_DICT_TEXT_", __DIR__ . "/../tmp/dict_text");
 define("_DIR_TMP_", __DIR__ . "/../tmp");
 
 //dictionary
@@ -55,6 +56,14 @@ define("_DIR_DICT_SYSTEM_", __DIR__ . "/../tmp/appdata/dict/system");
 define("_DIR_DICT_3RD_", __DIR__ . "/../tmp/appdata/dict/3rd");
 define("_DIR_DICT_REF_", __DIR__ . "/../tmp/appdata/dict/ref");
 
+#巴缅字典
+define("_DICT_DB_PM_", "sqlite:" . __DIR__ . "/../tmp/appdata/dict/3rd/pm.db");
+define("_TABLE_DICT_PM_", "dict");
+
+#系统规则
+define("_DICT_DB_REGULAR_", "sqlite:" . __DIR__ . "/../tmp/appdata/dict/system/sys_regular.db");
+define("_TABLE_DICT_REGULAR_", "dict");
+
 define("_DIR_USERS_GUIDE_", __DIR__ . "/../documents/users_guide");
 
 #参考字典

+ 0 - 47
app/pcdl/html_foot.php

@@ -1,51 +1,4 @@
 <!--手机版导航-->
-<div id="footer_nav" class="footer_navbar tool_bar_bg">
-	<div class="navbar_button" onclick="main_menu_show(0)">
-		<div class="nav_icon">
-			<svg class="small_icon">
-				<use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="../studio/svg/icon.svg#ic_star"></use>
-			</svg>
-		</div>
-		<div class="nav_text btn_color">推荐</div>
-	</div>
-
-	<div class="navbar_button" onclick="main_menu_show(1)">
-		<div class="nav_icon">
-			<svg class="small_icon">
-				<use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="../studio/svg/icon.svg#ic_grid"></use>
-			</svg>
-		</div>
-		<div class="nav_text btn_color">三藏</div>
-	</div>
-
-	<div class="navbar_button" onclick="main_menu_show(2)">
-		<div class="nav_icon">
-			<svg class="small_icon">
-				<use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="../studio/svg/icon.svg#ic_list"></use>
-			</svg>
-		</div>
-		<div class="nav_text btn_color">分类</div>
-	</div>
-
-	<div class="navbar_button" onclick="main_menu_show(4)">
-		<div class="nav_icon">
-			<svg class="small_icon">
-				<use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="../studio/svg/icon.svg#ic_search"></use>
-			</svg>
-		</div>
-		<div class="nav_text btn_color">搜索</div>
-	</div>
-
-
-	<div class="navbar_button" onclick="main_menu_show(3)">
-		<div class="nav_icon">
-			<svg class="small_icon">
-				<use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="../studio/svg/icon.svg#ic_user"></use>
-			</svg>
-		</div>
-		<div class="nav_text btn_color">我的</div>
-	</div>
-</div>
 
 <div class="foot_div">
 	<div>wikipali @2020</div>

+ 10 - 10
app/public/union.inc

@@ -1,15 +1,15 @@
 <?php
 $un = array(
-array("ena+ti","enāti",256),
-array("ā+ti","āti",256),
-array("a+ti","āti",256),
-array("e+ti","eti",256),
-array("ī+ti","īti",256),
-array("i+ti","īti",256),
-array("o+ti","oti",256),
-array("ū+ti","uti",256),
-array("u+ti","uti",256),
-array("ṃ+ti","nti",256),
+array("ena+iti","enāti",256),
+array("ā+iti","āti",256),
+array("a+iti","āti",256),
+array("e+iti","eti",256),
+array("ī+iti","īti",256),
+array("i+iti","īti",256),
+array("o+iti","oti",256),
+array("ū+iti","uti",256),
+array("u+iti","uti",256),
+array("ṃ+iti","nti",256),
 array("ṃ+ca","ñca",256),
 array("āni+eva","āneva",256),
 array("o+eva","ova",256),

+ 1 - 6
dicttext/system/list.txt

@@ -1,9 +1,4 @@
-comp.csv,comp.db
-comp1.csv,comp.db
-comp2.csv,comp.db
-comp3.csv,comp.db
-comp4.csv,comp.db
-comp5.csv,comp.db
+../../tmp/dict_text/comp.csv,comp.db
 sys_irregular.csv,sys_irregular.db
 sys_regular.csv,sys_regular.db
 sys_regular1.csv,sys_regular.db