quote($word);
$row = PDO_FetchOne($query);
if ($row[0] == 0) {
return false;
} else {
return true;
}
}
/**
 * Split a compound word into two parts.
 *
 * The first part is always a candidate accepted by isExsit() (defined
 * elsewhere in this file); the second part is whatever remains and can be
 * fed back into this function to keep splitting:
 *
 *     $part = array();
 *     while (($split = mySplit($splitWord)) !== false) {
 *         array_push($part, $split[0]);
 *         $splitWord = $split[1];
 *     }
 *     // $part now holds the leading components that were found.
 *
 * Algorithm: starting from the longest possible prefix, drop one trailing
 * character at a time and look the prefix up. Only words longer than
 * 5 characters are split, and a prefix is never shorter than 4 characters.
 *
 * Sandhi handling:
 *  - A final "a" of the first member may have been elided (e.g. ~a + i~ = ~i~),
 *    so each prefix is also tried with an "a" appended.
 *  - The first consonant of the second member may have been doubled
 *    (word + tha~ = wordttha~); one copy of that consonant is removed.
 *  - If nothing matches, a leading "ā" is retried as "a" (a + a = ā),
 *    otherwise a leading "e" is retried as "i".
 *
 * @param string $strWord UTF-8 word to split.
 * @return array|false array(firstPart, secondPart), or false when no split was found.
 */
function mySplit($strWord)
{
    $len = mb_strlen($strWord, "UTF-8");
    if ($len <= 5) {
        return false;
    }

    $found = mySplitScanPrefixes($strWord, $len);
    if ($found !== false) {
        return $found;
    }

    // Retry with the initial vowel "un-merged". The replacement is always one
    // character for one character, so $len stays valid.
    $initial = mb_substr($strWord, 0, 1, "UTF-8");
    $remainder = mb_substr($strWord, 1, null, "UTF-8");
    if ($initial === "ā") {
        return mySplitScanPrefixes('a' . $remainder, $len);
    }
    if ($initial === "e") {
        return mySplitScanPrefixes('i' . $remainder, $len);
    }

    return false;
}

/**
 * Try every prefix of $word from longest to shortest (minimum 4 characters)
 * and return the first split whose prefix, or prefix + "a", isExsit() accepts.
 *
 * @param string $word UTF-8 word to scan.
 * @param int    $len  Character length of $word.
 * @return array|false array(firstPart, secondPart) or false when nothing matched.
 */
function mySplitScanPrefixes($word, $len)
{
    for ($i = $len - 1; $i > 3; $i--) {
        $head = mb_substr($word, 0, $i, "UTF-8");
        $tail = mb_substr($word, $i, null, "UTF-8");

        if (isExsit($head)) {
            return array($head, mySplitDropDoubledConsonant($tail));
        }

        // Restore a possibly elided final "a" and look again.
        $headWithA = $head . "a";
        if (isExsit($headWithA)) {
            return array($headWithA, mySplitDropDoubledConsonant($tail));
        }
    }

    return false;
}

/**
 * Remove the first character of $tail when it begins with a doubled
 * consonant (kk, gg, cc, jj, ṭṭ, ḍḍ, tt, dd, pp, bb).
 *
 * Both leading characters must be the same consonant. Mixed pairs such as
 * "td" and tails consisting of a single character are returned unchanged.
 *
 * @param string $tail Second part of a split.
 * @return string $tail without the duplicated leading consonant.
 */
function mySplitDropDoubledConsonant($tail)
{
    $doubling = array("k", "g", "c", "j", "ṭ", "ḍ", "t", "d", "p", "b");
    $first = mb_substr($tail, 0, 1, "UTF-8");
    $second = mb_substr($tail, 1, 1, "UTF-8");

    if ($first === $second && in_array($first, $doubling, true)) {
        return mb_substr($tail, 1, null, "UTF-8");
    }

    return $tail;
}
/**
 * Collect every split of a word that can be explained by a sandhi rule.
 *
 * Each rule in the global $sandhi table is read as a numerically indexed row:
 *   [0] ending to append to the first part,
 *   [1] beginning to prepend to the second part,
 *   [2] the merged form to look for inside the word,
 *   [3] the character length of [2].
 * A candidate is kept only when isExsit() (defined elsewhere in this file)
 * accepts the reconstructed first part.
 *
 * @param string $strWord UTF-8 word to split.
 * @return array List of array(firstPart, secondPart); empty when nothing matched
 *               or when no sandhi table is available.
 */
function mySplit2($strWord)
{
    // The rule table is not passed in, so it has to come from global scope;
    // without this declaration the loop below never sees any rule.
    // NOTE(review): assumes $sandhi is defined as a global elsewhere — confirm.
    global $sandhi;

    $output = array();
    if (!is_array($sandhi)) {
        return $output;
    }

    $len = mb_strlen($strWord, "UTF-8");
    for ($i = $len - 1; $i > 1; $i--) {
        foreach ($sandhi as $row) {
            if (mb_substr($strWord, $i, $row[3], "UTF-8") != $row[2]) {
                continue;
            }

            // NOTE(review): the prefix stops at $i - 1, so the character just
            // before the match is dropped as well — confirm against the rule table.
            $str1 = mb_substr($strWord, 0, $i - 1, "UTF-8") . $row[0];
            // Skip the merged form by its length ($row[3]), not by the form itself.
            $str2 = $row[1] . mb_substr($strWord, $i + $row[3], null, "UTF-8");

            if (isExsit($str1)) {
                array_push($output, array($str1, $str2));
            }
        }
    }

    return $output;
}
switch ($op) {
case "pre": //预查询
PDO_Connect(_FILE_DB_REF_INDEX_);
echo "