Răsfoiți Sursa

Merge branch 'master' into master

Bhikkhu-Kosalla 5 ani în urmă
părinte
comite
98ebe995c2

+ 208 - 0
app/pali_sent/pali_sent.php

@@ -28,6 +28,214 @@ if(isset($_GET["id"])){
 
 global $PDO;
 PDO_Connect("sqlite:"._FILE_DB_PALI_SENTENCE_);
+
+if(isset($_GET["sent"])){
+	$_sent=mb_strtolower($_GET["sent"],'UTF-8');
+}
+
+// 输入一个句子,输出整个句子的单词 array
+function words_of_sentence(string $sent) {
+	$words = preg_split("/[ \.\[\]\{\}\-,';‘’–0123456789]+/", $sent); // 去除标点、数字
+	$words = array_filter($words);  // 去除空词
+	$words = array_filter($words, function($item) { 
+		if ($item != 'ca' && $item != 'vā' && $item != 'na') return $item; }); // 去除 ca, vā 和 na
+	return $words;
+}
+
+// 采用 jaccard 相似度,考虑到圣典中的相似句单词、句式都是非常接近的
+function jaccard_similarity($words_of_sent1, $words_of_sent2) {
+	$intersect = count(array_intersect($words_of_sent1, $words_of_sent2));
+	$union = count($words_of_sent1)+count($words_of_sent2)-$intersect;
+	if ($union) {
+		return $intersect / $union;
+	} else {
+		return 0;
+	}
+}
+
+// 带顺序的 jaccard 算法,当前效果一般,TODO: 切片相同时加入得分
+function ordered_jaccard_similarity($words_of_sent1, $words_of_sent2) {
+	$score = 0;
+	$k = min(count($words_of_sent1), count($words_of_sent2));
+	for ($i=1; $i<$k; $i++) {
+		$score += jaccard_similarity(
+			array_slice($words_of_sent1, 0, $i),
+			array_slice($words_of_sent2, 0, $i));
+	}
+	return $score / $k;
+}
+
+// 定义一个链表节点,方便 sim_sent_id 按照相似度插入
+class sim_sent_node {
+	public $id;
+	public $jaccard_score;
+	public $next;
+
+	public function __construct($id = null, $jaccard_score = null, $next = null) {
+		$this->id = $id;
+		$this->jaccard_score = $jaccard_score;
+		$this->next = $next;
+	}
+}
+
+// 定义链表
+class sim_sent_list {
+	public $head;    // 头节点,默认一个虚头节点
+	public $size;
+
+	public function __construct() {
+		$this->head = new sim_sent_node();
+		$this->size = 0;
+	}
+
+	// 按照 jaccard_score 相似度插入
+	public function jaccard_add($id, $jaccard_score) {
+		$prev = $this->head;
+		while ($prev->next != null && $prev->next->jaccard_score > $jaccard_score) {
+			$prev = $prev->next;
+		}
+		$prev->next = new sim_sent_node($id, $jaccard_score, $prev->next);
+		$this->size++;
+	}
+
+	public function get_text_list() {
+		$prev = $this->head;
+		if ($this->size == 0) {
+			return;
+		}
+
+		$ids = "";
+		while ($prev->next != null) {
+			$ids = $ids.",".$prev->next->id;
+			$prev = $prev->next;
+		}
+		$ids = substr($ids, 1); // 去掉第一个逗号
+		return $ids;
+	}
+
+	public function print_list() {
+		$prev = $this->head;
+		if ($this->size == 0) {
+			return;
+		}
+		while ($prev->next != null) {
+			print($prev->next->id."\t".$prev->next->jaccard_score."\n");
+			$prev = $prev->next;
+		}
+	}
+}
+
+// 将相似句列表存入数据库
+function insert_similar_sent_list_into_sqlite($current_id, $text_list) {
+	/* 使用这部分代码先为数据库添加一个 sim_sents 字段
+	$add_column = "ALTER TABLE pali_sent ADD COLUMN sim_sents TEXT";
+	$Action_add = PDO_Execute($add_column);
+	$query = "PRAGMA TABLE_INFO (pali_sent)";
+	$Fetch = PDO_FetchALL($query);
+	print_r($Fetch);
+	*/
+	global $PDO;
+	$Update = "UPDATE pali_sent SET sim_sents = ".$PDO->quote($text_list)." WHERE id = ".$current_id;
+	$Result = PDO_Execute($Update);
+	return;
+}
+
+// 预计算,存入数据库
+function similar_sent_matrix() {
+	// 按照 count = 18, 8, ..., 255 依次获得查询结果 (i-3,i+3)
+	//          count = 17,16,...,7                                       (i-2,i+2)
+	for ($current_count=17; $current_count > 7; $current_count--) { 
+		print("单词数:".$current_count."\n");
+		$current_query = "select id,text from pali_sent where count=".$current_count;
+		$Current = PDO_FetchAll($current_query);
+		if (count($Current)) {
+			foreach($Current as $current_row) {
+				$current_id = $current_row['id'];
+				$current_sent = $current_row['text'];
+				$current_words = words_of_sentence($current_sent);
+				
+				// 按照 count > $current_count-3 and count <$current_count+3 查询希望比较的语句
+				$compare_query = "select id,text from pali_sent where count>".($current_count-2)." and count<".($current_count+2);
+				$Compare = PDO_FetchALL($compare_query);
+
+				$current_sim_sent_list = new sim_sent_list(); // 新建相似句链表
+				foreach($Compare as $compare_row) {
+					if ($current_row != $compare_row) {
+						$compare_id = $compare_row['id'];
+						$compare_sent = $compare_row['text'];
+						$compare_words = words_of_sentence($compare_sent);
+
+						$jaccard_score = jaccard_similarity($current_words, $compare_words);
+						if ($jaccard_score > 0.3) {
+							$current_sim_sent_list->jaccard_add($compare_id, $jaccard_score);
+						}
+					}
+				} // end of foreach $compare_row
+
+				if ($current_sim_sent_list->size != 0) {
+					print("update ".$current_id."\n");
+					$text_list = $current_sim_sent_list->get_text_list();
+					insert_similar_sent_list_into_sqlite($current_id, $text_list);
+				}
+			} // end of foreach $current_row
+		}
+	}
+	return;
+}
+
+// 实时计算相似句
+function sents_similar_to_id($id) {
+	$query = "SELECT count,text FROM pali_sent WHERE id=".$id;
+	$Current = PDO_FetchALL($query);
+	if (count($Current)) {
+		foreach($Current as $current_row) {
+			$current_count = $current_row['count'];
+			$current_sent = $current_row['text'];
+			$current_words = words_of_sentence($current_sent);
+			print("current text: \n".$current_sent."\n");
+
+			if ($current_count <= 5) {
+				print("[-] too short.\n");
+				return;
+			}
+
+			// 只和单词数大于 5 的比较
+			$compare_query = "SELECT id,text FROM pali_sent WHERE count>5";
+			$Compare = PDO_FetchALL($compare_query);
+
+			$current_sim_sent_list = new sim_sent_list(); // 新建相似句链表
+			foreach($Compare as $compare_row) {
+				if ($current_row != $compare_row) {
+					$compare_id = $compare_row['id'];
+					$compare_sent = $compare_row['text'];
+					$compare_words = words_of_sentence($compare_sent);
+
+					$jaccard_score = jaccard_similarity($current_words, $compare_words);
+					if ($jaccard_score > 0.3) {
+						print("Jaccard similarity: ".$jaccard_score."\tSentence id:".$compare_id."\n");
+						print("Text: \n". $compare_sent."\n");
+						$current_sim_sent_list->jaccard_add($compare_id, $jaccard_score);
+					}
+				}
+			} // end of foreach $compare_row
+
+			if ($current_sim_sent_list->size != 0) {
+				// $current_sim_sent_list->print_list();
+			} else {
+				print("[-]not found.\n");
+			}
+
+		} // end of foreach($Current)
+	} // end of if (count($Current))
+}
+
+$id = $argv[1];
+sents_similar_to_id($id);
+
+if (!isset($_op)) {
+	exit(0);
+}
+
 switch($_op){
 	case "get":
 	{

+ 1 - 1
app/studio/dictadmin/user/pla.php

@@ -5827,4 +5827,4 @@ function checkVersion(installed,url)
 {var xhr=createCORSRequest('GET',url);if(!xhr)
 return false;xhr.onload=function()
 {if(xhr.responseText.split("\n").indexOf(installed)==-1)
-{document.getElementById('oldVersion').style.display='inline';}};xhr.send();}AAABAAEAEBAAAAEAIAAoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADwoKZQAAABMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEMDJMAAAAdAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASDg7BAAAASAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAETDw9CCQkJ1QUFBb4AAABjAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgsLVgAAAO8YExP/AAAA7QAAALEAAAARAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQNDUsAAADJGBMT/xgTE/8AAAD/AAAAuAAAAA8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZERE8DQoKwhgTE/8QDQ2sGBMT/xgTE/8AAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwICHkAAAD/EQwMzQAAAMIAAAD/AAAA7gAAAGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOCwtWAAAA8RgTE/8IBQW1AAAA/wAAAP8AAADlAAAAHQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA0KCsEAAAD/EQ8PzAAAAMkAAAD/AAAA/wAAAHoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABDAAAA/xgTE/8TDw/FAAAA8gAAAP8AAADqAAAAJgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAALUAAAD/GBMT/wAAALkAAAD/AAAA/wAAAIkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAfAAAA4wAAAP8GBgbFAAAA2QAAAP8AAADGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF4AAAD5AAAA/RgTE/8AAAD/AAAA0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQQAAAOIAAAD/AAAA/wAAAHYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjAAAArgAAAG4AAAAAAAAAAAAAAAAAAAAA
+{document.getElementById('oldVersion').style.display='inline';}};xhr.send();}AAABAAEAEBAAAAEAIAAoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADwoKZQAAABMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEMDJMAAAAdAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASDg7BAAAASAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAETDw9CCQkJ1QUFBb4AAABjAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgsLVgAAAO8YExP/AAAA7QAAALEAAAARAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQNDUsAAADJGBMT/xgTE/8AAAD/AAAAuAAAAA8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZERE8DQoKwhgTE/8QDQ2sGBMT/xgTE/8AAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwICHkAAAD/EQwMzQAAAMIAAAD/AAAA7gAAAGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOCwtWAAAA8RgTE/8IBQW1AAAA/wAAAP8AAADlAAAAHQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA0KCsEAAAD/EQ8PzAAAAMkAAAD/AAAA/wAAAHoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABDAAAA/xgTE/8TDw/FAAAA8gAAAP8AAADqAAAAJgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAALUAAAD/GBMT/wAAALkAAAD/AAAA/wAAAIkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAfAAAA4wAAAP8GBgbFAAAA2QAAAP8AAADGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF4AAAD5AAAA/RgTE/8AAAD/AAAA0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQQAAAOIAAAD/AAAA/wAAAHYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjAAAArgAAAG4AAAAAAAAAAAAAAAAAAAAA

+ 1 - 1
app/studio/editor.php

@@ -378,7 +378,7 @@ foreach($plugin_list as $info){
 					</button> 
 				
 				<!--Save-->
-				<button id="B_Save" class="icon_btn" onclick="editor_save()" type="button" title="<?php echo $module_gui_str['editor']['1017'];?>">
+				<button id="B_Save" class="icon_btn" onclick="editor_save()" type="button" style="display:none;" title="<?php echo $module_gui_str['editor']['1017'];?>">
 						<svg class="icon">
 							<use xlink:href="svg/icon.svg#ic_save"></use>
 						</svg>

+ 1 - 1
documents/users_guide/zh-cn/grammar_abbr.md

@@ -36,4 +36,4 @@
 | 后缀     | suffix    | (-ika, -ana, -tā)            |
 | 语尾     | case end  | vibhatti (-ena, -ehi)        |
 | 动原型   | v:base    | karo(ti) gaccha(ti)          |
-| 原版注释 | note      | sī sya                       |
+| 原版注释 | note      | sī sya                       |