| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643 |
- <?php
- require_once "install_head.php";
- ?>
- <!DOCTYPE html>
- <html>
- <head>
- </head>
- <body>
- <p><a href="index.php">Home</a></p>
- <?php
- if(isset($_GET["from"])==false){
- ?>
- <form action="xmlmaker.php" method="get">
- From: <input type="text" value="0" name="from"><br>
- To: <input type="text" value="216" name="to"><br>
- <input type="submit">
- </form>
- <?php
- return;
- }
- $from=$_GET["from"];
- $to=$_GET["to"];
- echo "<h2>Doing $from / $to Don't close this window</h2>";
- $filelist=array();
- $fileNums=0;
- $log="";
- if(($handle=fopen("filelist.csv",'r'))!==FALSE){
- while(($filelist[$fileNums]=fgetcsv($handle,0,','))!==FALSE){
- $fileNums++;
- }
- }
- if($to==0 || $to>=$fileNums) $to=$fileNums-1;
- $FileName=$filelist[$from][1].".htm";
- $fileId=$filelist[$from][0];
- $dirLog=_DIR_LOG_."/";
- $dirHtml=_DIR_PALI_HTML_."/";
- $inputFileName=$FileName;
- $outputFileNameHead=$filelist[$from][1];
- $bookId=$filelist[$from][2];
- $vriParNum=0;
- $wordOrder=1;
- $dirXmlBase=_DIR_PALI_CSV_."/";
- $dirXml=$outputFileNameHead."/";
- $currChapter="";
- $currParNum="";
- $class="";
- $arrAllWords[0]=array("id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un","style","vri","sya","si","ka","pi","pa","kam");
- $g_wordCounter=0;
- $arrUnWords[0]=array("id","word","type","gramma","parent","mean","note","part","partmean","cf","state","delete","tag","len");
- $g_unWordCounter=0;
- $arrToc[0]=array("id","book","par_num","level","class","title","text");
- $g_TocCounter=0;
- $arrUnPart[0]="word";
- $g_unPartCounter=-1;
- /*去掉标点符号的统计*/
- $arrAllPaliWordsCount=array();
- $g_paliWordCounter=0;
- $g_wordCounterInSutta=0;
- $g_paliWordCountCounter=0;
- if(file_exists($dirHtml.$inputFileName)==false){
- die('file ".."not exists...');
- }
- if(is_dir(_DIR_PALI_CSV_)==FALSE){
- if (!mkdir(_DIR_PALI_CSV_)) {
- die('Failed to create folders...');
- }
- }
- if(is_dir($dirXmlBase.$dirXml)==FALSE){
- if (!mkdir($dirXmlBase.$dirXml)) {
- die('Failed to create folders...');
- }
- }
- $parBegin=false;
- function getChildNodeValue($array,$attName){
- if($array){
- foreach($array as $x=>$x_value) {
- if($x==$attName){
- return $x_value;
- }
- }
- }
- return false;
- }
- //函数在 inWord 字符串中查找 是否有非法的字符。找到返回 FALSE 找不到返回 TRUE
- function testPaliWord($inWord){
- $paliletter="āīūṅñṭḍṇḷṃṁŋĀĪŪṄÑṬḌṆḶṂṀŊabcdefghijklmnoprstuvyABCDEFGHIJKLMNOPRSTUVY-";
- for($i=0;$i<mb_strlen($inWord,"UTF-8");$i++){
- if(mb_strpos($paliletter,mb_substr($inWord,$i,1,"UTF-8"))===FALSE){
- return FALSE;
- }
- }
- return TRUE;
- }
- //函数在 inWord 字符串中查找 char_list 中的字符。找到返回true 找不到返回false
- function isPaliWord($inWord){
- $paliletter="āīūṅñṭḍṇḷṃṁŋĀĪŪṄÑṬḌṆḶṂṀŊabcdefghijklmnoprstuvyABCDEFGHIJKLMNOPRSTUVY";
- for($i=0;$i<mb_strlen($paliletter,"UTF-8");$i++){
- if(mb_strpos($inWord,mb_substr($paliletter,$i,1,"UTF-8"))!==FALSE){
- return TRUE;
- }
- }
- return FALSE;
- }
- function makeRealWord($inString){
- $paliletter="āīūṅñṭḍṇḷṃṁŋabcdefghijklmnoprstuvy";
- $lowerWord=mb_strtolower($inString,'UTF-8');
- $output="";
- for($i=0;$i<mb_strlen($lowerWord,"UTF-8");$i++){
- $oneLetter=mb_substr($lowerWord,$i,1,"UTF-8");
- if(mb_strstr($paliletter,$oneLetter,'UTF-8')!==FALSE){
- $output.=$oneLetter;
- }
- }
- return($output);
- }
- function getLastWordIndex($iCurr){
- for($i=1;$i<5;$i++){
- if($GLOBALS['arrAllWords'][$iCurr-$i][5]!=""){
- return($iCurr-$i);
- }
- }
- return -1;
- }
- function splitWords($inStr,$inClass="",$type=0){
-
- $mStr=trim($inStr);
- if(strlen($mStr)==0){
- return;
- }
- if($inClass=="#a#"){
- $GLOBALS['g_wordCounter']++;
- $GLOBALS['wordOrder']++;
- /*"id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un",style,"vri","sya","si","ka","pi","pa","kam"*/
- $realWord=$inStr;
- $word=$inStr;
- if($type==0){
- $thisParNum=$GLOBALS['vriParNum'];
- $thisWordOrder=$GLOBALS['wordOrder'];
- }
- else{
- $thisParNum=$GLOBALS['vriParNum']+1;
- $thisWordOrder=1;
- echo "<p>out side tag:a $word insert next paragraph $thisParNum</p>";
- }
- $wordId=$GLOBALS['bookId']."-".$thisParNum."-".$thisWordOrder;
- $wordinfo=array($GLOBALS['g_wordCounter'],$wordId,$GLOBALS['bookId'],$thisParNum,$word,$realWord,".ctl.",".a.","?","?","?","?","","","NULL",$inClass,$thisWordOrder,0,0,0,0,0,0);
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']]=$wordinfo;
- return;
- }
-
- //toc out put
- $GLOBALS['arrToc'][$GLOBALS['g_TocCounter']][6] .= $inStr;
-
- if($GLOBALS['tocOnly']=="on"){
- return;
- }
-
- $paliletter="āīūṅñṭḍṇḷṃṁŋĀĪŪṄÑṬḌṆḶṂṀŊabcdefghijklmnoprstuvyABCDEFGHIJKLMNOPRSTUVY-";
- $mStr=str_replace("‘"," ‘ ",$mStr);
- $mStr=str_replace("’"," ’ ",$mStr);
- $mStr=str_replace(","," , ",$mStr);
- $mStr=str_replace("."," . ",$mStr);
- $mStr=str_replace("?"," ? ",$mStr);
- $mStr=str_replace("!"," ! ",$mStr);
- $mStr=str_replace("["," [ ",$mStr);
- $mStr=str_replace("]"," ] ",$mStr);
- $mStr=str_replace("("," ( ",$mStr);
- $mStr=str_replace(")"," ) ",$mStr);
- $mStr=str_replace("…"," … ",$mStr);
- $mStr=str_replace("="," = ",$mStr);
- $mStr=str_replace("+"," + ",$mStr);
- $mStr=str_replace(":"," : ",$mStr);
- $mStr=str_replace(";"," ; ",$mStr);
- $mStr=str_replace("§"," § ",$mStr);
- $mStr=str_replace("`"," ` ",$mStr);
- $mStr=str_replace(" "," ",$mStr);
- $mStr=str_replace(" "," ",$mStr);
- $mStr=str_replace(" "," ",$mStr);
- $arrList = mb_split("\s",$mStr);
- foreach ($arrList as $word){
- if(strlen($word)>0){
- $iLastWordIndex=$GLOBALS['g_wordCounter'];
- $GLOBALS['g_wordCounter']++;
- $GLOBALS['wordOrder']++;
- /*"id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un",style,"vri","sya","si","ka","pi","pa","kam"*/
- $realWord=makeRealWord($word);
- if((mb_substr($realWord,0,3,"UTF-8")=="nti" || mb_substr($realWord,0,5,"UTF-8")=="ntyād" || $realWord=="ntveva" || $realWord=="nteva" )&& $word!="Nti"){
- $lastWord=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1];
- if($lastWord[5]!=""/* && $lastWord[15]=="bld"*/)//前一个词不是标点符号,是黑体
- {
- $word=mb_substr($realWord,1);
- $realWord="i".$word;
-
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][4]=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][4]."n";
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][5]=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][5]."ṃ";
- //
- $GLOBALS['g_unPartCounter']++;
- $GLOBALS['arrUnPart'][$GLOBALS['g_unPartCounter']]=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][5];
-
- }
- else{
- $lastWordIndex=getLastWordIndex($GLOBALS['g_wordCounter']-1);
- if($lastWordIndex>0){
- $word=mb_substr($realWord,1);
- $realWord="i".$word;
- $GLOBALS['arrAllWords'][$lastWordIndex][4]=$GLOBALS['arrAllWords'][$lastWordIndex][4]."n";
- $GLOBALS['arrAllWords'][$lastWordIndex][5]=$GLOBALS['arrAllWords'][$lastWordIndex][5]."ṃ";
-
- $GLOBALS['g_unPartCounter']++;
- $GLOBALS['arrUnPart'][$GLOBALS['g_unPartCounter']]=$GLOBALS['arrAllWords'][$lastWordIndex][5];
-
- }
- }
- }
-
- if($realWord=="ti" || mb_substr($realWord,0,4,"UTF-8")=="tiād"){
- $lastWord=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1];
- if($lastWord[5]!="")//前一个词不是标点符号,是黑体
- {
-
- $strEndofWord=mb_substr($lastWord[5],-1,1,"UTF-8");
- if($strEndofWord=="ā" || $strEndofWord=="ī" || $strEndofWord=="ū" ){
- switch($strEndofWord){
- case 'ā':
- $newUnWord=mb_substr($lastWord[5],0,-1,"UTF-8").'a';
- break;
- case 'ī':
- $newUnWord=mb_substr($lastWord[5],0,-1,"UTF-8").'i';
- break;
- case 'ū':
- $newUnWord=mb_substr($lastWord[5],0,-1,"UTF-8").'u';
- break;
- }
- //加入连读词列表
- $GLOBALS['g_unWordCounter']++;
- $GLOBALS['arrUnWords'][$GLOBALS['g_unWordCounter']]=array("NULL",$lastWord[5].$realWord,".un.","","","","","$newUnWord+i".$realWord,"","","","","",mb_strlen($lastWord[5].$realWord,"UTF-8"));
-
- //加入连读词零件列表
- $GLOBALS['g_unPartCounter']++;
- $GLOBALS['arrUnPart'][$GLOBALS['g_unPartCounter']]=$newUnWord;
- }
- //加入连读词列表
- $GLOBALS['g_unWordCounter']++;
- $GLOBALS['arrUnWords'][$GLOBALS['g_unWordCounter']]=array("NULL",$lastWord[5].$realWord,".un.","","","","",$lastWord[5]."+i".$realWord,"","","","","",mb_strlen($lastWord[5].$realWord,"UTF-8"));
- //加入连读词零件列表
- $GLOBALS['g_unPartCounter']++;
- $GLOBALS['arrUnPart'][$GLOBALS['g_unPartCounter']]=$lastWord[5];
-
- //添加到单词列表
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][10]=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][4]."+i".$realWord;
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][4]="{".$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][4]."}".$word;
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][5]=$GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][5].$realWord;
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']-1][6]=".un.";
- $GLOBALS['g_wordCounter']--;
-
- $word="";
- $realWord="";
- }
- else{//前一个词是标点符号
- $lastWordIndex=getLastWordIndex($GLOBALS['g_wordCounter']-1);
- if($lastWordIndex>0){
- //$word="ti";
- $realWord="i{$realWord}";
-
- $strEndofWord=mb_substr($GLOBALS['arrAllWords'][$lastWordIndex][5],-1,1,"UTF-8");
- if($strEndofWord=="ā" || $strEndofWord=="ī" || $strEndofWord=="ū" ){
- switch($strEndofWord){
- case 'ā':
- $newUnWord=mb_substr($GLOBALS['arrAllWords'][$lastWordIndex][5],0,-1,"UTF-8").'a';
- break;
- case 'ī':
- $newUnWord=mb_substr($GLOBALS['arrAllWords'][$lastWordIndex][5],0,-1,"UTF-8").'i';
- break;
- case 'ū':
- $newUnWord=mb_substr($GLOBALS['arrAllWords'][$lastWordIndex][5],0,-1,"UTF-8").'u';
- break;
- }
- //加入连读词零件列表
- $GLOBALS['g_unPartCounter']++;
- $GLOBALS['arrUnPart'][$GLOBALS['g_unPartCounter']]=$newUnWord;
- }
- }
- }
- }
-
- if($word!=""){
- $wordId=$GLOBALS['class'];//$GLOBALS['bookId']."-".$GLOBALS['vriParNum']."-".$GLOBALS['wordOrder'];
- $wordinfo=array($GLOBALS['g_wordCounter'],$wordId,$GLOBALS['bookId'],$GLOBALS['vriParNum'],$word,$realWord,"?","?","?","?","?","?","","","",$inClass,$GLOBALS['wordOrder'],0,0,0,0,0,0);
- $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']]=$wordinfo;
-
- $lcWord=mb_strtolower($word,'UTF-8');
- if(mb_strlen($word,"UTF-8")>1 && isPaliWord($lcWord))
- {
- //$GLOBALS['arrAllPaliWordsCount'][$lcWord][0]=1;
- if(isset($GLOBALS['arrAllPaliWordsCount'][$realWord])){
- $GLOBALS['arrAllPaliWordsCount'][$realWord][1]++;
- }
- else{
- $GLOBALS['arrAllPaliWordsCount'][$realWord][1]=1;
- $GLOBALS['arrAllPaliWordsCount'][$realWord][2]=mb_strlen($realWord,"UTF-8");
- //测试是否有非法字符
- if($lcWord!="’ti"){
- if(testPaliWord($lcWord)===FALSE){
- $errorFileLine = $GLOBALS['from'];
- $errorFileName = $GLOBALS['FileName'];
- $GLOBALS['log'].="$errorFileLine,$errorFileName,error,char error:,".$word."\r\n";
- echo "char error:".$word."<br>";
- }
- }
- }
- $GLOBALS['g_paliWordCounter']++;
- }
- }
- }
- }
- return;
- }
- $xmlfile = $dirHtml.$inputFileName;
- $xmlparser = xml_parser_create();
- echo "doing:".$xmlfile."<br>";
- // 打开文件并读取数据
- $fp = fopen($xmlfile, 'r');
- $xmldata = fread($fp,filesize($xmlfile));
- xml_parse_into_struct($xmlparser,$xmldata,$values);
- xml_parser_free($xmlparser);
- $begin = false;
- $suttaCount=0;
- $output="";
- $suttaName="";
- $log=$log."$from,$FileName,open\r\n";
- foreach ($values as $child)
- {
- $attributes=getChildNodeValue($child,"attributes");
- switch ($child["tag"])
- {
- case "BODY":
- //无法处理的段落块之外的数据 需要手工修改html文件
- $parText="";
- switch($child["type"]){
- case "open":
- $parText=getChildNodeValue($child,"value");
- break;
- case "close":
- break;
- case "complete":
- $parText=getChildNodeValue($child,"value");
- break;
- case "cdata":
- $parText=$child["value"];
- break;
- default:
- echo "无法处理的段落块之外的数据。原因:无法识别的type:";
- $log=$log."$from,$FileName,error,无法处理的段落块之外的数据,原因:无法识别的type in body tag\r\n";
- break;
- }
- if(strlen($parText)>1){
- echo "段落块之外的数据:"."size".strlen($parText).$parText;
- $log=$log. "$from,$FileName,error,无法处理的段落块之外的数据,".$parText."\r\n";
- }
- break;
- case "P":
- $class=getChildNodeValue($attributes,"CLASS");
- {
- switch($child["type"]){
- case "open":
- $vriParNum++;
- $wordOrder=1;
- $g_TocCounter++;
- $arrToc[$g_TocCounter]=array('NULL',$bookId,$vriParNum,"0",$class,"","");
- splitWords(getChildNodeValue($child,"value"));
- $parBegin=true;
- break;
- case "close":
- if($parBegin){
- $parBegin=false;
- }
- break;
- case "complete":
- $vriParNum++;
- $wordOrder=1;
- $parText=getChildNodeValue($child,"value");
- $g_TocCounter++;
- $arrToc[$g_TocCounter]=array('NULL',$bookId,$vriParNum,"0",$class,"","");
- splitWords($parText);
- $parBegin=false;
- break;
- case "cdata":
- splitWords($child["value"]);
- break;
- default:
- echo "无法处理的块P。原因:无法识别的type:";
- $log=$log."$from,$FileName,error,无法处理的块P,原因:无法识别的type\r\n";
- break;
- }
- }
- break;
- case "A":
- switch($child["type"]){
- case "open":
- echo "无法处理的块A。原因:内部有嵌套其他的块<br>";
- $log=$log."$from,$FileName,error,无法处理的块A,原因:内部有嵌套其他的块\r\n";
- break;
- case "close":
- break;
- case "complete":
- $aName=getChildNodeValue($attributes,"NAME");
- if($parBegin===false){
- splitWords($aName,"#a#",1);
- }
- else{
- splitWords($aName,"#a#");
- }
- break;
- default:
- echo "无法处理的块A。原因:无法识别的type:".$child["type"];
- $log=$log."$from,$FileName,error,无法处理的块A,原因:无法识别的type:".$child["type"]."\r\n";
- break;
- }
- break;
- case "SPAN":
- $className="";
- $className=getChildNodeValue($attributes,"CLASS");
- if($className=="paranum"){
- $currParNum=$child["value"];
- }
- $spanValue=getChildNodeValue($child,"value");
- switch($child["type"]){
- case "open":
- splitWords($child["value"],$className);
- break;
- case "close":
- break;
- case "complete":
- if($parBegin){
- if(strlen($spanValue)>0){
- splitWords($child["value"],$className);
- }
- }
- else{
- echo "无法处理的块span。原因:该块在段落外<br>";
- $log=$log."$from,$FileName,error,无法处理的块span,原因:该块在段落外\r\n";
- }
- break;
- case "cdata":
- splitWords($child["value"]);
- break;
- default:
- echo "无法处理的块span。原因:无法识别的type:";
- $log=$log. "$from,$FileName,error,无法处理的块span,原因:无法识别的type:\r\n";
- }
- break;
- default:
- echo "无法处理的tag:".$child["tag"];
- $log=$log. "$from,$FileName,error,无法处理的tag,".$child["tag"]."\r\n";
- }
- }
- $myLogFile = fopen($dirLog."palicanoon.log", "a");
- fwrite($myLogFile, $log);
- fclose($myLogFile);
-
- //Toc
- $counter=0;
- if(($fptitle=fopen($dirXmlBase.$dirXml."/".($from+1)."_title.csv", "w")) === FALSE){
- echo "error: can not open output file toc .";
- }
- if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_toc.csv", "w"))!==FALSE){
- $fpPaliText=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_pali.csv", "w");
- foreach($arrToc as $xWord){
- $xPali=$xWord;
- switch($xWord[4]){
- case "book":
- $xWord[3]=1;
- $xPali[3] = 1;
- break;
- case "chapter":
- $xWord[3]=2;
- $xPali[3] = 2;
- break;
- case "title":
- $xWord[3]=3;
- $xPali[3] = 3;
- break;
- case "subhead":
- $xWord[3]=4;
- $xPali[3] = 4;
- break;
- case "subsubhead":
- $xWord[3]=5;
- $xPali[3] = 5;
- break;
- case "hangnum":
- $xWord[3]=8;
- $xPali[3] = 8;
- break;
- default:
- $xWord[3]=100;
- $xPali[3] = 100;
- break;
- }
-
- if($xWord[3] < 100){
- $xWord[5] = $xWord[6];
- }
-
-
- fputcsv($fpPaliText,$xPali);
- fputcsv($fp,$xWord);
- fputcsv($fptitle,$xWord);
- if($counter>0){
- //fputcsv($fpCombinToc,$xWord);
- }
-
- $counter++;
- }
- fclose($fpPaliText);
- fclose($fp);
- fclose($fptitle);
- //fclose($fpCombinToc);
- echo "TOC 表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead."_toc.csv<br>";
- }
- else{
- echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_toc.csv";
- }
- /*单词表*/
- if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead.".csv", "w"))!==FALSE){
- foreach($arrAllWords as $xWord){
- fputcsv($fp,$xWord);
- }
- fclose($fp);
- echo "单词表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead.".csv<br>";
- }
- else{
- echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead.".csv";
- }
- /*union表*/
- if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_un.csv", "w"))!==FALSE){
- foreach($arrUnWords as $xWord){
- fputcsv($fp,$xWord);
- }
- fclose($fp);
- echo "union表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead."_un.csv<br>";
- }
- else{
- echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_un.csv";
- }
- /*union part 表*/
- if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_un_part.csv", "w"))!==FALSE){
- foreach($arrUnPart as $xWord){
- fwrite($fp,$xWord."\r\n");
- }
- fclose($fp);
- echo "union part 表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead."_un_part.csv<br>";
- }
- else{
- echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_un_part.csv";
- }
- /*Pali单词统计表*/
- $countCsvFileName=$dirXmlBase.$dirXml.$outputFileNameHead."_analysis.csv";
- if(($fp=fopen($countCsvFileName, "w"))!==FALSE){
- $wordCountCsvHead=array("编号","词","数量","百分比","长度");
-
- fputcsv($fp,$wordCountCsvHead);
- $i=0;
- foreach($arrAllPaliWordsCount as $x=>$x_value){
- $i++;
- $csvWord[0]=$i;
- $csvWord[1]=$x;
- $csvWord[2]=$x_value[1];
- $csvWord[3]=$x_value[1]*10000/$g_paliWordCounter;
- $csvWord[4]=$x_value[2];
- fputcsv($fp,$csvWord);
- }
- fclose($fp);
- echo "Pali单词表统计导出到:".$countCsvFileName."<br>";
- }
- else{
- echo "can not open csv file. filename=".$countCsvFileName."<br>";
- }
-
- ?>
- <?php
- if($from>=$to){
- echo "<h2>齐活!功德无量!all done!</h2>";
- }
- else{
- echo "<script>";
- echo "window.location.assign(\"xmlmaker.php?from=".($from+1)."&to=".$to."\")";
- echo "</script>";
- echo "正在载入:".($from+1)."——".$filelist[$from+1][0];
- }
- ?>
- </body>
- </html>
|