2
0

xmlmaker.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  <?php
  require_once "install_head.php";
  ?>
  <!DOCTYPE html>
  <html>
  <head>
  </head>
  <body>
  <p><a href="index.php">Home</a></p>
  <?php
  /*
   * Setup stage of the converter.
   *
   * Reads the requested file range (?from=..&to=..), loads filelist.csv,
   * selects the row at index $from and initialises the global state that
   * the helper functions below access through $GLOBALS. The constants
   * _DIR_LOG_, _DIR_PALI_HTML_ and _DIR_PALI_CSV_ are not defined in this
   * file (presumably by install_head.php -- confirm).
   */

  // No range submitted yet: show the range form and stop.
  if (!isset($_GET["from"])) {
  ?>
  <form action="xmlmaker.php" method="get">
  From: <input type="text" value="0" name="from"><br>
  To: <input type="text" value="216" name="to"><br>
  <input type="submit">
  </form>
  </body>
  </html>
  <?php
      return;
  }

  // Both values are echoed into HTML/JavaScript and used as array indexes,
  // so they are forced to integers before any further use.
  $from = (int)$_GET["from"];
  $to = isset($_GET["to"]) ? (int)$_GET["to"] : 0;
  echo "<h2>Doing $from / $to Don't close this window</h2>";

  // Load the list of source files; each CSV row becomes one entry.
  $filelist = array();
  $fileNums = 0;
  $log = "";
  $handle = fopen("filelist.csv", 'r');
  if ($handle === FALSE) {
      die('can not open filelist.csv');
  }
  while (($row = fgetcsv($handle, 0, ',')) !== FALSE) {
      $filelist[$fileNums] = $row;
      $fileNums++;
  }
  fclose($handle);

  // "to" of 0 (or anything past the end) means "up to the last file".
  if ($to == 0 || $to >= $fileNums) {
      $to = $fileNums - 1;
  }
  if ($from < 0 || $from >= $fileNums) {
      die("filelist.csv has no entry at index $from");
  }

  // Row columns as used below: [0] file id, [1] file name without extension, [2] book id.
  $fileRow = $filelist[$from];
  $fileId = isset($fileRow[0]) ? $fileRow[0] : "";
  $outputFileNameHead = isset($fileRow[1]) ? $fileRow[1] : "";
  $bookId = isset($fileRow[2]) ? $fileRow[2] : "";
  $FileName = $outputFileNameHead . ".htm";
  $inputFileName = $FileName;

  $dirLog = _DIR_LOG_ . "/";
  $dirHtml = _DIR_PALI_HTML_ . "/";
  $dirXmlBase = _DIR_PALI_CSV_ . "/";
  $dirXml = $outputFileNameHead . "/";

  // Running position inside the document.
  $vriParNum = 0;
  $wordOrder = 1;
  $currChapter = "";
  $currParNum = "";
  $class = "";

  // Output tables; row 0 of each table is its CSV header.
  $arrAllWords[0] = array("id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un","style","vri","sya","si","ka","pi","pa","kam");
  $g_wordCounter = 0;
  $arrUnWords[0] = array("id","word","type","gramma","parent","mean","note","part","partmean","cf","state","delete","tag","len");
  $g_unWordCounter = 0;
  $arrToc[0] = array("id","book","par_num","level","class","title","text");
  $g_TocCounter = 0;
  $arrUnPart[0] = "word";
  $g_unPartCounter = -1;

  /* Word statistics with punctuation removed */
  $arrAllPaliWordsCount = array();
  $g_paliWordCounter = 0;
  $g_wordCounterInSutta = 0;
  $g_paliWordCountCounter = 0;

  if (!file_exists($dirHtml . $inputFileName)) {
      die('file "' . htmlspecialchars($dirHtml . $inputFileName) . '" not exists...');
  }
  // Create the output folders on demand (recursively, so a missing parent is not fatal).
  if (!is_dir(_DIR_PALI_CSV_)) {
      if (!mkdir(_DIR_PALI_CSV_, 0777, true)) {
          die('Failed to create folders...');
      }
  }
  if (!is_dir($dirXmlBase . $dirXml)) {
      if (!mkdir($dirXmlBase . $dirXml, 0777, true)) {
          die('Failed to create folders...');
      }
  }

  // TRUE while the parser is between a <p> "open" and its "close".
  $parBegin = false;
  /**
   * Returns the value stored under the first key of $array that loosely
   * equals $attName.
   *
   * @param mixed  $array   Array to search; any falsy value (e.g. FALSE) yields FALSE.
   * @param string $attName Key to look for (compared with ==).
   * @return mixed The matching value, or FALSE when nothing matches.
   */
  function getChildNodeValue($array,$attName){
      if(!$array){
          return false;
      }
      foreach($array as $key=>$item){
          if($key==$attName){
              return $item;
          }
      }
      return false;
  }
  /**
   * Checks that every character of $inWord belongs to the allowed Pali
   * alphabet (letters with diacritics, plain Latin letters, and the hyphen).
   *
   * @param string $inWord UTF-8 word to validate.
   * @return bool FALSE as soon as a character outside the alphabet is found,
   *              TRUE otherwise (including for an empty string).
   */
  function testPaliWord($inWord){
      $paliletter="āīūṅñṭḍṇḷṃṁŋĀĪŪṄÑṬḌṆḶṂṀŊabcdefghijklmnoprstuvyABCDEFGHIJKLMNOPRSTUVY-";
      // Length is loop-invariant, so compute it once.
      $length=mb_strlen($inWord,"UTF-8");
      for($i=0;$i<$length;$i++){
          $letter=mb_substr($inWord,$i,1,"UTF-8");
          // Encoding is passed explicitly so the lookup does not depend on
          // the configured internal encoding.
          if(mb_strpos($paliletter,$letter,0,"UTF-8")===FALSE){
              return FALSE;
          }
      }
      return TRUE;
  }
  /**
   * Tells whether $inWord contains at least one letter of the Pali alphabet
   * (letters with diacritics or plain Latin letters; the hyphen does not count).
   *
   * @param string $inWord UTF-8 text to inspect.
   * @return bool TRUE when any alphabet letter occurs in $inWord, FALSE otherwise.
   */
  function isPaliWord($inWord){
      $paliletter="āīūṅñṭḍṇḷṃṁŋĀĪŪṄÑṬḌṆḶṂṀŊabcdefghijklmnoprstuvyABCDEFGHIJKLMNOPRSTUVY";
      // Length is loop-invariant, so compute it once.
      $letterCount=mb_strlen($paliletter,"UTF-8");
      for($i=0;$i<$letterCount;$i++){
          $letter=mb_substr($paliletter,$i,1,"UTF-8");
          // Encoding is passed explicitly so the lookup does not depend on
          // the configured internal encoding.
          if(mb_strpos($inWord,$letter,0,"UTF-8")!==FALSE){
              return TRUE;
          }
      }
      return FALSE;
  }
  /**
   * Normalises a token to its "real" word form: lower-cases it and drops
   * every character that is not a lower-case letter of the Pali alphabet
   * (punctuation, digits, hyphens, quotes, ...).
   *
   * @param string $inString UTF-8 token as it appears in the text.
   * @return string The filtered, lower-cased word (possibly empty).
   */
  function makeRealWord($inString){
      $paliletter="āīūṅñṭḍṇḷṃṁŋabcdefghijklmnoprstuvy";
      $lowerWord=mb_strtolower($inString,'UTF-8');
      $length=mb_strlen($lowerWord,"UTF-8");
      $output="";
      for($i=0;$i<$length;$i++){
          $oneLetter=mb_substr($lowerWord,$i,1,"UTF-8");
          // mb_strpos with an explicit encoding; the previous mb_strstr call
          // passed 'UTF-8' in the position of the boolean before_needle flag.
          if(mb_strpos($paliletter,$oneLetter,0,"UTF-8")!==FALSE){
              $output.=$oneLetter;
          }
      }
      return($output);
  }
  /**
   * Looks backwards from $iCurr in $GLOBALS['arrAllWords'] for the nearest
   * entry whose "real" column (index 5) is not empty.
   *
   * Only the four entries before $iCurr are examined. Missing entries
   * (negative or unset indexes) are skipped instead of being dereferenced.
   *
   * @param int $iCurr Index to start looking back from (exclusive).
   * @return int Index of the entry found, or -1 when none of the four qualifies.
   */
  function getLastWordIndex($iCurr){
      for($i=1;$i<5;$i++){
          $index=$iCurr-$i;
          if(isset($GLOBALS['arrAllWords'][$index][5]) && $GLOBALS['arrAllWords'][$index][5]!=""){
              return($index);
          }
      }
      return -1;
  }
  /**
   * Returns $realWord with a final long vowel (ā, ī, ū) replaced by the
   * matching short vowel, or FALSE when the word does not end in one.
   */
  function splitWords_shortenFinalVowel($realWord){
      $shortForms=array('ā'=>'a','ī'=>'i','ū'=>'u');
      $lastLetter=mb_substr($realWord,-1,1,"UTF-8");
      if(!isset($shortForms[$lastLetter])){
          return FALSE;
      }
      return mb_substr($realWord,0,-1,"UTF-8").$shortForms[$lastLetter];
  }

  /**
   * Appends one entry to the union-part list ($GLOBALS['arrUnPart']).
   */
  function splitWords_addUnionPart($partWord){
      $GLOBALS['g_unPartCounter']++;
      $GLOBALS['arrUnPart'][$GLOBALS['g_unPartCounter']]=$partWord;
  }

  /**
   * Appends one row to the union-word table ($GLOBALS['arrUnWords']) and
   * records $partWord in the union-part list.
   */
  function splitWords_addUnionWord($unionWord,$partsFormula,$partWord){
      $GLOBALS['g_unWordCounter']++;
      $GLOBALS['arrUnWords'][$GLOBALS['g_unWordCounter']]=array("NULL",$unionWord,".un.","","","","",$partsFormula,"","","","","",mb_strlen($unionWord,"UTF-8"));
      splitWords_addUnionPart($partWord);
  }

  /**
   * Appends "n" to the spelling and "ṃ" to the real form of the word at
   * $index in $GLOBALS['arrAllWords'] and records the new real form as a
   * union part.
   */
  function splitWords_appendNiggahita($index){
      $GLOBALS['arrAllWords'][$index][4]=$GLOBALS['arrAllWords'][$index][4]."n";
      $GLOBALS['arrAllWords'][$index][5]=$GLOBALS['arrAllWords'][$index][5]."ṃ";
      splitWords_addUnionPart($GLOBALS['arrAllWords'][$index][5]);
  }

  /**
   * Splits a text fragment into tokens and appends them to the global tables.
   *
   * With $inClass "#a#" the whole fragment is stored as one control row
   * (type ".ctl.") in $GLOBALS['arrAllWords'] and nothing else happens.
   * Otherwise the fragment is appended to the text column of the current
   * TOC row, punctuation is separated from words, and every token becomes a
   * row of $GLOBALS['arrAllWords']. Tokens starting with "nti"/"ntyād" (or
   * equal to "ntveva"/"nteva") hand their leading "n" to the preceding word;
   * a token "ti"/"tiād..." directly after a word is merged into that word and
   * recorded in the union tables. Pali word statistics are updated in
   * $GLOBALS['arrAllPaliWordsCount'].
   *
   * @param string $inStr   Text fragment (an anchor name when $inClass is "#a#").
   * @param mixed  $inClass Style class of the fragment, stored in the "style" column.
   * @param int    $type    Only used with "#a#": non-zero places the control row
   *                        at position 1 of the next paragraph.
   * @return void
   */
  function splitWords($inStr,$inClass="",$type=0){
      // Callers pass FALSE/NULL when a node has no value; treat that as empty text.
      $mStr=trim((string)$inStr);
      if(strlen($mStr)==0){
          return;
      }
      if($inClass=="#a#"){
          $GLOBALS['g_wordCounter']++;
          $GLOBALS['wordOrder']++;
          /*"id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un",style,"vri","sya","si","ka","pi","pa","kam"*/
          $realWord=$inStr;
          $word=$inStr;
          if($type==0){
              $thisParNum=$GLOBALS['vriParNum'];
              $thisWordOrder=$GLOBALS['wordOrder'];
          }
          else{
              // Anchor found outside a paragraph: attach it to the next one.
              $thisParNum=$GLOBALS['vriParNum']+1;
              $thisWordOrder=1;
              echo "<p>out side tag:a $word insert next paragraph $thisParNum</p>";
          }
          $wordId=$GLOBALS['bookId']."-".$thisParNum."-".$thisWordOrder;
          $wordinfo=array($GLOBALS['g_wordCounter'],$wordId,$GLOBALS['bookId'],$thisParNum,$word,$realWord,".ctl.",".a.","?","?","?","?","","","NULL",$inClass,$thisWordOrder,0,0,0,0,0,0);
          $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']]=$wordinfo;
          return;
      }
      // TOC output: accumulate the raw text of the current paragraph.
      $GLOBALS['arrToc'][$GLOBALS['g_TocCounter']][6] .= $inStr;
      // 'tocOnly' is not defined anywhere in this file, so it must be tested with isset.
      if(isset($GLOBALS['tocOnly']) && $GLOBALS['tocOnly']=="on"){
          return;
      }
      // Surround punctuation with spaces so each mark becomes its own token.
      $punctuation=array("‘","’",",",".","?","!","[","]","(",")","…","=","+",":",";","§","`");
      foreach($punctuation as $mark){
          $mStr=str_replace($mark," ".$mark." ",$mStr);
      }
      // NOTE(review): these three replacements are reproduced as they appear in the
      // file; they look like whitespace normalisation -- confirm the exact characters.
      $mStr=str_replace(" "," ",$mStr);
      $mStr=str_replace(" "," ",$mStr);
      $mStr=str_replace(" "," ",$mStr);
      $arrList = mb_split("\s",$mStr);
      if($arrList===FALSE){
          return;
      }
      foreach ($arrList as $word){
          if(strlen($word)==0){
              continue;
          }
          $GLOBALS['g_wordCounter']++;
          $GLOBALS['wordOrder']++;
          /*"id","wid","book","paragraph","word","real","type","gramma","mean","note","part","partmean","bmc","bmt","un",style,"vri","sya","si","ka","pi","pa","kam"*/
          $realWord=makeRealWord($word);

          // "...n ti": the leading "n" belongs to the preceding word.
          $startsWithNti=(mb_substr($realWord,0,3,"UTF-8")=="nti" || mb_substr($realWord,0,5,"UTF-8")=="ntyād" || $realWord=="ntveva" || $realWord=="nteva");
          if($startsWithNti && $word!="Nti"){
              $targetIndex=$GLOBALS['g_wordCounter']-1;
              $hasTarget=TRUE;
              if($GLOBALS['arrAllWords'][$targetIndex][5]==""){
                  // Previous token is punctuation: look a little further back.
                  $targetIndex=getLastWordIndex($targetIndex);
                  $hasTarget=($targetIndex>0);
              }
              if($hasTarget){
                  $word=mb_substr($realWord,1,NULL,"UTF-8");
                  $realWord="i".$word;
                  splitWords_appendNiggahita($targetIndex);
              }
          }

          if($realWord=="ti" || mb_substr($realWord,0,4,"UTF-8")=="tiād"){
              $prevIndex=$GLOBALS['g_wordCounter']-1;
              $lastWord=$GLOBALS['arrAllWords'][$prevIndex];
              if($lastWord[5]!=""){
                  // Previous token is a word: merge "ti" into it.
                  $unionWord=$lastWord[5].$realWord;
                  $newUnWord=splitWords_shortenFinalVowel($lastWord[5]);
                  if($newUnWord!==FALSE){
                      // Also record the variant with the final vowel shortened.
                      splitWords_addUnionWord($unionWord,"$newUnWord+i".$realWord,$newUnWord);
                  }
                  splitWords_addUnionWord($unionWord,$lastWord[5]."+i".$realWord,$lastWord[5]);
                  // Fold the token into the previous row of the word table.
                  $GLOBALS['arrAllWords'][$prevIndex][10]=$GLOBALS['arrAllWords'][$prevIndex][4]."+i".$realWord;
                  $GLOBALS['arrAllWords'][$prevIndex][4]="{".$GLOBALS['arrAllWords'][$prevIndex][4]."}".$word;
                  $GLOBALS['arrAllWords'][$prevIndex][5]=$GLOBALS['arrAllWords'][$prevIndex][5].$realWord;
                  $GLOBALS['arrAllWords'][$prevIndex][6]=".un.";
                  // The token produced no row of its own; give its id back.
                  $GLOBALS['g_wordCounter']--;
                  $word="";
                  $realWord="";
              }
              else{
                  // Previous token is punctuation.
                  $lastWordIndex=getLastWordIndex($prevIndex);
                  if($lastWordIndex>0){
                      $realWord="i{$realWord}";
                      $newUnWord=splitWords_shortenFinalVowel($GLOBALS['arrAllWords'][$lastWordIndex][5]);
                      if($newUnWord!==FALSE){
                          splitWords_addUnionPart($newUnWord);
                      }
                  }
              }
          }

          if($word==""){
              continue;
          }
          // The "wid" column holds the current paragraph class, read from $GLOBALS['class'].
          $wordId=$GLOBALS['class'];
          $wordinfo=array($GLOBALS['g_wordCounter'],$wordId,$GLOBALS['bookId'],$GLOBALS['vriParNum'],$word,$realWord,"?","?","?","?","?","?","","","",$inClass,$GLOBALS['wordOrder'],0,0,0,0,0,0);
          $GLOBALS['arrAllWords'][$GLOBALS['g_wordCounter']]=$wordinfo;

          // Statistics: only tokens longer than one character that contain a letter.
          $lcWord=mb_strtolower($word,'UTF-8');
          if(mb_strlen($word,"UTF-8")>1 && isPaliWord($lcWord)){
              if(isset($GLOBALS['arrAllPaliWordsCount'][$realWord])){
                  $GLOBALS['arrAllPaliWordsCount'][$realWord][1]++;
              }
              else{
                  $GLOBALS['arrAllPaliWordsCount'][$realWord][1]=1;
                  $GLOBALS['arrAllPaliWordsCount'][$realWord][2]=mb_strlen($realWord,"UTF-8");
                  // First occurrence: check for characters outside the Pali alphabet.
                  if($lcWord!="’ti" && testPaliWord($lcWord)===FALSE){
                      $errorFileLine = $GLOBALS['from'];
                      $errorFileName = $GLOBALS['FileName'];
                      $GLOBALS['log'].="$errorFileLine,$errorFileName,error,char error:,".$word."\r\n";
                      echo "char error:".$word."<br>";
                  }
              }
              $GLOBALS['g_paliWordCounter']++;
          }
      }
      return;
  }
  /*
   * Main stage: parse the HTML source as XML, walk the flat node list and
   * feed every text fragment to splitWords(), then export the collected
   * tables as CSV files and chain to the next file of the range.
   */
  $xmlfile = $dirHtml.$inputFileName;
  echo "doing:".$xmlfile."<br>";

  // Read the whole file; file_get_contents also copes with an empty file and
  // leaves no handle open.
  $xmldata = file_get_contents($xmlfile);
  if($xmldata===FALSE){
      die("can not read file: ".htmlspecialchars($xmlfile));
  }
  $xmlparser = xml_parser_create();
  $values = array();
  if(xml_parse_into_struct($xmlparser,$xmldata,$values)===0){
      // Keep going with whatever was parsed, but leave a trace of the failure.
      $parseError=xml_error_string(xml_get_error_code($xmlparser))." at line ".xml_get_current_line_number($xmlparser);
      echo "xml parse error:".htmlspecialchars($parseError)."<br>";
      $log=$log."$from,$FileName,error,xml parse error:,".$parseError."\r\n";
  }
  xml_parser_free($xmlparser);
  $log=$log."$from,$FileName,open\r\n";

  foreach ($values as $child)
  {
      $attributes=getChildNodeValue($child,"attributes");
      // FALSE when the node carries no text value.
      $nodeValue=getChildNodeValue($child,"value");
      switch ($child["tag"])
      {
          case "BODY":
              // Text directly inside <body> (outside any paragraph) cannot be
              // processed; the HTML file has to be fixed by hand.
              $parText="";
              switch($child["type"]){
                  case "open":
                  case "complete":
                  case "cdata":
                      $parText=(string)$nodeValue;
                      break;
                  case "close":
                      break;
                  default:
                      echo "无法处理的段落块之外的数据。原因:无法识别的type:";
                      $log=$log."$from,$FileName,error,无法处理的段落块之外的数据,原因:无法识别的type in body tag\r\n";
                      break;
              }
              if(strlen($parText)>1){
                  echo "段落块之外的数据:"."size".strlen($parText).$parText;
                  $log=$log. "$from,$FileName,error,无法处理的段落块之外的数据,".$parText."\r\n";
              }
              break;
          case "P":
              // NOTE(review): $class is reassigned for every P node, including
              // "cdata" and "close" nodes that carry no attributes; splitWords()
              // stores it as the word id -- confirm this is intended.
              $class=getChildNodeValue($attributes,"CLASS");
              switch($child["type"]){
                  case "open":
                  case "complete":
                      // A new paragraph starts: new TOC row, word order restarts.
                      $vriParNum++;
                      $wordOrder=1;
                      $g_TocCounter++;
                      $arrToc[$g_TocCounter]=array('NULL',$bookId,$vriParNum,"0",$class,"","");
                      splitWords($nodeValue);
                      // Only an "open" node leaves the paragraph open for child nodes.
                      $parBegin=($child["type"]=="open");
                      break;
                  case "close":
                      $parBegin=false;
                      break;
                  case "cdata":
                      splitWords($nodeValue);
                      break;
                  default:
                      echo "无法处理的块P。原因:无法识别的type:";
                      $log=$log."$from,$FileName,error,无法处理的块P,原因:无法识别的type\r\n";
                      break;
              }
              break;
          case "A":
              switch($child["type"]){
                  case "open":
                      echo "无法处理的块A。原因:内部有嵌套其他的块<br>";
                      $log=$log."$from,$FileName,error,无法处理的块A,原因:内部有嵌套其他的块\r\n";
                      break;
                  case "close":
                      break;
                  case "complete":
                      $aName=getChildNodeValue($attributes,"NAME");
                      if($parBegin===false){
                          // Anchor outside a paragraph belongs to the next one.
                          splitWords($aName,"#a#",1);
                      }
                      else{
                          splitWords($aName,"#a#");
                      }
                      break;
                  default:
                      echo "无法处理的块A。原因:无法识别的type:".$child["type"];
                      $log=$log."$from,$FileName,error,无法处理的块A,原因:无法识别的type:".$child["type"]."\r\n";
                      break;
              }
              break;
          case "SPAN":
              $className=getChildNodeValue($attributes,"CLASS");
              if($className=="paranum"){
                  $currParNum=$nodeValue;
              }
              switch($child["type"]){
                  case "open":
                      splitWords($nodeValue,$className);
                      break;
                  case "close":
                      break;
                  case "complete":
                      if($parBegin){
                          if(strlen((string)$nodeValue)>0){
                              splitWords($nodeValue,$className);
                          }
                      }
                      else{
                          echo "无法处理的块span。原因:该块在段落外<br>";
                          $log=$log."$from,$FileName,error,无法处理的块span,原因:该块在段落外\r\n";
                      }
                      break;
                  case "cdata":
                      splitWords($nodeValue);
                      break;
                  default:
                      echo "无法处理的块span。原因:无法识别的type:";
                      $log=$log. "$from,$FileName,error,无法处理的块span,原因:无法识别的type:\r\n";
              }
              break;
          default:
              echo "无法处理的tag:".$child["tag"];
              $log=$log. "$from,$FileName,error,无法处理的tag,".$child["tag"]."\r\n";
      }
  }

  // Append this run's messages to the shared log file.
  $myLogFile = fopen($dirLog."palicanoon.log", "a");
  if($myLogFile!==FALSE){
      fwrite($myLogFile, $log);
      fclose($myLogFile);
  }
  else{
      echo "can not open log file. filename=".$dirLog."palicanoon.log<br>";
  }

  // TOC: paragraph class => heading level; every other class gets level 100.
  $tocLevels=array("book"=>1,"chapter"=>2,"title"=>3,"subhead"=>4,"subsubhead"=>5,"hangnum"=>8);
  if(($fptitle=fopen($dirXmlBase.$dirXml."/".($from+1)."_title.csv", "w")) === FALSE){
      echo "error: can not open output file toc .";
  }
  if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_toc.csv", "w"))!==FALSE){
      $fpPaliText=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_pali.csv", "w");
      if($fpPaliText===FALSE){
          echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_pali.csv";
      }
      foreach($arrToc as $xWord){
          $paraClass=$xWord[4];
          $level=(is_string($paraClass) && isset($tocLevels[$paraClass])) ? $tocLevels[$paraClass] : 100;
          $xWord[3]=$level;
          // The pali file keeps the title column empty; only the level is set.
          $xPali=$xWord;
          if($level < 100){
              // Headings: the paragraph text doubles as the title.
              $xWord[5] = $xWord[6];
          }
          if($fpPaliText!==FALSE){
              fputcsv($fpPaliText,$xPali);
          }
          fputcsv($fp,$xWord);
          if($fptitle!==FALSE){
              fputcsv($fptitle,$xWord);
          }
      }
      if($fpPaliText!==FALSE){
          fclose($fpPaliText);
      }
      fclose($fp);
      echo "TOC 表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead."_toc.csv<br>";
  }
  else{
      echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_toc.csv";
  }
  if($fptitle!==FALSE){
      fclose($fptitle);
  }

  /* Word table */
  if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead.".csv", "w"))!==FALSE){
      foreach($arrAllWords as $xWord){
          fputcsv($fp,$xWord);
      }
      fclose($fp);
      echo "单词表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead.".csv<br>";
  }
  else{
      echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead.".csv";
  }

  /* Union table */
  if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_un.csv", "w"))!==FALSE){
      foreach($arrUnWords as $xWord){
          fputcsv($fp,$xWord);
      }
      fclose($fp);
      echo "union表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead."_un.csv<br>";
  }
  else{
      echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_un.csv";
  }

  /* Union part table: one word per line */
  if(($fp=fopen($dirXmlBase.$dirXml.$outputFileNameHead."_un_part.csv", "w"))!==FALSE){
      foreach($arrUnPart as $xWord){
          fwrite($fp,$xWord."\r\n");
      }
      fclose($fp);
      echo "union part 表导出到:".$dirXmlBase.$dirXml.$outputFileNameHead."_un_part.csv<br>";
  }
  else{
      echo "can not open csv file. filename=".$dirXmlBase.$dirXml.$outputFileNameHead."_un_part.csv";
  }

  /* Pali word statistics table */
  $countCsvFileName=$dirXmlBase.$dirXml.$outputFileNameHead."_analysis.csv";
  if(($fp=fopen($countCsvFileName, "w"))!==FALSE){
      $wordCountCsvHead=array("编号","词","数量","百分比","长度");
      fputcsv($fp,$wordCountCsvHead);
      $i=0;
      foreach($arrAllPaliWordsCount as $x=>$x_value){
          $i++;
          // Columns: running number, word, count, count*10000/total, length.
          // $g_paliWordCounter is positive whenever this table has entries.
          fputcsv($fp,array($i,$x,$x_value[1],$x_value[1]*10000/$g_paliWordCounter,$x_value[2]));
      }
      fclose($fp);
      echo "Pali单词表统计导出到:".$countCsvFileName."<br>";
  }
  else{
      echo "can not open csv file. filename=".$countCsvFileName."<br>";
  }

  // Chain to the next file of the range. The values end up inside inline
  // JavaScript, so they are forced to integers first.
  $nextFrom=(int)$from+1;
  $lastIndex=(int)$to;
  if((int)$from>=$lastIndex){
      echo "<h2>齐活!功德无量!all done!</h2>";
  }
  else{
      echo "<script>";
      echo "window.location.assign(\"xmlmaker.php?from=".$nextFrom."&to=".$lastIndex."\")";
      echo "</script>";
      $nextFileId=isset($filelist[$nextFrom][0]) ? $filelist[$nextFrom][0] : "";
      echo "正在载入:".$nextFrom."——".$nextFileId;
  }
  ?>
  </body>
  </html>