<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p><a href="index.php">Home</a></p>
<?php
/*
 * count.php - generate the overall Pali word statistics table.
 *
 * Steps:
 *   1. Read filelist.csv; the second column of each row names a book.
 *   2. For every book, merge xml/<book>/<book>_analysis.csv (word counts) and
 *      xml/<book>/<book>_un_part.csv (extra words, counted as 0) into a single
 *      word => count map.
 *   3. Write the merged table to xml/all_word.csv.
 *
 * Progress and error messages are echoed as HTML.
 */

$dirXmlBase = "xml/";

// Escapes text (file names come from CSV data) before it is echoed into the page.
$esc = function ($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

// ---- 1. Load the list of books -------------------------------------------
$filelist = [];
if (($handle = fopen("filelist.csv", 'r')) !== false) {
    // Read into a temporary first: assigning fgetcsv() straight into the list
    // would also store the terminating FALSE as a bogus last entry.
    while (($row = fgetcsv($handle, 0, ',')) !== false) {
        // fgetcsv() yields array(null) for a blank line; a row without a book
        // name in column 1 cannot be turned into a path, so skip it.
        if (isset($row[1]) && $row[1] !== '') {
            $filelist[] = $row;
        }
    }
    fclose($handle);
} else {
    echo "open file:filelist.csv false<br>";
}

// ---- 2. Accumulate word counts over all books ----------------------------
$arrAllPaliWordsCount = []; // word => total number of occurrences
$g_paliWordCounter = 0;     // grand total of all occurrences

foreach ($filelist as $i => $fileInfo) {
    $outputFileNameHead = $fileInfo[1];
    $dirXml = $outputFileNameHead . "/";

    // Per-book analysis file: column 1 = word, column 2 = count.
    $inputFileName = $dirXmlBase . $dirXml . $outputFileNameHead . "_analysis.csv";
    echo "doing:[$i] - " . $esc($inputFileName) . " <br />";
    if (($handle = fopen($inputFileName, 'r')) !== false) {
        $isHeaderRow = true;
        while (($data = fgetcsv($handle, 0, ',')) !== false) {
            if ($isHeaderRow) {
                // The first line is skipped, as in the original script.
                $isHeaderRow = false;
                continue;
            }
            if (!isset($data[1], $data[2])) {
                // Blank or truncated row: nothing to count.
                continue;
            }
            $pali = $data[1];
            // Only add real numbers; a non-numeric string in arithmetic is a
            // TypeError on PHP 8. "+ 0" keeps the int/float nature of the value.
            $rawCount = trim($data[2]);
            $count = is_numeric($rawCount) ? $rawCount + 0 : 0;

            if (!isset($arrAllPaliWordsCount[$pali])) {
                $arrAllPaliWordsCount[$pali] = 0;
            }
            $arrAllPaliWordsCount[$pali] += $count;
            $g_paliWordCounter += $count;
        }
        // Close only a handle that was actually opened; fclose(false) is a
        // warning on PHP 7 and a fatal TypeError on PHP 8.
        fclose($handle);
    } else {
        echo "open file:" . $esc($inputFileName) . " false<br>";
    }

    // Union part: column 0 = word. These words are only registered (count 0)
    // when they have not been seen yet; existing counts are left untouched.
    $inputFileName = $dirXmlBase . $dirXml . $outputFileNameHead . "_un_part.csv";
    if (($handle = fopen($inputFileName, 'r')) !== false) {
        while (($data = fgetcsv($handle, 0, ',')) !== false) {
            if (!isset($data[0])) {
                // Blank line.
                continue;
            }
            $pali = $data[0];
            if (!isset($arrAllPaliWordsCount[$pali])) {
                $arrAllPaliWordsCount[$pali] = 0;
            }
        }
        fclose($handle);
    } else {
        echo "open file:" . $esc($inputFileName) . " false<br>";
    }
}

// ---- 3. Export the Pali word statistics table ----------------------------
$outputfile = $dirXmlBase . "all_word.csv";
echo "outputfile:" . $outputfile . "<br>";

if (($fp = fopen($outputfile, "w")) !== false) {
    // Header columns: serial number, spelling, count, per-ten-thousand rate,
    // length, status.
    fputcsv($fp, ["编号", "拼写", "数量", "万分比", "长度", "状态"]);

    $serial = 0;
    foreach ($arrAllPaliWordsCount as $word => $count) {
        $serial++;
        // Share of this word in the whole corpus, expressed per 10,000.
        // Guard the divisor as well so a zero total can never divide.
        $rate = ($count > 0 && $g_paliWordCounter > 0)
            ? $count * 10000 / $g_paliWordCounter
            : 0;

        fputcsv($fp, [
            $serial,
            $word,
            $count,
            $rate,
            // Numeric-looking words become integer array keys, hence the cast.
            mb_strlen((string) $word, "UTF-8"),
            100, // status column; constant in this script (meaning not defined here)
        ]);
    }
    fclose($fp);
    echo "Pali单词表统计导出到:" . $outputfile . "<br>";
} else {
    echo "can not open csv file. filename=" . $outputfile;
}

echo "all done!";
?>
</body>
</html>