首页 > 其他 > 详细

好吧 老师作业 晚交好久

时间:2014-03-17 03:43:23      阅读:470      评论:0      收藏:0      [点我收藏+]
 
分析一个文本(英文文章,大小约 300k~500k)中各个单词出现的频率,并把出现频率最高的 10 个单词打印出来。
 
 
 
 
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Counts how often each English word occurs in a text file and prints the
 * most frequent ones.
 *
 * <p>A word is a maximal run of ASCII letters ({@code a-z}, {@code A-Z});
 * every other byte is a separator. Words are compared case-insensitively by
 * folding them to lower case.
 *
 * <p>The file is read once, byte by byte, through a buffered stream. Words
 * may be of any length.
 */
public class  Account_num
{
    /** File analysed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "E:/Software_eng/a.txt";

    /** Maximum number of words reported. */
    private static final int TOP_COUNT = 10;

    /**
     * Program entry point.
     *
     * <p>Prints the highest occurrence count (0 when the file contains no
     * words), a header line, and then up to {@link #TOP_COUNT} lines of the
     * form {@code word count}, most frequent first. Words with equal counts
     * are listed alphabetically. If the file cannot be read, the exception
     * is printed and nothing else is output.
     *
     * @param args optional; {@code args[0]} is the path of the file to
     *             analyse, defaulting to {@link #DEFAULT_PATH}
     */
    public static void main(String args[])
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        Map<String, Integer> counts;
        // try-with-resources closes the stream even when read() fails.
        try (InputStream in = new BufferedInputStream(new FileInputStream(path)))
        {
            counts = countWords(in);
        }
        catch (IOException ie)
        {
            System.out.println(ie);
            return; // nothing meaningful to report without input
        }

        List<Map.Entry<String, Integer>> top = mostFrequent(counts, TOP_COUNT);

        System.out.println(top.isEmpty() ? 0 : top.get(0).getValue());
        System.out.println("出现频率最高的10个单词为:");
        for (Map.Entry<String, Integer> entry : top)
        {
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
    }

    /**
     * Reads {@code in} to the end and tallies every word in it.
     *
     * @param in stream to read; not closed by this method
     * @return map from lower-cased word to its number of occurrences
     * @throws IOException if reading from {@code in} fails
     */
    private static Map<String, Integer> countWords(InputStream in) throws IOException
    {
        Map<String, Integer> counts = new HashMap<>();
        StringBuilder word = new StringBuilder();
        int b;
        while ((b = in.read()) != -1)
        {
            if (isAsciiLetter(b))
            {
                // Only ASCII letters reach here, so this folding is locale-independent.
                word.append(Character.toLowerCase((char) b));
            }
            else
            {
                flushWord(counts, word);
            }
        }
        flushWord(counts, word); // the file may end in the middle of a word
        return counts;
    }

    /** Returns whether byte value {@code b} is an ASCII letter. */
    private static boolean isAsciiLetter(int b)
    {
        return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z');
    }

    /** Adds the pending word (if any) to {@code counts} and empties the buffer. */
    private static void flushWord(Map<String, Integer> counts, StringBuilder word)
    {
        if (word.length() == 0)
        {
            return;
        }
        String key = word.toString();
        Integer seen = counts.get(key);
        counts.put(key, seen == null ? 1 : seen + 1);
        word.setLength(0);
    }

    /**
     * Returns at most {@code limit} entries of {@code counts}, ordered by
     * descending count and then alphabetically by word.
     */
    private static List<Map.Entry<String, Integer>> mostFrequent(Map<String, Integer> counts, int limit)
    {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>()
        {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right)
            {
                int byCount = Integer.compare(right.getValue(), left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return new ArrayList<>(entries.subList(0, Math.min(limit, entries.size())));
    }
}

好吧 老师作业 晚交好久,布布扣,bubuko.com

好吧 老师作业 晚交好久

原文:http://www.cnblogs.com/shaoh98521/p/3603941.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!