标题: 文件数据分析比较(Rev 02!)大佬有更好的思路请多多指教!!! [打印本页]
作者: Gin_Q 时间: 2020-3-17 11:32 标题: 文件数据分析比较(Rev 02!)大佬有更好的思路请多多指教!!!
本帖最后由 Gin_Q 于 2020-4-10 11:17 编辑
感谢red2020--_--||你们提供的帮助!
3W+ 数据1秒不到搞定,真的不要太快!
- #if 0
- By Cool_Breeze
- Rev 02
- #endif
- #include <stdio.h>
- #include <unistd.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stddef.h>
- #include <malloc.h>
- #include <float.h>
- #include <errno.h>
-
/* Unsigned counter/size type used for column counts, row counts and slot widths. */
typedef unsigned long long intll;

/* Open a file or terminate the program. */
FILE *fileptr(char *file, char *mode);

/* Source-file parsing. */
void scantext(FILE *fp, intll *inu);
void getname(FILE *fp, char *name, intll si);
void getdata(FILE *fp, double *max, double *min, double *avg, double *sum, double *count,
             intll ns, intll nl, char *max_n, char *min_n);

/* Limit-file parsing. */
void limittext(FILE *fp, intll *inu);
void getlimitdata(FILE *fp, char *name, char *item, char *comp, double *data);

/* Comparison and report output. */
int compare(char *cmp, double s, double l);
void output(FILE *fp, int flag, int n, char *name, char *litem, double sdata, char *comp, double ldata);

#define LINES 1024   /* size of the shared line buffer, in bytes */
#define NAMESIZE 100 /* width of one name/item slot, including the terminator */
#define COMPSIZE 3   /* width of one comparator slot, including the terminator */

/* Line buffer shared by all parsing routines; each fgets() overwrites it. */
static char temp[LINES];
/* Field separator handed to strtok(). */
static char const delim[] = ",";
/* Process exit value: 0 means Pass, non-zero means Fail. */
static int ERROR = 0;
-
- int main(int argc,char **argv)
- {
- static char usage[NAMESIZE]="-s source.csv\n-l limit.csv\n-r result.csv";
- static char source[NAMESIZE];
- static char limit[NAMESIZE];
- static char result[NAMESIZE];
- register int opt=0;
- while ((opt = getopt(argc,argv,"-s:-l:-r:")) != -1)
- switch (opt)
- {
- case 's': strcpy(source,optarg);break;
- case 'l': strcpy(limit,optarg);break;
- case 'r': strcpy(result,optarg);break;
- default : printf("%s\n",usage);exit(EXIT_SUCCESS);
- }
-
- FILE* fpi=fileptr(source,"rb");
- FILE* lpi=fileptr(limit,"rb");
- FILE* fpo=fileptr(result,"wb");
-
- //开始源文件解析
- static intll inu,isize=NAMESIZE;
- scantext(fpi,&inu);
- char* iname=(char*)calloc(inu,sizeof(char)*isize);
- char* max_n=(char*)calloc(inu,sizeof(char)*isize);
- char* min_n=(char*)calloc(inu,sizeof(char)*isize);
- getname(fpi, iname, isize);
- double* max=(double*)calloc(inu,sizeof(double));
- double* min=(double*)calloc(inu,sizeof(double));
- double* avg=(double*)calloc(inu,sizeof(double));
- double* sum=(double*)calloc(inu,sizeof(double));
- double* count=(double*)calloc(inu,sizeof(double));
- register intll i=0;
- for (i=0;i<inu;i++) *(min+i)=DBL_MAX, *(max+i)=LDBL_MIN;
- getdata(fpi, max, min, avg, sum, count, isize, inu, max_n, min_n);
- //源文件解析完成
-
- //开始limit文件解析
- static intll lnu,lsize=NAMESIZE;
- limittext(lpi,&lnu);
- char* lname=(char*)calloc(lnu,sizeof(char)*lsize);
- char* litem=(char*)calloc(lnu,sizeof(char)*lsize);
- char* comp=(char*)calloc(lnu,sizeof(char)*COMPSIZE);
- double* ldata=(double*)calloc(lnu,sizeof(double)*lsize);
- getlimitdata(lpi, lname, litem, comp, ldata);
- //limit文件解析完成
-
- static const char li_comp[3][10]={{"Max"},{"Min"},{"Avg"}};
-
- register int j=0,flag=1;
- register int k=0;
- fprintf(fpo," Result\r\n");
- fprintf(fpo," Nu, Item, Options, %s, Comp, %s, Result\r\n",source,limit);
- for (i=0;i<lnu;i++)
- for (j=0;j<inu-1;j++)
- if ( ! strcmp(lname+i*NAMESIZE,iname+(j+1)*NAMESIZE) )//匹配项目名 (j+1)跳过第一项数据生成时间项
- {
- for (k=0;k<3;k++) if ( ! strcmp(litem+i*NAMESIZE,li_comp[k]) ) break;
- switch (k)
- {
- case 0: flag=compare(comp+i*COMPSIZE,*(max+j),*(ldata+i));
- output(fpo,flag,(j+1),lname+i*NAMESIZE,litem+i*NAMESIZE,max[j],comp+i*COMPSIZE,ldata[i]);
- break;
- case 1: flag=compare(comp+i*COMPSIZE,*(min+j),*(ldata+i));
- output(fpo,flag,(j+1),lname+i*NAMESIZE,litem+i*NAMESIZE,min[j],comp+i*COMPSIZE,ldata[i]);
- break;
- case 2: flag=compare(comp+i*COMPSIZE,*(avg+j),*(ldata+i));
- output(fpo,flag,(j+1),lname+i*NAMESIZE,litem+i*NAMESIZE,avg[j],comp+i*COMPSIZE,ldata[i]);
- break;
- default : break;
- }
- }
- //输出源文件和limit文件内容
- fprintf(fpo," %s\r\n",limit);
- fprintf(fpo," Nu, Item, Comp_Item, Comp, Data\r\n");
- for (i=0;i<lnu;i++) fprintf(fpo,"%-d, %s, %s, %s,%lf\r\n",i+1,lname+i*NAMESIZE,litem+i*NAMESIZE,comp+i*COMPSIZE,*(ldata+i));
- fprintf(fpo," %s\r\n",source);
- fprintf(fpo," Nu, Item, Count, Max, Max_Date, Min, Min_Date, Sum, Avg\r\n");
- for (i=0;i<inu-1;i++) fprintf(fpo,"%-d, %s, %.0lf, %.6lf, %s, %.6lf, %s, %.6lf, %.6lf\r\n",\
- i+1,iname+(i+1)*isize,*(count+i),*(max+i),max_n+i*isize,*(min+i),min_n+i*isize,*(sum+i),*(avg+i));
-
- free(iname);
- free(max);
- free(min);
- free(avg);
- free(sum);
- free(count);
- free(max_n);
- free(min_n);
- free(lname);
- free(litem);
- free(comp);
- free(ldata);
- fclose(fpi);
- fclose(fpo);
- return ERROR;
- }
/*
 * Open `file` with fopen() mode `mode`.
 *
 * Returns the open stream; never returns NULL. If the file cannot be opened
 * (or either argument is NULL) a message naming the file and the reason is
 * written to stderr and the program exits with EXIT_FAILURE.
 */
FILE* fileptr(char* file, char* mode)
{
    FILE *fp = NULL;

    if (file == NULL || mode == NULL)
        errno = EINVAL;
    else
        fp = fopen(file, mode);

    if (fp == NULL)
    {
        int saved = errno; /* fprintf may overwrite errno */
        fprintf(stderr, "open file fail!: %s: %s\n",
                file != NULL ? file : "(null)", strerror(saved));
        exit(EXIT_FAILURE);
    }
    return fp;
}
- void scantext(FILE* fp, intll* inu)
- {
- rewind(fp);
- register intll i=0;//记录项目数
- char *p;
- fgets(temp,LINES,fp);
- p=strtok(temp,delim);
- while(p != NULL)
- {
- i++;
- p=strtok(NULL,delim);
- }
- *inu=i;
- }
- void getname(FILE* fp, char* name, intll si)
- {
- rewind(fp);
- fgets(temp,LINES,fp);
- char* p=strtok(temp,delim);
- register intll i=0,l;
- while (p != NULL)
- {
- strcpy(name+i*si,p);
- p=strtok(NULL,delim);
- i++;
- }
- i--;
- l=strlen(name+i*si);
- while (l--)
- if (*(name+i*si+l) == 0xA || *(name+i*si+l) == 0xD)//最后一列丢弃换行,回车
- *(name+i*si+l) = '\0';
- else break;
- }
- void getdata(FILE* fp, double* max, double* min, double* avg, double* sum, double* count,\
- intll ns, intll nl, char* max_n, char* min_n)
- {
- rewind(fp);
- fgets(temp,LINES,fp);
-
- register double t=0;
- register intll i=0;
- register intll cou=0;
- register intll stl=0;
- char *p=NULL;
- char *pb=NULL;
-
- while (fgets(temp,LINES,fp))
- {
- cou++;
- i=0;
- p=pb=strtok(temp,delim);//按指定字符分割字符串 ,pb指向字符串的第一列数据
- if ( cou == 1 )
- {
- int i;
- stl=strlen(pb);
- for (i=0;i<nl-1;i++)
- {
- memmove(max_n+i*ns,pb,stl);
- *(max_n+i*ns+stl)='\0';
- memmove(min_n+i*ns,pb,stl);
- *(min_n+i*ns+stl)='\0';
- }
- }
- do
- {
- p=strtok(NULL,delim);//遍历每一项数据
- if (p == NULL) break;
- t=atof(p);
- stl=strlen(pb);
- if (t > *(max+i))
- {
- *(max+i) = t;
- memmove(max_n+i*ns,pb,stl);
- *(max_n+i*ns+stl)='\0';
- }
- if (t < *(min+i))
- {
- *(min+i) = t;
- memmove(min_n+i*ns,pb,stl);
- *(min_n+i*ns+stl)='\0';
- }
- *(sum+i)+=t;
- i++;
- }while (1);
- }
- register intll j;
- for (j=0; j<nl-1; j++)//如果最后一行字符串小于标准列数,单独计算平均值和计数器
- {
- if (j < i) *(avg+j)=*(sum+j)/cou,*(count+j)=cou;
- else *(avg+j)=*(sum+j)/(cou-1),*(count+j)=cou-1;
- }
- }
-
- //limit文件解析
- void limittext(FILE* fp, intll* inu)
- {
- rewind(fp);
- register intll i=0;
- while (fgets(temp,LINES,fp))
- i++;//读取行数
- *inu=i;
- }
- void getlimitdata(FILE* fp,char* name, char* item, char* comp, double* data)
- {
- rewind(fp);
- char* p=temp;
- register intll i=0,count=0,psize=0;
- while (fgets(temp,LINES,fp))
- {
- count=0;
- p=strtok(temp,delim);
- do
- {
- if(p == NULL) break;
- psize=strlen(p);//按分割每个符分割字符串
- switch (count)
- {
- case 0: memmove(name+i*NAMESIZE,p,psize);
- *(name+i*NAMESIZE+psize)='\0';break;
- case 1: memmove(item+i*NAMESIZE,p,psize);
- *(item+i*NAMESIZE+psize)='\0';break;
- case 2: memmove(comp+i*COMPSIZE,p,psize);
- *(comp+i*COMPSIZE+psize)='\0';break;
- case 3: *(data+i) = atof(p);break;
- default : break;
- }
- count++;
- } while (p=strtok(NULL,delim));
- i++;
- }
- }
/*
 * Data comparison: evaluate the relation "s <cmp> l".
 *
 * cmp  comparator text: "<", ">", "=", "<=" or ">="
 * s    left operand (value from the source data)
 * l    right operand (value from the limit file)
 *
 * Returns 1 when the relation holds and 0 when it does not. A NULL or
 * unrecognized comparator also returns 0, so a malformed rule is reported as
 * a failure instead of yielding an indeterminate result. "=" tests exact
 * floating-point equality.
 */
int compare(char *cmp,double s,double l)
{
    if (cmp == NULL) return 0;
    if (strcmp(cmp, "<") == 0) return s < l;
    if (strcmp(cmp, ">") == 0) return s > l;
    if (strcmp(cmp, "=") == 0) return s == l;
    if (strcmp(cmp, "<=") == 0) return s <= l;
    if (strcmp(cmp, ">=") == 0) return s >= l;
    return 0;
}
- //输出
- void output(FILE* fp,int flag,int n,char* name,char* litem,double sdata,char* comp,double ldata)
- {
- if (flag)
- fprintf(fp,"%-d, %s, %s,%.5lf, %s,%.5lf, Pass,\r\n",n,name,litem,sdata,comp,ldata);
- else
- {
- fprintf(fp,"%-d, %s, %s,%.5lf, %s,%.5lf, Fail,\r\n",n,name,litem,sdata,comp,ldata);
- ERROR++;
- }
- }
复制代码
欢迎光临 批处理之家 (http://bathome.net./) |
Powered by Discuz! 7.2 |