C语言实现读取CSV文件的方法详解
Brickie-liu 人气:0
1、每一次只能读取同一种数据类型,不能读取字符串。
2、每次读取会返回一个 CSV 数据结构,其中包含源数据、二维数组以及行列数信息
3、可以转换二维数组,但总大小不能变
123.csv
Month,"CO2 (ppm) mauna loa, 1965-1980",,,CO2 (ppm) mauna loa
Jan-65,319.32,1,,,
Feb-65,320.36,2,,,
Mar-65,320.82,3,,,
Apr-65,322.06,4,,,
May-65,322.17,5,,,
Jun-65,321.95,6,,,
头文件 read_csv_data.h
#ifndef READ_CSV_DATA_H
#define READ_CSV_DATA_H 1
/*
 * read_csv_data.h - types and entry points for loading numeric CSV data.
 *
 * The include guard makes it safe to include this header more than once in
 * the same translation unit (the structs, union and enum below would
 * otherwise be redefined).
 */

#include <stdio.h>
#include <memory.h> // for memset
#include <stdlib.h> // for malloc, free
#include <string.h> // string helpers

/* Maximum number of bytes fgets may store per call, terminator included. */
#define MAX_LINE_SIZE 1024

/* Result of splitting one string on a single-character delimiter. */
struct str_split {
    int count;         /* number of entries in str_array */
    char *str;         /* heap copy of the input; the fields point into it */
    char **str_array;  /* one pointer per field */
};

/*
 * One cell of CSV data. Every cell of a csv_s uses the same member, chosen by
 * the dclass argument of ReadCsvData.
 * NOTE: the member names suggest bit widths, but the members are declared
 * with plain C types whose widths are platform-dependent (s32/u32 are
 * "long int" / "unsigned long int").
 */
typedef union dtype_u {
    long int s32;
    char s8;
    short int s16;
    unsigned char u8;
    unsigned short int u16;
    unsigned long int u32;
    float f32;
    double f64;
} Dtype;

/* Parsed CSV content: a flat cell buffer plus a row-pointer view onto it. */
struct csv_s {
    Dtype **darray;  /* row pointers into data[] (the two-dimensional view) */
    int drow;        /* number of rows in the view */
    int dcol;        /* number of columns in the view */
    int dnum;        /* total number of cells in data[] */
    Dtype data[];    /* cells stored row by row */
};

/* Selects which Dtype member a conversion writes. DEF aliases S32. */
enum {
    S8,
    S16,
    S32,
    U8,
    U16,
    U32,
    F32,
    F64,
    DEF = S32
};

/*
 * Releases a CSV structure and its row-pointer array, then stores NULL in
 * *csv_ptr. See testFile() for usage.
 */
void FreeCsvData(struct csv_s **csv_ptr);

/*
 * Rebuilds the two-dimensional view of tt as row x col without touching the
 * cell data. Returns 0 on success, -1 when tt is NULL, -2 when the requested
 * shape does not match tt->dnum, -3 when allocation fails.
 * See testFile() for usage.
 */
int setNewArray(struct csv_s *tt, int row, int col);

/*
 * Reads a CSV file into a newly allocated CSV structure.
 * See testFile() for usage.
 *
 *   csvFilePath: path of the file to read
 *   delimiter:   field separator character
 *   skiprows:    number of leading rows to skip
 *   ColumnList:  comma-separated list of column indices to keep, counted
 *                from 0 (for example "2,1"), or NULL
 *   dclass:      one of the S8..F64 constants selecting the cell type
 *
 * Returns the new structure, or NULL on failure. Release the result with
 * FreeCsvData().
 */
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass);

#endif /* READ_CSV_DATA_H */
源码 read_csv_data.c
#include "read_csv_data.h"

#include <errno.h> // for errno, ENOENT

/*
 * Splits src on delimiter into split->str_array.
 *
 * The input is copied first, so src itself is never modified; every field
 * pointer refers into that private copy (split->str). Quoting is not
 * interpreted: a delimiter inside "..." still separates fields.
 *
 * Always returns 0. Callers detect failure (NULL/empty input or allocation
 * failure) through split->count == 0. Release the result with
 * str_split_free().
 */
int str_split_func(struct str_split *split, char *src, char delimiter)
{
    size_t len;
    int count = 1;
    char *p;
    char **slot;

    if (split == NULL) {
        return 0;
    }
    memset(split, 0, sizeof *split);
    if (src == NULL || src[0] == '\0') {
        return 0;
    }

    /* Hand-rolled copy: strdup is not declared by <string.h> in strict C11. */
    len = strlen(src);
    split->str = malloc(len + 1);
    if (split->str == NULL) {
        return 0;
    }
    memcpy(split->str, src, len + 1);

    /* N delimiters produce N + 1 fields. */
    for (p = split->str; *p != '\0'; p++) {
        if (*p == delimiter) {
            count++;
        }
    }

    split->str_array = malloc((size_t)count * sizeof *split->str_array);
    if (split->str_array == NULL) {
        free(split->str);
        split->str = NULL;
        return 0;
    }
    split->count = count;

    /* Terminate each field in place and record where the next one starts. */
    slot = split->str_array;
    *slot = split->str;
    for (p = split->str; *p != '\0'; p++) {
        if (*p == delimiter) {
            *p = '\0';
            slot++;
            *slot = p + 1;
        }
    }
    return 0;
}

/*
 * Releases the buffers owned by split and resets it to the empty state.
 * Safe to call on a zeroed or already-freed structure. Always returns 0.
 */
int str_split_free(struct str_split *split)
{
    if (split == NULL) {
        return 0;
    }
    free(split->str);
    split->str = NULL;
    free(split->str_array);
    split->str_array = NULL;
    split->count = 0;
    return 0;
}

/*
 * Returns the size of the file in bytes, or -1 when fp is NULL, seeking
 * fails, or the size does not fit in an int. The stream is left positioned
 * at the end of the file.
 */
int GetTotalSize(FILE * fp)
{
    long size;

    if (fp == NULL) {
        return -1;
    }
    if (fseek(fp, 0, SEEK_END) != 0) {
        return -1;
    }
    size = ftell(fp);
    if (size < 0 || (long)(int)size != size) {
        return -1;
    }
    return (int)size;
}

/*
 * Counts the fgets reads needed to consume the whole file and rewinds the
 * stream afterwards. A physical line longer than MAX_LINE_SIZE - 1 bytes is
 * read in several pieces and therefore counted more than once.
 * Returns 0 when fp is NULL.
 */
int GetTotalLineCount(FILE * fp)
{
    int lines = 0;
    char strLine[MAX_LINE_SIZE];

    if (fp == NULL) {
        return 0;
    }
    fseek(fp, 0, SEEK_SET);
    while (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        lines++;
    }
    fseek(fp, 0, SEEK_SET);
    return lines;
}

/*
 * Returns the number of delimiter-separated fields on the first line of the
 * file (0 when fp is NULL or the file is empty). The stream is left just
 * after the data that was read.
 */
int GetTotalColCount(FILE * fp, char delimiter)
{
    int cols = 0;
    char strLine[MAX_LINE_SIZE];
    struct str_split tss;

    if (fp == NULL) {
        return 0;
    }
    fseek(fp, 0, SEEK_SET);
    if (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        str_split_func(&tss, strLine, delimiter);
        cols = tss.count;
        str_split_free(&tss);
    }
    return cols;
}

/*
 * Converts the text in str to the representation selected by dtype and
 * stores it in the matching member of *d.
 *
 * Text that does not start with a number converts to 0, as strtol/strtod do;
 * this is deliberately not treated as an error.
 * Returns 0 on success, -1 for a NULL argument or an unknown dtype.
 */
int strToData(Dtype *d, char *str, int dtype)
{
    if (d == NULL || str == NULL) {
        return -1;
    }

    switch (dtype) {
    case S8:
        d->s8 = (char)strtol(str, NULL, 10);
        break;
    case S16:
        d->s16 = (short int)strtol(str, NULL, 10);
        break;
    case S32:
        d->s32 = strtol(str, NULL, 10);
        break;
    case U8:
        d->u8 = (unsigned char)strtoul(str, NULL, 10);
        break;
    case U16:
        d->u16 = (unsigned short int)strtoul(str, NULL, 10);
        break;
    case U32:
        /* strtoul covers the full unsigned long range; strtol would clamp. */
        d->u32 = strtoul(str, NULL, 10);
        break;
    case F32:
        d->f32 = (float)strtod(str, NULL);
        break;
    case F64:
        d->f64 = strtod(str, NULL);
        break;
    default:
        printf("读取数据类型不对\n");
        return -1;
    }
    return 0;
}

/*
 * Rebuilds the row-pointer view of tt as row x col. The cell data is not
 * moved; only tt->darray, tt->drow and tt->dcol change.
 *
 * tt->darray must be NULL or a pointer previously installed by this
 * function, because the old array is freed.
 *
 * Returns 0 on success, -1 when tt is NULL, -2 when row/col are not positive
 * or row * col differs from tt->dnum, -3 when allocation fails (the old view
 * is kept in that case).
 */
int setNewArray( struct csv_s *tt, int row, int col)
{
    Dtype **rows;
    int i;

    if (tt == NULL) {
        return -1;
    }
    /* Division form avoids overflowing row * col. */
    if (row <= 0 || col <= 0 || tt->dnum % col != 0 || tt->dnum / col != row) {
        return -2;
    }

    rows = malloc((size_t)row * sizeof *rows);
    if (rows == NULL) {
        return -3;
    }
    for (i = 0; i < row; i++) {
        rows[i] = tt->data + (size_t)i * (size_t)col;
    }

    free(tt->darray);
    tt->darray = rows;
    tt->drow = row;
    tt->dcol = col;
    return 0;
}

/*
 * Frees the CSV structure referenced by *csv_ptr together with its
 * row-pointer array and stores NULL in *csv_ptr. A NULL csv_ptr or a NULL
 * *csv_ptr is accepted.
 */
void FreeCsvData(struct csv_s **csv_ptr)
{
    if (csv_ptr == NULL) {
        return;
    }
    if (*csv_ptr != NULL) {
        free((*csv_ptr)->darray);
        free(*csv_ptr);
    }
    *csv_ptr = NULL;
}

/*
 * Builds the array of selected column indices.
 *
 * With ColumnList == NULL every column 0..colTotal-1 is selected; otherwise
 * ColumnList is parsed as comma-separated decimal indices counted from 0.
 * On success returns a malloc'ed array and stores its length in *count.
 * On failure prints a message and returns NULL.
 */
static int *build_column_list(char *ColumnList, int colTotal, int *count)
{
    struct str_split tss;
    int *cols;
    int n;
    int i;

    *count = 0;

    if (ColumnList == NULL) {
        cols = malloc((size_t)colTotal * sizeof *cols);
        if (cols == NULL) {
            printf("分配内存失败 \n");
            return NULL;
        }
        for (i = 0; i < colTotal; i++) {
            cols[i] = i;
        }
        *count = colTotal;
        return cols;
    }

    str_split_func(&tss, ColumnList, ',');
    n = tss.count;
    if (n < 1) {
        printf("[%s] invalid column list\n", ColumnList);
        str_split_free(&tss);
        return NULL;
    }
    if (n > colTotal) {
        printf("[%s] 超过最大列数 %d\n", ColumnList, colTotal);
        str_split_free(&tss);
        return NULL;
    }

    cols = malloc((size_t)n * sizeof *cols);
    if (cols == NULL) {
        printf("分配内存失败 \n");
        str_split_free(&tss);
        return NULL;
    }

    for (i = 0; i < n; i++) {
        char *end = NULL;
        long value = strtol(tss.str_array[i], &end, 10);

        if (end == tss.str_array[i] || value < 0) {
            printf("[%s] invalid column list\n", ColumnList);
            goto fail;
        }
        /* Valid indices are 0 .. colTotal - 1. */
        if (value >= colTotal) {
            printf("[%s] 超过最大列数 %d\n", ColumnList, colTotal);
            goto fail;
        }
        cols[i] = (int)value;
    }

    str_split_free(&tss);
    *count = n;
    return cols;

fail:
    str_split_free(&tss);
    free(cols);
    return NULL;
}

/*
 * Reads numeric data from a CSV file.
 *
 *   csvFilePath: path of the file to read
 *   delimiter:   field separator character
 *   skiprows:    number of leading rows to skip; at least one row must
 *                remain after skipping
 *   ColumnList:  columns to return, as comma-separated indices counted from
 *                0 (for example "2,1" returns column 2 then column 1), or
 *                NULL for all columns
 *   dclass:      S8..F64 constant applied to every selected cell
 *
 * The column count is taken from the first line of the file, even when that
 * line is skipped. Lines are read with a MAX_LINE_SIZE buffer, so longer
 * lines are not supported. Quoted fields are not interpreted.
 *
 * Returns a structure to be released with FreeCsvData(), or NULL after
 * printing a message when anything fails.
 */
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass)
{
    FILE *fCsv = NULL;
    struct csv_s *temp_csv = NULL;
    struct str_split tss;
    char strLine[MAX_LINE_SIZE];
    int *columns = NULL;
    int column_count = 0;
    int rowTotal = 0;
    int colTotal = 0;
    int cur_ptr = 0;
    int r;
    int j;

    /* Start empty so the shared error path can always call str_split_free. */
    memset(&tss, 0, sizeof tss);

    if (csvFilePath == NULL) {
        printf("open file %s failed", "(null)");
        goto label_error;
    }

    /* Open directly instead of probing with access(): no extra dependency
       and no window between the check and the open. */
    errno = 0;
    fCsv = fopen(csvFilePath, "r");
    if (fCsv == NULL) {
        if (errno == ENOENT) {
            printf("%s 文件不存在\n", csvFilePath);
        } else {
            printf("open file %s failed", csvFilePath);
        }
        goto label_error;
    }

    rowTotal = GetTotalLineCount(fCsv);
    colTotal = GetTotalColCount(fCsv, delimiter);

    /* Need a non-empty file and at least one row left after skipping. */
    if (rowTotal < 1 || colTotal < 1 || skiprows < 0 || skiprows >= rowTotal) {
        printf("数据不对,有%d行,%d列\n", rowTotal, colTotal);
        goto label_error;
    }

    columns = build_column_list(ColumnList, colTotal, &column_count);
    if (columns == NULL) {
        goto label_error;
    }

    rowTotal = rowTotal - skiprows;
    temp_csv = malloc(sizeof *temp_csv
                      + (size_t)rowTotal * (size_t)column_count * sizeof(Dtype));
    if (temp_csv == NULL) {
        printf("分配内存失败 \n");
        goto label_error;
    }
    temp_csv->darray = NULL;

    if (fseek(fCsv, 0, SEEK_SET) != 0) {
        printf("read error\n");
        goto label_error;
    }

    /* Skip the leading rows. */
    for (r = 0; r < skiprows; r++) {
        if (fgets(strLine, MAX_LINE_SIZE, fCsv) == NULL) {
            printf("read error\n");
            goto label_error;
        }
    }

    /* Convert the selected fields of each remaining row, row by row. */
    for (r = 0; r < rowTotal; r++) {
        if (fgets(strLine, MAX_LINE_SIZE, fCsv) == NULL) {
            printf("read error\n");
            goto label_error;
        }
        str_split_func(&tss, strLine, delimiter);

        for (j = 0; j < column_count; j++) {
            /* A short row must not be indexed past its last field. */
            if (columns[j] >= tss.count) {
                printf("read error\n");
                goto label_error;
            }
            if (strToData(temp_csv->data + cur_ptr, tss.str_array[columns[j]], dclass) < 0) {
                printf("str to data error\n");
                goto label_error;
            }
            cur_ptr++;
        }
        str_split_free(&tss);
    }

    temp_csv->drow = rowTotal;
    temp_csv->dcol = column_count;
    temp_csv->dnum = rowTotal * column_count;
    if (setNewArray(temp_csv, temp_csv->drow, temp_csv->dcol) < 0) {
        printf("分配内存失败 \n");
        goto label_error;
    }

    fclose(fCsv);
    free(columns);
    return temp_csv;

label_error:
    str_split_free(&tss);
    if (fCsv != NULL) {
        fclose(fCsv);
    }
    free(temp_csv);
    free(columns);
    return NULL;
}

/*
 * Self-test of setNewArray on a hand-built structure of 1000 double cells:
 * views it as 500 x 2 and then as 100 x 10, printing the last row each time.
 * Always returns 0.
 */
int testData(void)
{
    int size = 1000;
    int row;
    int col;
    int i;
    int j;
    struct csv_s *stdata = malloc(sizeof *stdata + (size_t)size * sizeof(Dtype));

    if (stdata == NULL) {
        printf("分配内存失败 \n");
        return 0;
    }
    /* setNewArray frees the previous view, so it must start out as NULL. */
    stdata->darray = NULL;

    /* Only the tail is initialised; only the last rows are printed below. */
    for (i = 900; i < size; i++) {
        stdata->data[i].f64 = i * 1.0;
    }

    /* Spot check of a single cell. */
    stdata->data[1].f64 = 10 * 1.0;
    printf("%f asdfasfasdf\n", stdata->data[1].f64);

    row = 500;
    col = 2;
    stdata->drow = row;
    stdata->dcol = col;
    stdata->dnum = row * col;

    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }

    printf("显示定义结构最后一行数据 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row-1][j].f64);
    }

    /* Reshape the same cells. */
    row = 100;
    col = 10;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }

    printf("显示结构转换最后一行数据 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row-1][j].f64);
    }

    FreeCsvData(&stdata);
    return 0;
}

/*
 * Floating-point test: loads columns 2 and 1 of 123.csv as F32, prints the
 * last row, reshapes to 3 x 4, then requests a 3 x 2 shape to exercise the
 * failure path.
 */
void testFile(void)
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    stdata = ReadCsvData("123.csv", ',', 1, "2,1", F32);
    if (stdata == NULL) {
        /* ReadCsvData has already reported the reason. */
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    /* darray is indexed as show[row][col], drow rows by dcol columns. */
    show = stdata->darray;
    printf("显示最后一行数据 \n");
    for (j = 0; j < stdata->dcol; j++) {
        printf("%f\n", show[stdata->drow-1][j].f32);
    }

    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    } else {
        printf("显示结构转换最后一行数据 \n");
        for (j = 0; j < col; j++) {
            printf("%f\n", stdata->darray[row-1][j].f32);
        }
    }

    printf("转换失败测试\n");
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    }

    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}

/*
 * Integer test: same sequence as testFile, but the cells are loaded with the
 * default type (DEF, i.e. the long int member s32).
 */
void testFile2(void)
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    stdata = ReadCsvData("123.csv", ',', 1, "2,1", DEF);
    if (stdata == NULL) {
        /* ReadCsvData has already reported the reason. */
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    /* darray is indexed as show[row][col]; the cells hold long int here. */
    show = stdata->darray;
    printf("显示最后一行数据 \n");
    for (j = 0; j < stdata->dcol; j++) {
        /* Print the member that was written, with its matching format. */
        printf("%ld\n", show[stdata->drow-1][j].s32);
    }

    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    } else {
        printf("显示结构转换最后一行数据 \n");
        for (j = 0; j < col; j++) {
            printf("%ld\n", stdata->darray[row-1][j].s32);
        }
    }

    printf("转换失败测试\n");
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    }

    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}

/* Runs the file-based tests; testData() is available for manual runs. */
int main(void)
{
    // testData();
    testFile();
    testFile2();
    return 0;
}
加载全部内容