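I threw together this C program today to handle a bioinformatics data-processing task: it truncates each read in a FASTA sequence file, together with the matching entry in a quality file, to a fixed length, and drops reads that contain an ambiguous ('.') call. The program seems to work correctly, but I would welcome suggestions on how the input data are parsed and on how I've used control structures in the main processing loop.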
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFFERSIZE 1024

/* Characters that separate quality values or terminate a line. */
#define WHITESPACE " \t\r\n"

/*
 * Open `filename` with the given fopen() `mode`.
 *
 * Never returns NULL: on failure an error is printed to stderr and the
 * program exits with a failure status.
 */
FILE *fileopen(const char *filename, const char *mode)
{
    FILE *fh = fopen(filename, mode);
    if (fh == NULL) {
        fprintf(stderr, "error: unable to open file '%s'\n", filename);
        exit(EXIT_FAILURE);
    }
    return fh;
}

/*
 * Allocate `size` bytes with malloc().
 *
 * Never returns NULL: on failure an error is printed to stderr and the
 * program exits with a failure status. The caller owns the memory and must
 * free() it.
 */
void *memalloc(size_t size)
{
    void *memory = malloc(size);
    if (memory == NULL) {
        fprintf(stderr, "error: unable to allocate memory\n");
        exit(EXIT_FAILURE);
    }
    return memory;
}

/*
 * Parse the requested read length from a command-line argument.
 *
 * The value must be a plain decimal integer between 1 and BUFFERSIZE - 2 so
 * that a full read plus its newline and terminator fits in a line buffer.
 * Anything else is reported on stderr and terminates the program.
 */
static int parselength(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < 1 || value > BUFFERSIZE - 2) {
        fprintf(stderr,
                "error: sequence length must be an integer from 1 to %d, got '%s'\n",
                BUFFERSIZE - 2, text);
        exit(EXIT_FAILURE);
    }
    return (int)value;
}

/*
 * Read one line from `fh` into `buffer` (capacity BUFFERSIZE).
 *
 * Returns 1 when a line was read and 0 at end of file. A read error, or a
 * line that does not fit in the buffer, is reported on stderr (naming
 * `filename`) and terminates the program.
 */
static int fetchline(char *buffer, FILE *fh, const char *filename)
{
    if (fgets(buffer, BUFFERSIZE, fh) == NULL) {
        if (ferror(fh)) {
            fprintf(stderr, "error: unable to read file '%s'\n", filename);
            exit(EXIT_FAILURE);
        }
        return 0;
    }

    // fgets() stops early when the buffer fills up. Continuing would treat
    // the remainder of the line as the next line and break the pairing of
    // headers, sequences and quality entries. A missing newline is only
    // acceptable on the very last line of the file.
    if (strchr(buffer, '\n') == NULL && getc(fh) != EOF) {
        fprintf(stderr, "error: line exceeds the %d-byte buffer in file '%s'\n",
                BUFFERSIZE, filename);
        exit(EXIT_FAILURE);
    }
    return 1;
}

/*
 * Write the first `count` whitespace-separated values of `values` to `out`,
 * separated by single spaces and followed by exactly one newline. Fewer
 * values are written if the line does not contain `count` of them.
 */
static void writequalities(const char *values, int count, FILE *out)
{
    const char *cursor = values;
    int written = 0;

    while (written < count) {
        size_t length;

        cursor += strspn(cursor, WHITESPACE);
        length = strcspn(cursor, WHITESPACE);
        if (length == 0) {
            break;
        }
        if (written > 0) {
            fputc(' ', out);
        }
        fwrite(cursor, 1, length, out);
        cursor += length;
        written++;
    }
    fputc('\n', out);
}

/*
 * Close an output file, exiting with an error if anything written to it
 * failed (fclose() flushes, so late write errors surface here).
 */
static void closeoutput(FILE *fh, const char *filename)
{
    int failed = ferror(fh);

    if (fclose(fh) != 0) {
        failed = 1;
    }
    if (failed) {
        fprintf(stderr, "error: unable to write file '%s'\n", filename);
        exit(EXIT_FAILURE);
    }
}

/*
 * Usage: <program> seq-in qual-in seq-out qual-out length
 *
 * Reads two-line records (a '>' header line followed by a data line) in
 * lock-step from the sequence and quality input files. Every read is cut to
 * its first `length` characters; reads with a '.' among those characters, and
 * reads shorter than `length`, are dropped together with their quality entry.
 * Kept reads are written to the two output files.
 *
 * Returns 0 on success; any error is reported on stderr and exits with a
 * failure status.
 */
int main(int argc, const char **argv)
{
    char seqbuffer1[BUFFERSIZE];   // sequence header line
    char seqbuffer2[BUFFERSIZE];   // sequence data line
    char qualbuffer1[BUFFERSIZE];  // quality header line
    char qualbuffer2[BUFFERSIZE];  // quality values line
    char *seqbufferout;
    FILE *seqinfile;
    FILE *seqoutfile;
    FILE *qualinfile;
    FILE *qualoutfile;
    int seqlength;
    unsigned long shortreads = 0;

    // Parse command line arguments
    if (argc != 6) {
        fprintf(stderr, "error: 5 arguments required, %d provided\n", argc - 1);
        exit(EXIT_FAILURE);
    }

    // Validate the length before opening anything, so that a bad argument
    // does not truncate existing output files.
    seqlength = parselength(argv[5]);

    seqinfile = fileopen(argv[1], "r");
    qualinfile = fileopen(argv[2], "r");
    seqoutfile = fileopen(argv[3], "w");
    qualoutfile = fileopen(argv[4], "w");

    // Process reads
    seqbufferout = memalloc((size_t)seqlength + 1);
    seqbufferout[seqlength] = '\0';

    while (fetchline(seqbuffer1, seqinfile, argv[1])) {
        // Skip blank lines between records in the sequence file.
        if (seqbuffer1[strspn(seqbuffer1, WHITESPACE)] == '\0') {
            continue;
        }

        if (seqbuffer1[0] != '>') {
            fprintf(stderr, "error: expected a fasta header, found '%.*s'\n",
                    (int)strcspn(seqbuffer1, "\r\n"), seqbuffer1);
            exit(EXIT_FAILURE);
        }

        if (!fetchline(seqbuffer2, seqinfile, argv[1])) {
            fprintf(stderr, "error: file ends with a fasta header\n");
            exit(EXIT_FAILURE);
        }

        // The quality entry is consumed even when the read ends up being
        // dropped, so that both input files stay aligned.
        if (!fetchline(qualbuffer1, qualinfile, argv[2]) ||
            !fetchline(qualbuffer2, qualinfile, argv[2])) {
            fprintf(stderr,
                    "error: no quality entry corresponding to sequence '%.*s'\n",
                    (int)strcspn(seqbuffer1, "\r\n"), seqbuffer1);
            exit(EXIT_FAILURE);
        }

        // A read shorter than the requested length cannot be cut to that
        // length; copying anyway would pull the newline and stale bytes from
        // a previous read into the output.
        if (strcspn(seqbuffer2, "\r\n") < (size_t)seqlength) {
            shortreads++;
            continue;
        }

        // Drop reads with an ambiguous call inside the kept prefix.
        if (memchr(seqbuffer2, '.', (size_t)seqlength) != NULL) {
            continue;
        }

        memcpy(seqbufferout, seqbuffer2, (size_t)seqlength);
        // The header still carries its own newline from fgets().
        fprintf(seqoutfile, "%s%s\n", seqbuffer1, seqbufferout);

        fputs(qualbuffer1, qualoutfile);
        // NOTE(review): one quality value fewer than sequence characters is
        // kept. Presumably the first sequence character is a primer base
        // that has no quality score (color-space reads) - confirm.
        writequalities(qualbuffer2, seqlength - 1, qualoutfile);
    }

    if (shortreads > 0) {
        fprintf(stderr, "warning: skipped %lu read(s) shorter than %d characters\n",
                shortreads, seqlength);
    }

    fclose(seqinfile);
    fclose(qualinfile);
    closeoutput(seqoutfile, argv[3]);
    closeoutput(qualoutfile, argv[4]);
    free(seqbufferout);

    return 0;
}