Skip to content

Commit 5b1cfc6

Browse files
authored
feat: the -M option now specifies the output type (reads, variants, or both) (#85)
1 parent 0e8b8a7 commit 5b1cfc6

File tree

3 files changed

+46
-32
lines changed

3 files changed

+46
-32
lines changed

src/dwgsim.c

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ void dwgsim_core(dwgsim_opt_t * opt)
498498
contigs = NULL;
499499
}
500500

501-
if(0 == opt->muts_only) {
501+
if(opt->output_type != OUTPUT_TYPE_MUTS) {
502502
fprintf(stderr, "[dwgsim_core] Currently on: \n0");
503503
}
504504
else {
@@ -508,8 +508,8 @@ void dwgsim_core(dwgsim_opt_t * opt)
508508
while ((l = seq_read_fasta(opt->fp_fa, &seq, name, 0)) >= 0) {
509509
int64_t n_pairs = 0;
510510
n_ref--;
511-
512-
if(1 == opt->muts_only) {
511+
512+
if(opt->output_type == OUTPUT_TYPE_MUTS) {
513513
fprintf(stderr, "\r[dwgsim_core] Currently on: %s", name);
514514
if(name_len_max < strlen(name)) {
515515
name_len_max = strlen(name);
@@ -616,9 +616,11 @@ void dwgsim_core(dwgsim_opt_t * opt)
616616
// generate mutations; they are printed only when mutation output is requested (output_type != OUTPUT_TYPE_READS)
617617
mutseq[0] = mutseq_init(); mutseq[1] = mutseq_init();
618618
mut_diref(opt, &seq, mutseq[0], mutseq[1], contig_i, muts_input);
619-
mut_print(name, &seq, mutseq[0], mutseq[1], opt->fp_mut, opt->fp_vcf);
619+
if(opt->output_type != OUTPUT_TYPE_READS) {
620+
mut_print(name, &seq, mutseq[0], mutseq[1], opt->fp_mut, opt->fp_vcf);
621+
}
620622

621-
if(0 == opt->muts_only) {
623+
if(opt->output_type != OUTPUT_TYPE_MUTS) {
622624
int num_failed = 0;
623625
for (ii = 0; ii != n_pairs; ++ii, ++ctr) { // the core loop
624626
if(0 == (ctr % 10000)) {
@@ -1113,16 +1115,18 @@ int main(int argc, char *argv[])
11131115
opt->fp_fa = xopen(argv[optind+0], "r");
11141116
snprintf(fn_fai, sizeof(fn_fai), "%s.fai", argv[optind+0]);
11151117
opt->fp_fai = fopen(fn_fai, "r"); // NB: plain fopen on purpose: fp_fai is NULL when the .fai index cannot be opened, and later code checks for NULL
1116-
snprintf(fn_tmp, sizeof(fn_tmp), "%s.mutations.txt", argv[optind+1]);
1117-
opt->fp_mut = xopen(fn_tmp, "w");
1118-
snprintf(fn_tmp, sizeof(fn_tmp), "%s.mutations.vcf", argv[optind+1]);
1119-
opt->fp_vcf = xopen(fn_tmp, "w");
1120-
if(0 == opt->muts_only) {
1121-
if (opt->output_type != OUTPUT_TYPE_BWA) {
1118+
if(opt->output_type != OUTPUT_TYPE_READS) {
1119+
snprintf(fn_tmp, sizeof(fn_tmp), "%s.mutations.txt", argv[optind+1]);
1120+
opt->fp_mut = xopen(fn_tmp, "w");
1121+
snprintf(fn_tmp, sizeof(fn_tmp), "%s.mutations.vcf", argv[optind+1]);
1122+
opt->fp_vcf = xopen(fn_tmp, "w");
1123+
}
1124+
if(opt->output_type != OUTPUT_TYPE_MUTS) {
1125+
if (opt->reads_output_type != READS_OUTPUT_TYPE_BWA) {
11221126
snprintf(fn_tmp, sizeof(fn_tmp), "%s.bfast.fastq.gz", argv[optind+1]);
11231127
opt->fp_bfast = gzopen(fn_tmp, "w");
11241128
}
1125-
if (opt->output_type != OUTPUT_TYPE_BFAST) {
1129+
if (opt->reads_output_type != READS_OUTPUT_TYPE_BFAST) {
11261130
snprintf(fn_tmp, sizeof(fn_tmp), "%s.bwa.read1.fastq.gz", argv[optind+1]);
11271131
opt->fp_bwa1 = gzopen(fn_tmp, "w");
11281132
snprintf(fn_tmp, sizeof(fn_tmp), "%s.bwa.read2.fastq.gz", argv[optind+1]);
@@ -1134,18 +1138,20 @@ int main(int argc, char *argv[])
11341138
dwgsim_core(opt);
11351139

11361140
// Close files
1137-
if(0 == opt->muts_only) {
1138-
if (opt->output_type != OUTPUT_TYPE_BWA) {
1139-
gzclose(opt->fp_bfast);
1141+
if(NULL != opt->fp_fai) fclose(opt->fp_fai);
1142+
if(opt->output_type != OUTPUT_TYPE_READS) {
1143+
fclose(opt->fp_mut);
1144+
fclose(opt->fp_vcf);
1145+
}
1146+
if(opt->output_type != OUTPUT_TYPE_MUTS) {
1147+
if (opt->reads_output_type != READS_OUTPUT_TYPE_BWA) {
1148+
gzclose(opt->fp_bfast);
11401149
}
1141-
if (opt->output_type != OUTPUT_TYPE_BFAST) {
1142-
gzclose(opt->fp_bwa1); gzclose(opt->fp_bwa2);
1150+
if (opt->reads_output_type != READS_OUTPUT_TYPE_BFAST) {
1151+
gzclose(opt->fp_bwa1); gzclose(opt->fp_bwa2);
11431152
}
1144-
fclose(opt->fp_fa);
1153+
fclose(opt->fp_fa);
11451154
}
1146-
if(NULL != opt->fp_fai) fclose(opt->fp_fai);
1147-
fclose(opt->fp_mut);
1148-
fclose(opt->fp_vcf);
11491155

11501156
dwgsim_opt_destroy(opt);
11511157

src/dwgsim_opt.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ dwgsim_opt_t* dwgsim_opt_init()
6363
opt->flow_order_len = 0;
6464
opt->use_base_error = 0;
6565
opt->seed = -1;
66-
opt->muts_only = 0;
6766
opt->fixed_quality = NULL;
6867
opt->quality_std = 2.0;
6968
opt->fn_muts_input = NULL;
@@ -73,6 +72,8 @@ dwgsim_opt_t* dwgsim_opt_init()
7372
opt->fp_bfast = opt->fp_bwa1 = opt->fp_bwa2 = NULL;
7473
opt->fp_fa = opt->fp_fai = NULL;
7574
opt->read_prefix = NULL;
75+
opt->reads_output_type = READS_OUTPUT_TYPE_ALL;
76+
opt->output_type = OUTPUT_TYPE_ALL;
7677
opt->amplicons = 0;
7778

7879
return opt;
@@ -132,15 +133,18 @@ int dwgsim_opt_usage(dwgsim_opt_t *opt)
132133
fprintf(stderr, " -B use a per-base error rate for Ion Torrent data [%s]\n", __IS_TRUE(opt->use_base_error));
133134
fprintf(stderr, " -H haploid mode [%s]\n", __IS_TRUE(opt->is_hap));
134135
fprintf(stderr, " -z INT random seed (-1 uses the current time) [%d]\n", opt->seed);
135-
fprintf(stderr, " -M generate a mutations file only [%s]\n", __IS_TRUE(opt->muts_only));
136+
fprintf(stderr, " -M output files to generate [%d]:\n", opt->output_type);
137+
fprintf(stderr, " 0: both reads and mutation files\n");
138+
fprintf(stderr, " 1: reads only\n");
139+
fprintf(stderr, " 2: mutations only\n");
136140
fprintf(stderr, " -m FILE the mutations txt file to re-create [%s]\n", (MUT_INPUT_TXT != opt->fn_muts_input_type) ? "not using" : opt->fn_muts_input);
137141
fprintf(stderr, " -b FILE the bed-like file set of candidate mutations [%s]\n", (MUT_INPUT_BED == opt->fn_muts_input_type) ? "not using" : opt->fn_muts_input);
138142
fprintf(stderr, " -v FILE the vcf file set of candidate mutations (use pl tag for strand) [%s]\n", (MUT_INPUT_VCF == opt->fn_muts_input_type) ? "not using" : opt->fn_muts_input);
139143
fprintf(stderr, " -x FILE the bed of regions to cover [%s]\n", (NULL == opt->fn_regions_bed) ? "not using" : opt->fn_regions_bed);
140144
fprintf(stderr, " -P STRING a read prefix to prepend to each read name [%s]\n", (NULL == opt->read_prefix) ? "not using" : opt->read_prefix);
141145
fprintf(stderr, " -q STRING a fixed base quality to apply (single character) [%s]\n", (NULL == opt->fixed_quality) ? "not using" : opt->fixed_quality);
142146
fprintf(stderr, " -Q FLOAT standard deviation of the base quality scores [%.2lf]\n", (NULL == opt->fixed_quality) ? opt->quality_std : 0.0);
143-
fprintf(stderr, " -o INT output type for the FASTQ files [%d]:\n", opt->output_type);
147+
fprintf(stderr, " -o INT output type for the FASTQ files [%d]:\n", opt->reads_output_type);
144148
fprintf(stderr, " 0: interleaved (bfast) and per-read-end (bwa)\n");
145149
fprintf(stderr, " 1: per-read-end (bwa) only\n");
146150
fprintf(stderr, " 2: interleaved (bfast) only\n");
@@ -204,7 +208,7 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
204208
int c;
205209
int muts_input_type = 0;
206210

207-
while ((c = getopt(argc, argv, "id:s:N:C:1:2:e:E:r:F:R:X:I:c:S:A:n:y:BHf:z:Mm:b:v:x:P:q:Q:o:ah")) >= 0) {
211+
while ((c = getopt(argc, argv, "id:s:N:C:1:2:e:E:r:F:R:X:I:c:S:A:n:y:BHf:z:M:m:b:v:x:P:q:Q:o:ah")) >= 0) {
208212
switch (c) {
209213
case 'i': opt->is_inner = 1; break;
210214
case 'd': opt->dist = dwgsim_atoi(optarg, 'd', 0); break;
@@ -237,7 +241,7 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
237241
case 'H': opt->is_hap = 1; break;
238242
case 'h': return 0;
239243
case 'z': opt->seed = dwgsim_atoi(optarg, 'z', 1); break;
240-
case 'M': opt->muts_only = 1; break;
244+
case 'M': opt->output_type = dwgsim_atoi(optarg, 'M', 0); break;
241245
case 'm':
242246
free(opt->fn_muts_input);
243247
opt->fn_muts_input = strdup(optarg);
@@ -293,7 +297,7 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
293297
}
294298
break;
295299
case 'Q': opt->quality_std = atof(optarg); break;
296-
case 'o': opt->output_type = atoi(optarg); break;
300+
case 'o': opt->reads_output_type = atoi(optarg); break;
297301
case 'a': opt->amplicons = 1; break;
298302
default: fprintf(stderr, "Unrecognized option: -%c\n", c); return 0;
299303
}
@@ -364,7 +368,7 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
364368
fprintf(stderr, "Warning: remember to use the -P option with dwgsim_eval\n");
365369
}
366370

367-
__check_option(opt->output_type, 0, 2, "-o");
371+
__check_option(opt->reads_output_type, 0, 2, "-o");
368372

369373
switch(muts_input_type) {
370374
case 0x0:
@@ -448,7 +452,7 @@ dwgsim_opt_parse(dwgsim_opt_t *opt, int argc, char *argv[])
448452
opt->e[1].by = (opt->e[1].end - opt->e[1].start) / opt->length[1];
449453
}
450454

451-
__check_option(opt->muts_only, 0, 1, "-M");
455+
__check_option(opt->output_type, OUTPUT_TYPE_ALL, OUTPUT_TYPE_MUTS, "-M");
452456
__check_option(opt->amplicons, 0, 1, "-a");
453457

454458
if (opt->amplicons == 1 && opt->fn_regions_bed != NULL) {

src/dwgsim_opt.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55

66
#define ERROR_RATE_NUM_RANDOM_READS 1000000
77

8+
#define READS_OUTPUT_TYPE_ALL 0
9+
#define READS_OUTPUT_TYPE_BWA 1
10+
#define READS_OUTPUT_TYPE_BFAST 2
11+
812
#define OUTPUT_TYPE_ALL 0
9-
#define OUTPUT_TYPE_BWA 1
10-
#define OUTPUT_TYPE_BFAST 2
13+
#define OUTPUT_TYPE_READS 1
14+
#define OUTPUT_TYPE_MUTS 2
1115

1216

1317
typedef struct {
@@ -37,7 +41,6 @@ typedef struct {
3741
int32_t use_base_error;
3842
int32_t is_hap;
3943
int32_t seed;
40-
int32_t muts_only;
4144
char *fixed_quality;
4245
double quality_std;
4346
char *fn_muts_input;
@@ -51,6 +54,7 @@ typedef struct {
5154
FILE *fp_fa;
5255
FILE *fp_fai;
5356
char *read_prefix;
57+
int32_t reads_output_type;
5458
int32_t output_type;
5559
int32_t amplicons;
5660
} dwgsim_opt_t;

0 commit comments

Comments
 (0)