Skip to content

Commit

Permalink
make bc compatible with umi-tools format
Browse files Browse the repository at this point in the history
  • Loading branch information
jamorrison committed Nov 6, 2023
1 parent 0b3bb69 commit 966b93b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
26 changes: 16 additions & 10 deletions src/bc.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,16 @@ int prepare_read_se(kseq_t *k, kstring_t *s, bc_conf_t *conf) {
}
}

// Remove "/1" or "/2" from the end of the read name
remove_read_number(k);

// Create read entry
// NOTE: This assumes there was a FASTQ comment. It may be worth looking into how
// to handle cases where there are no comments to start with.
ksprintf(
s,
"@%s %s:%.*s\n%.*s%s\n+\n%.*s%s\n",
k->name.s, k->comment.s,
conf->bc_length, k->seq.s+conf->bc_start,
"@%s_%.*s_AAAAAAAA %s\n%.*s%s\n+\n%.*s%s\n",
k->name.s, conf->bc_length, k->seq.s+conf->bc_start, k->comment.s,
conf->bc_start, k->seq.s, k->seq.s+conf->bc_start+conf->bc_length,
conf->bc_start, k->qual.s, k->qual.s+conf->bc_start+conf->bc_length
);
Expand Down Expand Up @@ -112,14 +114,17 @@ int prepare_read_pe(kseq_t *k1, kseq_t *k2, kstring_t *s1, kstring_t *s2, bc_con
}
}

// Remove "/1" or "/2" from the end of the read name
remove_read_number(k_has_bc);
remove_read_number(k_not_bc);

// Create entry for read with barcode
// NOTE: This assumes there was a FASTQ comment. It may be worth looking into how
// to handle cases where there are no comments to start with.
ksprintf(
s_has_bc,
"@%s %s:%.*s\n%.*s%s\n+\n%.*s%s\n",
k_has_bc->name.s, k_has_bc->comment.s,
conf->bc_length, k_has_bc->seq.s+conf->bc_start,
"@%s_%.*s_AAAAAAAA %s\n%.*s%s\n+\n%.*s%s\n",
k_has_bc->name.s, conf->bc_length, k_has_bc->seq.s+conf->bc_start, k_has_bc->comment.s,
conf->bc_start, k_has_bc->seq.s, k_has_bc->seq.s+conf->bc_start+conf->bc_length,
conf->bc_start, k_has_bc->qual.s, k_has_bc->qual.s+conf->bc_start+conf->bc_length
);
Expand All @@ -129,9 +134,8 @@ int prepare_read_pe(kseq_t *k1, kseq_t *k2, kstring_t *s1, kstring_t *s2, bc_con
// to handle cases where there aren't comments to start with
ksprintf(
s_not_bc,
"@%s %s:%.*s\n%s\n+\n%s\n",
k_not_bc->name.s, k_not_bc->comment.s,
conf->bc_length, k_has_bc->seq.s+conf->bc_start,
"@%s_%.*s_AAAAAAAA %s\n%s\n+\n%s\n",
k_not_bc->name.s, conf->bc_length, k_has_bc->seq.s+conf->bc_start, k_not_bc->comment.s,
k_not_bc->seq.s, k_not_bc->qual.s
);

Expand Down Expand Up @@ -231,7 +235,9 @@ static void usage() {
fprintf(stderr, "General Options:\n");
fprintf(stderr, " -h, --help This help\n");
fprintf(stderr, "\n");
fprintf(stderr, "Note: When writing to stdout, reads 1 and 2 will alternate (i.e., are interleaved)\n");
fprintf(stderr, "Note 1: When writing to stdout, reads 1 and 2 will alternate (i.e., are interleaved)\n");
fprintf(stderr, "Note 2: Also adds an artificial UMI (AAAAAAAA) for compatibility purposes and to serve\n");
fprintf(stderr, " as a placeholder for future UMI work\n");
fprintf(stderr, "\n");
}

Expand Down
11 changes: 10 additions & 1 deletion src/bc.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
KSEQ_INIT(gzFile, gzread)

// Number of extra bytes to allocate
#define N_EXTRA 32
#define N_EXTRA 64

// uint8_t should be large enough for now, but if barcodes start to occur
// in locations further in from the start of the read or barcode lengths
Expand All @@ -58,6 +58,15 @@ static inline void bc_conf_init(bc_conf_t *conf) {

gzFile setup_output(const char *ofile, uint8_t read_num);

// Strip a trailing "/1" or "/2" suffix from the read name, in place.
//
// The name is only modified when it is longer than two characters and its last
// two characters are exactly "/1" or "/2". In that case the '/' is overwritten
// with a NUL terminator and the stored length (k->name.l) shrinks by two.
// Any other name is left untouched.
static inline void remove_read_number(kseq_t *k) {
    const size_t name_len = k->name.l;

    // Need at least one character in front of the two-character suffix
    if (name_len <= 2) {
        return;
    }

    char *suffix = k->name.s + (name_len - 2);
    if (suffix[0] != '/') {
        return;
    }
    if (suffix[1] != '1' && suffix[1] != '2') {
        return;
    }

    // Truncate the name right where the suffix started
    suffix[0] = '\0';
    k->name.l = name_len - 2;
}

int prepare_read_se(kseq_t *k, kstring_t *s, bc_conf_t *conf);

void extract_barcodes(bc_conf_t *conf, kseq_t *ks1, kseq_t *ks2, gzFile oh1, gzFile oh2);
Expand Down

0 comments on commit 966b93b

Please sign in to comment.