diff --git a/htslib/sam.h b/htslib/sam.h
index 5f8c0a554..4beaea376 100644
--- a/htslib/sam.h
+++ b/htslib/sam.h
@@ -1438,7 +1438,6 @@ int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b,
 
 /// Converts a BAM aux tag to SAM format
 /*
- * @param b    Pointer to the bam record
  * @param key  Two letter tag key
  * @param type Single letter type code: ACcSsIifHZB.
  * @param tag  Tag data pointer, in BAM format
@@ -1628,6 +1627,29 @@ static inline const uint8_t *sam_format_aux1(const uint8_t *key,
     return NULL;
 }
 
+/// Return a pointer to a BAM record's first aux field
+/** @param b   Pointer to the BAM record
+    @return    Aux field pointer, or NULL if the record has none
+
+When NULL is returned, errno will also be set to ENOENT. ("Aux field pointers"
+point to the TYPE byte within the auxiliary data for that field; but in general
+it is unnecessary for user code to be aware of this.)
+ */
+HTSLIB_EXPORT
+uint8_t *bam_aux_first(const bam1_t *b);
+
+/// Return a pointer to a BAM record's next aux field
+/** @param b   Pointer to the BAM record
+    @param s   Aux field pointer, as returned by bam_aux_first()/_next()/_get()
+    @return    Pointer to the next aux field, or NULL if no next field or error
+
+Whenever NULL is returned, errno will also be set: ENOENT if @p s was the
+record's last aux field; otherwise EINVAL, indicating that the BAM record's
+aux data is corrupt.
+ */
+HTSLIB_EXPORT
+uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s);
+
 /// Return a pointer to an aux record
 /** @param b   Pointer to the bam record
     @param tag Desired aux tag
@@ -1640,6 +1662,19 @@ static inline const uint8_t *sam_format_aux1(const uint8_t *key,
 HTSLIB_EXPORT
 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
 
+/// Return the aux field's 2-character tag
+/** @param s   Aux field pointer, as returned by bam_aux_first()/_next()/_get()
+    @return    Pointer to the tag characters, NOT NUL-terminated
+ */
+static inline
+const char *bam_aux_tag(const uint8_t *s) { return (const char *) (s-2); }
+
+/// Return the aux field's type character
+/** @param s   Aux field pointer, as returned by bam_aux_first()/_next()/_get()
+    @return    The type character: one of cCsSiI/fd/A/Z/H/B
+ */
+static inline char bam_aux_type(const uint8_t *s) { return *s; }
+
 /// Return a SAM formatting string containing a BAM tag
 /** @param b   Pointer to the bam record
     @param tag Desired aux tag
@@ -1742,15 +1777,33 @@ HTSLIB_EXPORT
 int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8_t *data);
 
 /// Delete tag data from a bam record
-/* @param b The bam record to update
-   @param s Pointer to the tag to delete, as returned by bam_aux_get().
-   @return  0 on success; -1 on failure
-   If the bam record's aux data is corrupt, errno is set to EINVAL and this
-   function returns -1;
+/** @param b   The BAM record to update
+    @param s   Pointer to the aux field to delete, as returned by
+               bam_aux_first()/_next()/_get(); must not be NULL
+    @return    0 on success; -1 on failure
+
+If the BAM record's aux data is corrupt, errno is set to EINVAL and this
+function returns -1.
  */
 HTSLIB_EXPORT
 int bam_aux_del(bam1_t *b, uint8_t *s);
 
+/// Delete an aux field from a BAM record
+/** @param b   The BAM record to update
+    @param s   Pointer to the aux field to delete, as returned by
+               bam_aux_first()/_next()/_get(); must not be NULL
+    @return    Pointer to the following aux field, or NULL if none or on error
+
+Identical to @c bam_aux_del() apart from the return value, which is an
+aux iterator suitable for use with @c bam_aux_next()/etc.
+
+Whenever NULL is returned, errno will also be set: ENOENT if the aux field
+deleted was the record's last one; otherwise EINVAL, indicating that the
+BAM record's aux data is corrupt.
+ */
+HTSLIB_EXPORT
+uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s);
+
 /// Update or add a string-type tag
 /* @param b    The bam record to update
    @param tag  Tag identifier
diff --git a/sam.c b/sam.c
index c95d1c693..ee88cc23e 100644
--- a/sam.c
+++ b/sam.c
@@ -4614,31 +4614,42 @@ static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end)
     }
 }
 
+uint8_t *bam_aux_first(const bam1_t *b)
+{
+    uint8_t *s = bam_get_aux(b);
+    uint8_t *end = b->data + b->l_data;
+    if (end - s <= 2) { errno = ENOENT; return NULL; } // need 2-byte tag + type byte
+    return s+2; // aux field pointers address the TYPE byte, just past the tag
+}
+
+uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s)
+{
+    uint8_t *end = b->data + b->l_data;
+    uint8_t *next = s? skip_aux((uint8_t *) s, end) : end; // NULL s: treated as end of fields
+    if (next == NULL) goto bad_aux;
+    if (end - next <= 2) { errno = ENOENT; return NULL; } // need 2-byte tag + type byte
+    return next+2;
+
+ bad_aux:
+    hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
+    errno = EINVAL;
+    return NULL;
+}
+
 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
 {
-    uint8_t *s, *end, *t = (uint8_t *) tag;
-    uint16_t y = (uint16_t) t[0]<<8 | t[1];
-    s = bam_get_aux(b);
-    end = b->data + b->l_data;
-    while (s != NULL && end - s >= 3) {
-        uint16_t x = (uint16_t) s[0]<<8 | s[1];
-        s += 2;
-        if (x == y) {
+    uint8_t *s;
+    for (s = bam_aux_first(b); s; s = bam_aux_next(b, s))
+        if (s[-2] == (uint8_t) tag[0] && s[-1] == (uint8_t) tag[1]) { // compare as unsigned bytes
             // Check the tag value is valid and complete
-            uint8_t *e = skip_aux(s, end);
-            if ((*s == 'Z' || *s == 'H') && *(e - 1) != '\0') {
-                goto bad_aux;  // Unterminated string
-            }
-            if (e != NULL) {
-                return s;
-            } else {
-                goto bad_aux;
-            }
+            uint8_t *e = skip_aux(s, b->data + b->l_data);
+            if (e == NULL) goto bad_aux;
+            if ((*s == 'Z' || *s == 'H') && *(e - 1) != '\0') goto bad_aux; // unterminated string
+
+            return s;
         }
-        s = skip_aux(s, end);
-    }
-    if (s == NULL) goto bad_aux;
-    errno = ENOENT;
+
+    // errno now as set by bam_aux_first()/bam_aux_next()
     return NULL;
 
  bad_aux:
@@ -4647,23 +4658,28 @@ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
     return NULL;
 }
 
-// s MUST BE returned by bam_aux_get()
 int bam_aux_del(bam1_t *b, uint8_t *s)
 {
-    uint8_t *p, *aux;
-    int l_aux = bam_get_l_aux(b);
-    aux = bam_get_aux(b);
-    p = s - 2;
-    s = skip_aux(s, aux + l_aux);
-    if (s == NULL) goto bad_aux;
-    memmove(p, s, l_aux - (s - aux));
-    b->l_data -= s - p;
-    return 0;
+    s = bam_aux_remove(b, s);
+    return (s || errno == ENOENT)? 0 : -1; // NULL with ENOENT: the last field was deleted
+}
+
+uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s)
+{
+    uint8_t *end = b->data + b->l_data;
+    uint8_t *next = skip_aux(s, end);
+    if (next == NULL) goto bad_aux;
+
+    b->l_data -= next - (s-2); // drop the 2-byte tag plus the type and value bytes
+    if (next >= end) { errno = ENOENT; return NULL; }
+
+    memmove(s-2, next, end - next); // close the gap; s now addresses the following field
+    return s;
 
  bad_aux:
     hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
     errno = EINVAL;
-    return -1;
+    return NULL;
 }
 
 int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data)
diff --git a/test/sam.c b/test/sam.c
index 036349f2b..c641b88e6 100644
--- a/test/sam.c
+++ b/test/sam.c
@@ -87,6 +87,15 @@ uint8_t *check_bam_aux_get(const bam1_t *aln, const char *tag, char type)
     return NULL;
 }
 
+static void check_aux_count(const bam1_t *aln, int expected, const char *what)
+{
+    const uint8_t *itr;
+    int n = 0;
+    for (itr = bam_aux_first(aln); itr; itr = bam_aux_next(aln, itr)) n++;
+    if (n != expected)
+        fail("%s has %d aux fields, expected %d", what, n, expected);
+}
+
 static void check_int_B_array(bam1_t *aln, char *tag,
                               uint32_t nvals, int64_t *vals) {
     uint8_t *p;
@@ -285,10 +294,30 @@ static int aux_fields1(void)
         if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k')
             fail("XA field is '%c', expected 'k'", bam_aux2A(p));
 
+        check_aux_count(aln, 24, "Original record");
+
         bam_aux_del(aln,p);
         if (bam_aux_get(aln,"XA"))
             fail("XA field was not deleted");
 
+        check_aux_count(aln, 23, "Record post-XA-deletion");
+
+        p = bam_aux_get(aln, "Y2");
+        if (p == NULL || strncmp(bam_aux_tag(p), "Y2", 2) != 0 || bam_aux_type(p) != 'i')
+            fail("bam_aux_get() missed Y2 field");
+
+        p = bam_aux_next(aln, p);
+        if (p == NULL || strncmp(bam_aux_tag(p), "Y3", 2) != 0 || bam_aux_type(p) != 'c')
+            fail("bam_aux_next() missed Y3 field");
+
+        p = bam_aux_get(aln, "Y8");
+        if (p == NULL || strncmp(bam_aux_tag(p), "Y8", 2) != 0 || bam_aux_type(p) != 'I')
+            fail("bam_aux_get() missed Y8 field");
+
+        p = bam_aux_next(aln, p);
+        if (p != NULL || errno != ENOENT)
+            fail("bam_aux_next missed the end of fields");
+
         if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37)
             fail("Xi field is %"PRId64", expected 37", bam_aux2i(p));
 
@@ -492,6 +521,17 @@ static int aux_fields1(void)
 
         if (strcmp(ks.s, r1) != 0)
             fail("record formatted incorrectly: \"%s\"", ks.s);
+
+        // Test field removal APIs -- after the strcmp(..., r1) check so that
+        // can also check the formatting of the to-be-removed fields.
+
+        p = check_bam_aux_get(aln, "XH", 'H');
+        if (p) p = bam_aux_remove(aln, p); // bam_aux_remove() must not be given NULL
+        if (bam_aux_get(aln, "XH"))
+            fail("XH field was not removed");
+        check_aux_count(aln, 31, "Record post-XH-removal");
+        if (p == NULL || strncmp(bam_aux_tag(p), "XB", 2) != 0 || bam_aux_type(p) != 'B')
+            fail("bam_aux_remove() missed XB field");
     }
     else
         fail("can't read record");