/*===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 */

#include <align/extern.h>
#include <klib/defs.h>
#include <klib/sort.h>
#include <klib/rc.h>
#include <kfs/file.h>
#include <kfs/directory.h>
#include <kfs/mmap.h>
#include <sysalloc.h>

#include <atomic32.h>
#include <strtol.h>

#include <align/bam.h>
#include "bam-priv.h"

#include <kfs/path.h>
#include <kfs/kfs-priv.h>

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#if 1
/*_DEBUGGING*/
#include <stdio.h>
#endif

#include <endian.h>
#include <byteswap.h>

#include <zlib.h>

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LE2HUI16(X) (*((const uint16_t *)(X)))
#define LE2HUI32(X) (*((const uint32_t *)(X)))
#define LE2HUI64(X) (*((const uint64_t *)(X)))
#define LE2HI16(X)  (*((const  int16_t *)(X)))
#define LE2HI32(X)  (*((const  int32_t *)(X)))
#define LE2HI64(X)  (*((const  int64_t *)(X)))
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define LE2HUI16(X) ((uint16_t)(bswap_16(*((const uint16_t *)(X)))))
#define LE2HUI32(X) ((uint32_t)(bswap_32(*((const uint32_t *)(X)))))
#define LE2HUI64(X) ((uint64_t)(bswap_64(*((const uint64_t *)(X)))))
#define LE2HI16(X)  (( int16_t)(bswap_16(*((const  int16_t *)(X)))))
#define LE2HI32(X)  (( int32_t)(bswap_32(*((const  int32_t *)(X)))))
#define LE2HI64(X)  (( int64_t)(bswap_64(*((const  int64_t *)(X)))))
#endif

typedef struct BAMIndex BAMIndex;
typedef struct BGZFile BGZFile;

#define USEMMAP 0

#define VALIDATE_BGZF_HEADER 1
#if (ZLIB_VERNUM < 0x1230)
#undef VALIDATE_BGZF_HEADER
#warning "zlib too old, inflateGetHeader not available, not validating BGZF headers"
#else
#endif

#define ZLIB_BLOCK_SIZE ( 64 * 1024 )
typedef uint8_t zlib_block_t[ZLIB_BLOCK_SIZE];

#define MEM_ALIGN_SIZE ( 64 * 1024 )
/* MEM_CHUNK_SIZE must be an integer multiple of ZLIB_BLOCK_SIZE.
 * The multiple must be at least 2, and should be at least 3.
 * If using mmap, it can be much bigger and let the OS deal with the IO.
 */
#if USEMMAP
#define MEM_CHUNK_SIZE ( 256 * ZLIB_BLOCK_SIZE ) /* 16 MB */
#else
#define MEM_CHUNK_SIZE ( 256 * ZLIB_BLOCK_SIZE )
#endif

/* BGZFile
 *  State for reading a BGZF (blocked gzip) file through a fixed-size input
 *  window and a zlib inflate stream.
 *
 *  The window holds up to MEM_CHUNK_SIZE compressed bytes starting at file
 *  offset `fpos`; `bpos` is the read head inside that window, so the absolute
 *  position of the read head is `fpos + bpos`.
 */
struct BGZFile {
    uint64_t fsize; /* total file size, filled in by KFileSize in BGZFileInit */
    uint64_t fpos;  /* position in file of first byte in buffer */
    const uint8_t *buf;   /* page aligned or memmapped */
#if USEMMAP
    const KMMap *mm;      /* memory-map region used as the input window */
#else
    const KFile *kfp;     /* source file; a reference is added in BGZFileInit */
    uint8_t *_buf;  /* allocated */
    unsigned malign;      /* byte offset from _buf to the aligned start (buf) */
#endif
    size_t bcount;  /* number of valid bytes in buffer */
    uint32_t bpos;  /* position in buffer of read head */
    z_stream zs;          /* zlib inflate state, initialised for gzip framing */
#if USEMMAP
    bool sequentialForward;
#endif
};

/* BGZFileInit
 *  Prepare `self` for reading the BGZF file `kfp`.
 *
 *  Zeroes the object, initialises the zlib inflate stream for gzip framing,
 *  records the file size and sets up the input window (an aligned heap
 *  buffer, or a memory map when USEMMAP is enabled).
 *
 *  On success a reference to `kfp` is held until BGZFileWhack.
 *  On failure the object may be partially initialised; it is safe (and
 *  required) to call BGZFileWhack on it to release whatever was acquired.
 *
 *  Returns 0 on success, otherwise an rc describing the failure.
 */
static rc_t BGZFileInit(BGZFile *self, const KFile *kfp)
{
    int i;
    rc_t rc;
    
    memset(self, 0, sizeof(*self));

    i = inflateInit2(&self->zs, MAX_WBITS + 16); /* max + enable gzip headers */
    switch (i) {
    case Z_OK:
        break;
    case Z_MEM_ERROR:
        return RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
    default:
        return RC(rcAlign, rcFile, rcConstructing, rcNoObj, rcUnexpected);
    }
    
    rc = KFileSize(kfp, &self->fsize);
    if (rc)
        return rc;

#if USEMMAP
    /* report a failed mapping instead of silently returning success */
    rc = KMMapMakeRgnRead(&self->mm, kfp, 0, MEM_CHUNK_SIZE);
#else
    self->_buf = malloc(MEM_CHUNK_SIZE + MEM_ALIGN_SIZE);
    if (self->_buf == NULL)
        return RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);

    /* distance from the allocation to the next MEM_ALIGN_SIZE boundary;
     * computed on an unsigned integer so the mask is well defined */
    self->malign = (unsigned)((MEM_ALIGN_SIZE - ((uintptr_t)self->_buf & (MEM_ALIGN_SIZE - 1))) & (MEM_ALIGN_SIZE - 1));
    self->buf = self->_buf + self->malign;

    /* only remember the file once we really own a reference to it,
     * otherwise BGZFileWhack would release a reference we never took */
    rc = KFileAddRef(kfp);
    if (rc == 0)
        self->kfp = kfp;
#endif
    
    return rc;
}

/* BGZFileGetMoreBytes
 *  Refill the input window so that it contains the byte at the current
 *  read head (fpos + bpos).
 *
 *  The window start is moved to the MEM_ALIGN_SIZE boundary at or before the
 *  read head; bpos becomes the offset of the read head inside the new window
 *  and the zlib input pointers are re-aimed at it.
 *
 *  Returns rcData/rcInsufficient when no bytes at or after the read head
 *  could be obtained, or the rc of the underlying read on failure.
 */
static rc_t BGZFileGetMoreBytes(BGZFile *self)
{
    uint32_t const within = self->bpos & (MEM_ALIGN_SIZE - 1);
    rc_t rc;

    /* absolute head position, rounded down to the alignment boundary */
    self->fpos = self->fpos + self->bpos - within;
    self->bpos = within;

#if USEMMAP
    rc = KMMapReposition(self->mm, self->fpos, &self->bcount);
    if (rc != 0)
        return rc;
    rc = KMMapAddrRead(self->mm, (const void **)&self->buf);
#else
    rc = KFileRead(self->kfp, self->fpos, self->_buf + self->malign,
                   MEM_CHUNK_SIZE, &self->bcount);
#endif
    if (rc != 0)
        return rc;

    /* nothing read at all, or nothing beyond the read head */
    if (self->bcount == 0 || self->bcount == self->bpos)
        return RC(rcAlign, rcFile, rcReading, rcData, rcInsufficient);

    self->zs.next_in = (Bytef *)&self->buf[self->bpos];
    self->zs.avail_in = (uInt)(self->bcount - self->bpos);

    return 0;
}

/* BGZFileRead
 *  Inflate the next gzip member from the input window into `dst`.
 *
 *  self      reader state; the read head is advanced past the consumed input
 *  dst       receives the decompressed bytes (at most sizeof(zlib_block_t))
 *  pNumRead  set to the number of decompressed bytes, 0 on failure
 *
 *  When VALIDATE_BGZF_HEADER is enabled the gzip header's extra field is
 *  searched for the BGZF "BC" subfield and its block size is compared with
 *  the number of compressed bytes actually consumed.
 *
 *  Returns 0 on success; rcFormat/rcInvalid if the member is not a valid
 *  BGZF block; rcFile/rcCorrupt on inflate errors; rcFile/rcTooShort if the
 *  member could not be completed after refilling the window; or the rc from
 *  BGZFileGetMoreBytes.
 */
static rc_t BGZFileRead(BGZFile *self, zlib_block_t dst, uint32_t *pNumRead)
{
#if VALIDATE_BGZF_HEADER
    uint8_t extra[256];
    gz_header head;
#endif
    rc_t rc = 0;
    unsigned loops;
    int zr;
    
    *pNumRead = 0;
    if (self->bcount == 0) {
        rc = BGZFileGetMoreBytes(self);
        if (rc)
            return rc;
    }

#if VALIDATE_BGZF_HEADER
    memset(&head, 0, sizeof(head));
    head.extra = extra;
    head.extra_max = sizeof(extra);
    
    zr = inflateGetHeader(&self->zs, &head);
    assert(zr == Z_OK);
#endif
    
    self->zs.next_out = (Bytef *)dst;
    self->zs.avail_out = sizeof(zlib_block_t);

    /* at most one refill: a block never spans more than two windows */
    for (loops = 0; loops != 2; ++loops) {
        uInt temp = self->zs.total_in;
        
        zr = inflate(&self->zs, Z_FINISH);
        self->bpos += self->zs.total_in - temp;
        assert(self->zs.avail_in == self->bcount - self->bpos);
        switch (zr) {
        case Z_OK:
        case Z_BUF_ERROR:
            /* input exhausted before the end of the member */
            rc = BGZFileGetMoreBytes(self);
            if (rc)
                return rc;
            break;
        case Z_STREAM_END:
#if VALIDATE_BGZF_HEADER
            if (head.done) {
                /* zlib reports the full field length in extra_len but
                 * stores at most extra_max bytes in extra[] */
                unsigned const avail = head.extra_len < sizeof(extra)
                                     ? (unsigned)head.extra_len
                                     : (unsigned)sizeof(extra);
                unsigned bsize = 0;
                unsigned i;
                
                /* each subfield is SI1, SI2, SLEN (2 bytes LE), then SLEN
                 * bytes of payload */
                for (i = 0; i + 4 <= avail; ) {
                    uint16_t t16;
                    unsigned slen;
                    
                    memcpy(&t16, &extra[i + 2], 2);
                    slen = LE2HUI16(&t16);
                    if (extra[i] == 'B' && extra[i + 1] == 'C') {
                        if (i + 6 <= avail) {
                            memcpy(&t16, &extra[i + 4], 2);
                            bsize = 1 + LE2HUI16(&t16);
                        }
                        break;
                    }
                    /* skip the 4-byte subfield header as well as the payload */
                    i += 4 + slen;
                }
                if (bsize == 0 || bsize != self->zs.total_in)
                    rc = RC(rcAlign, rcFile, rcReading, rcFormat, rcInvalid); /* not BGZF */
            }
            else
                rc = RC(rcAlign, rcFile, rcReading, rcFile, rcCorrupt);
#endif
            *pNumRead = self->zs.total_out;
            zr = inflateReset(&self->zs);
            assert(zr == Z_OK);
            return rc;
        default:
            return RC(rcAlign, rcFile, rcReading, rcFile, rcCorrupt);
        }
    }
    return RC(rcAlign, rcFile, rcReading, rcFile, rcTooShort);
}

/* BGZFileGetPos
 *  Absolute file offset of the read head: window start plus the offset of
 *  the read head within the window.
 */
static uint64_t BGZFileGetPos(const BGZFile *self)
{
    uint64_t const windowStart = self->fpos;

    return windowStart + self->bpos;
}

/* BGZFileProPos
 *  Returns the current read position as a proportion of the whole file
 *  (current position divided by file size).
 */
static float BGZFileProPos(const BGZFile *self)
{
    double const total = (double)self->fsize;

    return (float)(BGZFileGetPos(self) / total);
}

/* BGZFileSetPos
 *  Move the read head to absolute file offset `pos`.
 *
 *  If `pos` lies inside the currently buffered window only the read head and
 *  the zlib input pointers are moved. Otherwise the window start is set to
 *  the MEM_ALIGN_SIZE boundary at or before `pos` and the window is marked
 *  empty, so the next BGZFileRead refills it.
 *
 *  Always returns 0.
 */
static rc_t BGZFileSetPos(BGZFile *self, uint64_t pos)
{
    bool const buffered = self->fpos <= pos && pos < self->fpos + self->bcount;

    if (buffered) {
        self->bpos = pos - self->fpos;
        self->zs.next_in = (Bytef *)&self->buf[self->bpos];
        self->zs.avail_in = (uInt)(self->bcount - self->bpos);
        return 0;
    }

    /* outside the window: re-aim at the aligned offset and force a re-read */
    self->fpos = pos - (pos & (MEM_ALIGN_SIZE - 1));
    self->bpos = pos - self->fpos;
    self->bcount = 0;

    return 0;
}

/* BGZFileWalkBlocks_cb
 *  Callback type used by the BGZFileWalkBlocks family.
 *
 *  ctx    opaque pointer supplied by the caller of the walk
 *  file   the file being walked
 *  rc     status associated with this call
 *  fpos   file offset associated with this call
 *  data   block data, or NULL when none is supplied
 *  dsize  size associated with this call; 0 when there is no block
 *
 *  NOTE: rc and fpos are both integer types, so passing them in the wrong
 *  order at a call site compiles without a diagnostic.
 */
typedef rc_t (*BGZFileWalkBlocks_cb)(void *ctx, const BGZFile *file,
                                     rc_t rc, uint64_t fpos,
                                     const zlib_block_t data, unsigned dsize);

/* BGZFileWalkBlocksND
 *  Walk the BGZF blocks of the file Without Decompression.
 *
 *  For every block only the gzip header is inflated; the block size is taken
 *  from the BGZF "BC" extra subfield and the compressed payload is skipped.
 *  `cb` is called once per block with rc == 0, the file offset of the block,
 *  data == NULL and dsize == the block's compressed size. After the walk ends
 *  `cb` is called one final time with the terminating rc (0 on a clean end
 *  of file), the final file offset, data == NULL and dsize == 0; the value
 *  returned by that final call is the function result.
 *
 *  Only available when VALIDATE_BGZF_HEADER is enabled; otherwise this
 *  function does nothing and returns 0.
 */
static rc_t BGZFileWalkBlocksND(BGZFile *self, BGZFileWalkBlocks_cb cb, void *ctx)
{
    rc_t rc = 0;
#if VALIDATE_BGZF_HEADER
    uint8_t extra[256];
    char dummy[64];
    gz_header head;
    int zr;

    memset(&head, 0, sizeof(head));
    head.extra = extra;
    head.extra_max = sizeof(extra);
    
    do {
        unsigned loops;
        unsigned hsize = 0;
        unsigned bsize = 0;
        unsigned bsize2;
        uint64_t const fpos = self->fpos + self->bpos;
        
        self->zs.next_out = (Bytef *)dummy;
        self->zs.avail_out = sizeof(dummy);
        
        zr = inflateGetHeader(&self->zs, &head);
        assert(zr == Z_OK);
        
        /* inflate just far enough to get the gzip header; one refill allowed */
        for (loops = 0; loops != 2; ++loops) {
            uInt temp = self->zs.total_in;
            
            zr = inflate(&self->zs, Z_BLOCK);
            temp = self->zs.total_in - temp;
            self->bpos += temp;
            hsize += temp;
            if (head.done) {
                /* zlib reports the full field length in extra_len but
                 * stores at most extra_max bytes in extra[] */
                unsigned const avail = head.extra_len < sizeof(extra)
                                     ? (unsigned)head.extra_len
                                     : (unsigned)sizeof(extra);
                unsigned i;
                
                /* each subfield is SI1, SI2, SLEN (2 bytes LE), then SLEN
                 * bytes of payload */
                for (i = 0; i + 4 <= avail; ) {
                    uint16_t t16;
                    unsigned slen;
                    
                    memcpy(&t16, &extra[i + 2], 2);
                    slen = LE2HUI16(&t16);
                    if (extra[i] == 'B' && extra[i + 1] == 'C') {
                        if (i + 6 <= avail) {
                            memcpy(&t16, &extra[i + 4], 2);
                            bsize = 1 + LE2HUI16(&t16);
                        }
                        break;
                    }
                    /* skip the 4-byte subfield header as well as the payload */
                    i += 4 + slen;
                }
                break;
            }
            else if (self->zs.avail_in == 0) {
                rc = BGZFileGetMoreBytes(self);
                if (rc) {
                    rc = RC(rcAlign, rcFile, rcReading, rcFile, rcTooShort);
                    goto DONE;
                }
            }
            else {
                rc = RC(rcAlign, rcFile, rcReading, rcFile, rcCorrupt);
                goto DONE;
            }
        }
        /* no BC subfield, or a declared size smaller than the bytes already
         * consumed (which would underflow the subtraction below) */
        if (bsize == 0 || bsize < hsize) {
            rc = RC(rcAlign, rcFile, rcReading, rcFormat, rcInvalid); /* not BGZF */
            break;
        }
        bsize2 = bsize;
        bsize -= hsize;
        /* skip the remainder of the block, refilling the window as needed */
        for ( ; ; ) {
            unsigned n = bsize;
            
            if (n > self->bcount - self->bpos)
                n = self->bcount - self->bpos;
            self->bpos += n;
            bsize -= n;
            if (self->bpos == self->bcount) {
                rc = BGZFileGetMoreBytes(self);
                if (rc) {
                    if (bsize)
                        rc = RC(rcAlign, rcFile, rcReading, rcFile, rcTooShort);
                    goto DONE;
                }
            }
            else {
                zr = inflateReset(&self->zs);
                assert(zr == Z_OK);
                self->zs.avail_in = (uInt)(self->bcount - self->bpos);
                self->zs.next_in = (Bytef *)&self->buf[self->bpos];
                /* argument order follows the callback type: rc, then fpos */
                rc = cb(ctx, self, 0, fpos, NULL, bsize2);
                break;
            }
        }
    } while (rc == 0);
DONE:
    /* running out of data between blocks is the normal end of file */
    if (GetRCState(rc) == rcInsufficient && GetRCObject(rc) == rcData)
        rc = 0;
    rc = cb(ctx, self, rc, self->fpos + self->bpos, NULL, 0);
#endif
    return rc;
}

/* BGZFileWalkBlocksUnzip
 *  Walk the BGZF blocks of the file, decompressing each one into *bufp.
 *
 *  `cb` is called after every BGZFileRead with the rc of that read, the file
 *  offset at which the block started, the decompressed data and its size.
 *  The loop stops when either the read or the callback reports a non-zero
 *  rc. `cb` is then called one final time with the terminating read rc
 *  (0 on a clean end of file), the final file offset, data == NULL and
 *  dsize == 0.
 *
 *  Returns the rc of the final callback if non-zero, otherwise the
 *  terminating read rc.
 */
static rc_t BGZFileWalkBlocksUnzip(BGZFile *self, zlib_block_t *bufp, BGZFileWalkBlocks_cb cb, void *ctx)
{
    rc_t rc;
    rc_t rc2;
    uint32_t dsize;
    
    do {
        uint64_t const fpos = self->fpos + self->bpos;
        
        rc2 = BGZFileRead(self, *bufp, &dsize);
        /* argument order follows the callback type: rc, then fpos */
        rc = cb(ctx, self, rc2, fpos, *bufp, dsize);
    } while (rc == 0 && rc2 == 0);
    /* running out of data between blocks is the normal end of file */
    if (GetRCState(rc2) == rcInsufficient && GetRCObject(rc2) == rcData)
        rc2 = 0;
    rc = cb(ctx, self, rc2, self->fpos + self->bpos, NULL, 0);
    return rc ? rc : rc2;
}

/* BGZFileWalkBlocks
 *  Rewind to the start of the file and visit every BGZF block, invoking `cb`
 *  for each.
 *
 *  decompress  true to inflate each block into *bufp, false to only walk the
 *              block headers; forced to true when header validation is not
 *              compiled in
 *
 *  Returns the rc of the initial read if it fails, otherwise the result of
 *  the selected walker.
 */
static rc_t BGZFileWalkBlocks(BGZFile *self, bool decompress, zlib_block_t *bufp,
                              BGZFileWalkBlocks_cb cb, void *ctx)
{
    rc_t rc;

#if !VALIDATE_BGZF_HEADER
    decompress = true;
#endif
    self->bpos = 0;
    self->fpos = 0;

    rc = BGZFileGetMoreBytes(self);
    if (rc != 0)
        return rc;

    return decompress ? BGZFileWalkBlocksUnzip(self, bufp, cb, ctx)
                      : BGZFileWalkBlocksND(self, cb, ctx);
}

/* BGZFileWhack
 *  Release everything held by `self`: the zlib inflate state and either the
 *  memory map or the file reference plus the heap input buffer.
 *  The object itself is not freed.
 */
static void BGZFileWhack(BGZFile *self)
{
    inflateEnd(&self->zs);
#if USEMMAP
    KMMapRelease(self->mm);
#else
    KFileRelease(self->kfp);
    free(self->_buf); /* free(NULL) is a no-op */
#endif
}

/* BAMIndex
 *  Table of BAMFilePosition pointers, one slot per reference sequence.
 *  NOTE(review): declared with one element; presumably allocated with extra
 *  room for additional reference sequences - confirm against the allocator.
 */
struct BAMIndex {
    BAMFilePosition *refSeq[1];
};

/* BAMFile
 *  An open BAM file: the underlying BGZF reader, the parsed header
 *  (reference sequences and read groups) and the current uncompressed block.
 */
struct BAMFile {
    uint64_t fpos_first; /* value of fpos_cur recorded once the header has been processed */
    uint64_t fpos_cur;   /* BGZF file position, updated when the uncompressed buffer is exhausted */
    
    BGZFile file;        /* compressed input */
    BAMRefSeq *refSeq;   /* array of refSeqs reference sequences */
    BAMReadGroup *readGroup; /* array of readGroups read groups, sorted by name */
    BAMAlignment *bufLocker;
    const char *version;     /* value of the @HD VN tag; points into headerData1 */
    char const *header;      /* untouched copy of the header text */
    char const *headerData1; /* header text that the parser splits in place */
    char const *headerData2; /* storage for the reference sequence names */
    const BAMIndex *ndx;     /* optional index, released in BAMFileWhack */
    
    unsigned refSeqs;    /* number of entries in refSeq */
    unsigned readGroups; /* number of entries in readGroup */
    
    atomic32_t refcount;
    unsigned ucfirst;    /* offset of first record in uncompressed buffer */
    unsigned bufSize;    /* current size of uncompressed buffer */
    unsigned bufCurrent; /* location in uncompressed buffer of read head */
    bool eof;
    zlib_block_t buffer; /* uncompressed buffer */
};

/* bam_alignment_s
 *  Fixed-size leading part of an alignment record as it is stored in the
 *  file. Every field is a byte array, so the struct carries no alignment
 *  padding; multi-byte fields are little-endian and are decoded by the
 *  get* accessor functions.
 */
struct bam_alignment_s {
    uint8_t rID[4];          /* reference sequence id (signed 32-bit) */
    uint8_t pos[4];          /* position (signed 32-bit) */
    uint8_t read_name_len;   /* length of read_name */
    uint8_t mapQual;         /* mapping quality */
    uint8_t bin[2];          /* bin (unsigned 16-bit) */
    uint8_t n_cigars[2];     /* number of CIGAR operations (unsigned 16-bit) */
    uint8_t flags[2];        /* flags (unsigned 16-bit) */
    uint8_t read_len[4];     /* read length (unsigned 32-bit) */
    uint8_t mate_rID[4];     /* mate's reference sequence id (signed 32-bit) */
    uint8_t mate_pos[4];     /* mate's position (signed 32-bit) */
    uint8_t ins_size[4];     /* insert size (signed 32-bit) */
    char read_name[1 /* read_name_len */];
/* if you change the length of read_name,
 * adjust the calculation of offsets in BAMAlignmentParse */
/* the variable-length part follows read_name:
 *  uint32_t cigar[n_cigars];
 *  uint8_t seq[(read_len + 1) >> 1];
 *  uint8_t qual[read_len];
 *  uint8_t extra[...];
 */
};

/* bam_alignment
 *  An alignment record viewed either as named fields (cooked) or as the
 *  plain bytes of the fixed-size part (raw).
 */
typedef union bam_alignment_u {
    struct bam_alignment_s cooked;
    uint8_t raw[sizeof(struct bam_alignment_s)];
} bam_alignment;

/* BAMAlignment
 *  A reference-counted alignment record belonging to a BAMFile.
 *  NOTE(review): cigar/seq/qual/extra/readGroupName/csread look like offsets
 *  of the corresponding sections within `data`, computed by
 *  BAMAlignmentParse (not visible here) - confirm.
 */
struct BAMAlignment {
    atomic32_t refcount;
    
    BAMFile *parent;     /* file this record was read from */
    bam_alignment *data; /* the record bytes */
    unsigned datasize;   /* size of the record, presumably in bytes */
        
    unsigned cigar;
    unsigned seq;
    unsigned qual;
    unsigned extra;
    unsigned readGroupName;
    unsigned csread;
    
    uint8_t storage[1];  /* trailing storage; presumably over-allocated to hold the record - confirm */
};

/* cigarChars
 *  Table of the CIGAR operation codes that may occur in a BAM file.
 *  NOTE(review): presumably indexed by the numeric CIGAR op stored in the
 *  record - confirm at the point of use.
 */
static const char cigarChars[] = {
    ct_Match,
    ct_Insert,
    ct_Delete,
    ct_Skip,
    ct_SoftClip,
    ct_HardClip,
    ct_Padded,
    ct_Equal,
    ct_NotEqual
    /* ct_Overlap must not appear in actual BAM file */
};

/* Reference sequence id of the record, converted from little-endian to host order. */
static int32_t getRefSeqId(const BAMAlignment *cself) {
    int32_t raw;

    memcpy(&raw, cself->data->cooked.rID, sizeof(raw));
    return LE2HI32(&raw);
}

/* Position field of the record, converted from little-endian to host order. */
static int32_t getPosition(const BAMAlignment *cself) {
    int32_t raw;

    memcpy(&raw, cself->data->cooked.pos, sizeof(raw));
    return LE2HI32(&raw);
}

static uint8_t getReadNameLength(const BAMAlignment *cself) {
    return cself->data->cooked.read_name_len;
}

/* Bin field of the record, converted from little-endian to host order. */
static uint16_t getBin(const BAMAlignment *cself) {
    uint16_t raw;

    memcpy(&raw, cself->data->cooked.bin, sizeof(raw));
    return LE2HUI16(&raw);
}

static uint8_t getMapQual(const BAMAlignment *cself) {
    return cself->data->cooked.mapQual;
}

/* Number of CIGAR operations in the record, converted to host order. */
static uint16_t getCigarCount(const BAMAlignment *cself) {
    uint16_t raw;

    memcpy(&raw, cself->data->cooked.n_cigars, sizeof(raw));
    return LE2HUI16(&raw);
}

/* Flags field of the record, converted from little-endian to host order. */
static uint16_t getFlags(const BAMAlignment *cself) {
    uint16_t raw;

    memcpy(&raw, cself->data->cooked.flags, sizeof(raw));
    return LE2HUI16(&raw);
}

/* Read length field of the record, converted from little-endian to host order. */
static uint32_t getReadLen(const BAMAlignment *cself) {
    uint32_t raw;

    memcpy(&raw, cself->data->cooked.read_len, sizeof(raw));
    return LE2HUI32(&raw);
}

/* Mate's reference sequence id, converted from little-endian to host order. */
static int32_t getMateRefSeqId(const BAMAlignment *cself) {
    int32_t raw;

    memcpy(&raw, cself->data->cooked.mate_rID, sizeof(raw));
    return LE2HI32(&raw);
}

/* Mate's position field, converted from little-endian to host order. */
static int32_t getMatePos(const BAMAlignment *cself) {
    int32_t raw;

    memcpy(&raw, cself->data->cooked.mate_pos, sizeof(raw));
    return LE2HI32(&raw);
}

/* Insert size field of the record, converted from little-endian to host order. */
static int32_t getInsertSize(const BAMAlignment *cself) {
    int32_t raw;

    memcpy(&raw, cself->data->cooked.ins_size, sizeof(raw));
    return LE2HI32(&raw);
}

static const char *getReadName(const BAMAlignment *cself) {
    return &cself->data->cooked.read_name[0];
}

/* BAMFileReadn
 *  Copy exactly `len` uncompressed bytes from the file into `dst`,
 *  decompressing further BGZF blocks into self->buffer as needed.
 *
 *  Returns 0 once `len` bytes have been copied (or immediately when len is
 *  0), the rc from BGZFileRead if decompression fails, or
 *  rcData/rcInsufficient when a freshly read block provides no bytes at or
 *  beyond the current buffer offset.
 */
static rc_t BAMFileReadn(BAMFile *self, const unsigned len, uint8_t dst[/* len */]) {
    rc_t rc;
    unsigned cur;
    unsigned n = 0;
    
    if (len == 0)
        return 0;
    
    /* cur counts the bytes copied by previous iterations, n those copied by
     * the current one */
    for (cur = 0; ; cur += n) {
        if (self->bufSize > self->bufCurrent) {
            /* take what the buffer holds, but no more than is still needed */
            n = self->bufSize - self->bufCurrent;
            if (cur + n > len)
                n = len - cur;
            memcpy(&dst[cur], &self->buffer[self->bufCurrent], n);
            self->bufCurrent += n;
        }
        /* buffer not exhausted, so the request was fully satisfied */
        if (self->bufCurrent != self->bufSize && self->bufSize != 0)
            return 0;
        if (self->bufSize != 0) {
            /* a seek has not just been done so update the file position.
             * if we didn't and a request for the position is made before the
             * next read, we will not have the position of the next read.
             *
             * if a seek had just been done then
             *    self->fpos_cur == BGZFileGetPos(&self->file)
             * is already true.
             */
            self->fpos_cur = BGZFileGetPos(&self->file);
            self->bufCurrent = 0;
            self->bufSize = 0;
            if (cur + n == len)
                return 0;
        }

        /* buffer is empty: decompress the next block */
        rc = BGZFileRead(&self->file, self->buffer, &self->bufSize);
        if (rc)
            return rc;
        if (self->bufSize == 0 || self->bufSize <= self->bufCurrent)
            return RC(rcAlign, rcFile, rcReading, rcData, rcInsufficient);
    }
}

/* BAMFileReadUI32
 *  Read a little-endian unsigned 32-bit integer from the file and store it
 *  in host byte order in *rhs. *rhs is left untouched on failure.
 *
 *  Returns the rc from BAMFileReadn.
 */
static rc_t BAMFileReadUI32(BAMFile *self, uint32_t *rhs) {
    uint8_t buf[sizeof(*rhs)];
    rc_t rc = BAMFileReadn(self, sizeof(buf), buf);

    if (rc == 0) {
        uint32_t raw;

        /* the byte buffer has no alignment guarantee, so copy it into a
         * real uint32_t before the macro dereferences it as one */
        memcpy(&raw, buf, sizeof(raw));
        *rhs = LE2HUI32(&raw);
    }
    return rc;
}

/* comp_ReadGroup
 *  Sort/search comparator for BAMReadGroup elements, ordering by name.
 *  An element without a name sorts before any named element; two unnamed
 *  elements compare equal. The third argument is unused.
 */
static int CC comp_ReadGroup(const void *A, const void *B, void * ignored) {
    const BAMReadGroup *const lhs = (const BAMReadGroup *)A;
    const BAMReadGroup *const rhs = (const BAMReadGroup *)B;

    if (lhs->name == NULL)
        return rhs->name == NULL ? 0 : -1;
    if (rhs->name == NULL)
        return 1;
    return strcmp(lhs->name, rhs->name);
}

/* comp_RefSeqName
 *  Sort/search comparator for arrays of pointers to BAMRefSeq, ordering by
 *  name. An entry without a name sorts before any named entry; two unnamed
 *  entries compare equal. The third argument is unused.
 */
static int CC comp_RefSeqName ( const void *A, const void *B, void * ignored ) {
    const BAMRefSeq *const lhs = *(const BAMRefSeq **)A;
    const BAMRefSeq *const rhs = *(const BAMRefSeq **)B;

    if (lhs->name == NULL)
        return rhs->name == NULL ? 0 : -1;
    if (rhs->name == NULL)
        return 1;
    return strcmp(lhs->name, rhs->name);
}

/* ParseHeader
 *  Parse the text header in place and fill in the header-derived fields of
 *  `self`.
 *
 *  self        receives the @HD version, per-reference attributes from @SQ
 *              records and the read group table from @RG records
 *  hdata       header text; modified in place (separators are overwritten
 *              with NUL so that tags and values become C strings which the
 *              stored pointers refer to)
 *  hlen        number of bytes in hdata
 *  rs_by_name  self->refSeqs pointers to self->refSeq entries, sorted by
 *              name; used to find the reference an @SQ record describes
 *
 *  The parser is a character-driven state machine:
 *    0..3    locate '@', read the two-letter record type, or skip the line
 *    ST_xx_START + 0..5   read "TAG:value" pairs of an HD, SQ or RG record
 *  A record is committed when the '@' of the following record is seen.
 *
 *  self->readGroup must already hold self->readGroups elements. Read groups
 *  found beyond that count are ignored rather than written past the end of
 *  the array. On return the read groups are sorted by name and numbered.
 *
 *  Returns 0 on success or rcData/rcInvalid on malformed header text.
 */
static rc_t ParseHeader(BAMFile *self, char hdata[], size_t hlen, const BAMRefSeq ** rs_by_name) {
    unsigned readGroups = 0;
    const char *const endp = hdata + hlen;
    const char *token = NULL, *value = NULL;
    int st = 0, eat_ws = 1;
    BAMRefSeq rs;
    
#define ST_HD_START 10
#define ST_SQ_START 20
#define ST_RG_START 30

    /* casts to unsigned char keep the <ctype.h> calls defined for bytes
     * with the high bit set */
    while (hdata != endp) {
        if (eat_ws && isspace((unsigned char)*hdata)) {
            ++hdata;
            continue;
        }
        else
            eat_ws = 0;
        
        switch (st) {
            case 0:
                if (*hdata == '@')
                    ++st;
                else
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                break;
            case 1:
                if (isspace((unsigned char)*hdata))
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                token = hdata;
                ++st;
                break;
            case 2:
                if (isspace((unsigned char)*hdata)) {
                    *hdata = '\0';
                    if (hdata - token == 2) {
                        eat_ws = 1;
                        if (strcmp(token, "HD") == 0)
                            st = ST_HD_START;
                        else if (strcmp(token, "SQ") == 0)
                            st = ST_SQ_START;
                        else if (strcmp(token, "RG") == 0)
                            st = ST_RG_START;
                    }
                    if (st == 2) {
                        /* unrecognised record type: skip the rest of the line */
                        ++st;
                        eat_ws = 0;
                    }
                }
                else if (hdata - token > 2)
                    ++st;
                break;
            case 3:
                if (*hdata == '\r' || *hdata == '\n') {
                    st = 0;
                    eat_ws = 1;
                }
                break;
            case ST_HD_START:
                token = hdata;
                ++st;
                break;
            case ST_HD_START + 1:
                if (isspace((unsigned char)*hdata))
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                ++st;
                break;
            case ST_HD_START + 2:
                if (*hdata != ':')
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                *hdata = '\0';
                eat_ws = 1;
                ++st;
                break;
            case ST_HD_START + 3:
                value = hdata;
                ++st;
                break;
            case ST_HD_START + 4:
                if (*hdata == '\t' || *hdata == '\r' || *hdata == '\n') {
                    *hdata = '\0';
                    
                    if (strcmp(token, "VN") == 0)
                        self->version = value;
                    
                    ++st;
                    eat_ws = 1;
                }
                break;
            case ST_HD_START + 5:
                if (*hdata == '@')
                    st = 1;
                else {
                    token = hdata;
                    st = ST_HD_START + 1;
                }
                break;
                
                
            case ST_SQ_START:
                memset(&rs, 0, sizeof(rs));
                token = hdata;
                ++st;
                break;
            case ST_SQ_START + 1:
                if (isspace((unsigned char)*hdata))
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                ++st;
                break;
            case ST_SQ_START + 2:
                if (*hdata != ':')
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                *hdata = '\0';
                eat_ws = 1;
                ++st;
                break;
            case ST_SQ_START + 3:
                value = hdata;
                ++st;
                break;
            case ST_SQ_START + 4:
                if (*hdata == '\t' || *hdata == '\r' || *hdata == '\n') {
                    *hdata = '\0';
                    
                    if (strcmp(token, "SN") == 0)
                        rs.name = value;
                    else if (strcmp(token, "LN") == 0)
                        rs.length = strtou64(value, NULL, 10);
                    else if (strcmp(token, "AS") == 0)
                        rs.assemblyId = value;
                    else if (strcmp(token, "M5") == 0) {
                        unsigned len = hdata - value;
                        
                        /* value[0] is never whitespace, so this stops at len >= 1 */
                        while (isspace((unsigned char)value[len - 1]))
                            --len;
                        
                        /* strip a matching pair of quotes */
                        if ((value[0] == '\'' || value[0] == '"') && value[len - 1] == value[0]) {
                            ++value;
                            len -= 2;
                        }
                        /* 32 hex digits -> 16 checksum bytes */
                        if (len == 32) {
                            unsigned i;
                            
                            rs.checksum = &rs.checksum_array[0];
                            for (i = 0; i != 16; ++i) {
                                int const ch1 = toupper((unsigned char)value[i * 2 + 0]);
                                int const ch2 = toupper((unsigned char)value[i * 2 + 1]);
                                
                                if (isxdigit(ch1) && isxdigit(ch2)) {
                                    rs.checksum_array[i] =
                                        ((ch1 > '9' ? (ch1 - ('A' - 10)) : (ch1 - '0')) << 4) +
                                         (ch2 > '9' ? (ch2 - ('A' - 10)) : (ch2 - '0'));
                                }
                                else {
                                    rs.checksum = NULL;
                                    break;
                                }
                            }
                        }
                    }
                    else if (strcmp(token, "UR") == 0)
                        rs.uri = value;
                    else if (strcmp(token, "SP") == 0)
                        rs.species = value;
                    
                    ++st;
                    eat_ws = 1;
                }
                break;
            case ST_SQ_START + 5:
                if (*hdata == '@') {
                    /* end of the @SQ record: copy its attributes onto the
                     * reference sequence with the same name, if there is one */
                    if (rs.name != NULL) {
                        BAMRefSeq *y = &rs, **xx = kbsearch( &y, rs_by_name, self->refSeqs, sizeof(y), comp_RefSeqName, NULL );

                        if (xx != NULL) {
                            BAMRefSeq *x = *xx;
                            
                            x->assemblyId = rs.assemblyId;
                            if (rs.checksum) {
                                x->checksum = &x->checksum_array[0];
                                memcpy(x->checksum_array, rs.checksum_array, 16);
                            }
                            else
                                x->checksum = NULL;
                            x->uri = rs.uri;
                            x->species = rs.species;
                        }
                    }
                    st = 1;
                }
                else {
                    token = hdata;
                    st = ST_SQ_START + 1;
                }
                break;
                
                
            case ST_RG_START:
                token = hdata;
                ++st;
                break;
            case ST_RG_START + 1:
                if (isspace((unsigned char)*hdata))
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                ++st;
                break;
            case ST_RG_START + 2:
                if (*hdata != ':')
                    return RC(rcAlign, rcFile, rcParsing, rcData, rcInvalid);
                *hdata = '\0';
                eat_ws = 1;
                ++st;
                break;
            case ST_RG_START + 3:
                value = hdata;
                ++st;
                break;
            case ST_RG_START + 4:
                if (*hdata == '\t' || *hdata == '\r' || *hdata == '\n') {
                    *hdata = '\0';
                    
                    /* The array was sized by a separate counting pass whose
                     * idea of a record boundary is narrower than this
                     * parser's, so never trust the index blindly. */
                    if (readGroups < self->readGroups) {
                        BAMReadGroup *const rg = &self->readGroup[readGroups];
                        
                        if (strcmp(token, "ID") == 0)
                            rg->name = value;
                        else if (strcmp(token, "SM") == 0)
                            rg->sample = value;
                        else if (strcmp(token, "LB") == 0)
                            rg->library = value;
                        else if (strcmp(token, "DS") == 0)
                            rg->description = value;
                        else if (strcmp(token, "PU") == 0)
                            rg->unit = value;
                        else if (strcmp(token, "PI") == 0)
                            rg->insertSize = value;
                        else if (strcmp(token, "CN") == 0)
                            rg->center = value;
                        else if (strcmp(token, "DT") == 0)
                            rg->runDate = value;
                        else if (strcmp(token, "PL") == 0)
                            rg->platform = value;
                    }
                    
                    ++st;
                    eat_ws = 1;
                }
                break;
            case ST_RG_START + 5:
                if (*hdata == '@') {
                    ++readGroups;
                    st = 1;
                }
                else {
                    token = hdata;
                    st = ST_RG_START + 1;
                }
                break;
        }
        ++hdata;
    }
    ksort( self->readGroup, self->readGroups, sizeof(self->readGroup[0]), comp_ReadGroup, NULL );
    for (readGroups = 0; readGroups != self->readGroups; ++readGroups)
        self->readGroup[readGroups].id = readGroups;
    
    return 0;
}

/* CountReadGroups
 *  Count the lines of the header text that begin (after leading whitespace)
 *  with "@RG".
 *
 *  txt    header text; need not be NUL terminated
 *  len    number of bytes in txt
 *  reads  receives the count
 *
 *  Lines are delimited by '\n'. A line is only examined when more than
 *  three bytes remain in the text. Always returns 0.
 */
static rc_t CountReadGroups(char const txt[], size_t len, unsigned *reads) {
    const char *const endp = txt + len;
    
    *reads = 0;
    
    do {
        /* cast keeps isspace defined for bytes with the high bit set */
        while (txt != endp && isspace((unsigned char)*txt))
            ++txt;
        /* compare the remaining length rather than forming txt + 3, which
         * could point more than one past the end of the buffer */
        if ((size_t)(endp - txt) <= 3)
            break;
        
        if (txt[0] == '@' && txt[1] == 'R' && txt[2] == 'G')
            ++*reads;
        
        txt = memchr(txt, '\n', (size_t)(endp - txt));
    } while (txt);
    return 0;
}

/* ProcessHeader
 *  Read and digest the BAM header at the current position of the file.
 *
 *  Reads the magic number, the text header, and the binary list of
 *  reference sequences (name and length), then counts and parses the read
 *  groups and other records of the text header.
 *
 *  On success `self` owns:
 *    header       an untouched copy of the header text
 *    headerData1  the header text as split in place by ParseHeader
 *    headerData2  the concatenated, NUL-terminated reference names
 *    refSeq       refSeqs reference sequence descriptors
 *    readGroup    readGroups read group descriptors
 *  and fpos_first/ucfirst record the file position and buffer offset at
 *  which the header ended.
 *
 *  On failure anything already stored in `self` stays there for
 *  BAMFileWhack to release; temporary storage is freed here.
 *
 *  Returns 0 on success, rcData/rcBadVersion for a wrong magic number,
 *  rcMemory/rcExhausted on allocation failure, or the rc of a failed read
 *  or parse.
 */
static rc_t ProcessHeader(BAMFile *self) {
    rc_t rc;
    uint8_t magic[4];
    uint32_t hlen;
    uint32_t nrefs;
    uint32_t i;
    size_t hsize;
    char *htxt;
    char *names = NULL;      /* owned here until handed to self->headerData2 */
    size_t in_names = 0, names_size = 1024;
    struct {
        size_t name;         /* offset of the name within names */
        uint32_t length;
    } *refs = NULL;
    const BAMRefSeq **rs_by_name = NULL;
    
    rc = BAMFileReadn(self, sizeof(magic), magic);
    if (rc)
        return rc;
    if (memcmp(magic, "BAM\1", 4) != 0)
        return RC(rcAlign, rcFile, rcConstructing, rcData, rcBadVersion);

    rc = BAMFileReadUI32(self, &hlen);
    if (rc)
        return rc;
    
    /* room for the terminator, computed in size_t so hlen == 0xFFFFFFFF
     * cannot wrap to a zero-byte allocation */
    hsize = (size_t)hlen + 1;
    if (hsize <= hlen)
        return RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
    
    self->header = htxt = malloc(hsize);
    if (htxt == NULL)
        return RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
    
    self->headerData1 = htxt = malloc(hsize);
    if (htxt == NULL)
        return RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
    
    rc = BAMFileReadn(self, hlen, (uint8_t *)htxt);
    if (rc)
        return rc;
    htxt[hlen] = 0;
    memcpy((char *)self->header, htxt, hsize);
    
    rc = BAMFileReadUI32(self, &nrefs);
    if (rc)
        return rc;
    
    /* a count of zero must not be mistaken for an allocation failure, so
     * always ask for at least one element */
    names = malloc(names_size);
    refs = calloc(nrefs ? nrefs : 1, sizeof(refs[0]));
    if (names == NULL || refs == NULL) {
        rc = RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
        goto CLEANUP;
    }
    
    for (i = 0; i != nrefs; ++i) {
        uint32_t namelen;
        size_t need;
        
        rc = BAMFileReadUI32(self, &namelen);
        if (rc)
            goto CLEANUP;
        
        /* bytes needed to hold this name plus a terminator of our own */
        need = in_names + (size_t)namelen + 1;
        if (need <= in_names) { /* wrapped around */
            rc = RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
            goto CLEANUP;
        }
        if (need > names_size) {
            size_t new_size = names_size;
            void *temp;
            
            /* keep doubling until the name really fits */
            while (new_size < need) {
                if (new_size > ((size_t)-1) / 2) {
                    new_size = need;
                    break;
                }
                new_size <<= 1;
            }
            temp = realloc(names, new_size);
            if (temp == NULL) {
                rc = RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
                goto CLEANUP;
            }
            names = temp;
            names_size = new_size;
        }
        refs[i].name = in_names;
        rc = BAMFileReadn(self, namelen, (uint8_t *)&names[in_names]);
        if (rc)
            goto CLEANUP;
        /* do not rely on the file to terminate the name */
        names[in_names + namelen] = '\0';
        rc = BAMFileReadUI32(self, &refs[i].length);
        if (rc)
            goto CLEANUP;
        in_names = need;
    }
    
    self->refSeqs = nrefs;
    self->refSeq = calloc(nrefs ? nrefs : 1, sizeof(self->refSeq[0]));
    if (self->refSeq == NULL) {
        rc = RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
        goto CLEANUP;
    }
    rs_by_name = calloc(nrefs ? nrefs : 1, sizeof(*rs_by_name));
    if (rs_by_name == NULL) {
        rc = RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);
        goto CLEANUP;
    }
    
    for (i = 0; i != nrefs; ++i) {
        rs_by_name[i] = self->refSeq + i;
        self->refSeq[i].id = i;
        self->refSeq[i].name = names + refs[i].name;
        self->refSeq[i].length = refs[i].length;
    }
    /* the names buffer now belongs to self and is freed by BAMFileWhack */
    self->headerData2 = names;
    names = NULL;
    
    /* cast to to quiet a warning from VS C */
    ksort( (void*const)rs_by_name, self->refSeqs, sizeof(rs_by_name[0]), comp_RefSeqName, NULL );
    
    /* everything from here on in the file is alignment data */
    self->fpos_first = self->fpos_cur;
    self->ucfirst = self->bufCurrent;
    
    rc = CountReadGroups(htxt, hlen, &self->readGroups);
    if (rc == 0) {
        self->readGroup = calloc(self->readGroups ? self->readGroups : 1, sizeof(self->readGroup[0]));
        if (self->readGroup != NULL)
            rc = ParseHeader(self, htxt, hlen, rs_by_name);
        else
            rc = RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
    }
    
CLEANUP:
    free(refs);
    free(names); /* NULL once ownership has passed to self */
    free( (void*) rs_by_name );
    
    return rc;
}

static rc_t BAMIndexWhack(const BAMIndex *);

/* BAMFileWhack
 *  Releases everything a BAMFile owns: the reference-sequence and
 *  read-group tables, the header text, both header data buffers, the
 *  attached index (when present) and the embedded BGZF file object.
 *  The BAMFile structure itself is NOT freed here.
 *
 *  Always returns 0.
 */
static rc_t BAMFileWhack(BAMFile *self) {
    /* free(NULL) is a no-op, so the buffers need no individual guards */
    free(self->refSeq);
    free(self->readGroup);
    free((void *)self->header);
    free((void *)self->headerData1);
    free((void *)self->headerData2);

    if (self->ndx != NULL)
        BAMIndexWhack(self->ndx);

    BGZFileWhack(&self->file);
    return 0;
}

/* BAMFileMakeWithKFile
 *  Allocates a BAMFile, attaches it to "file" and processes the BAM header.
 *  file is retained
 *
 *  "cself" [ OUT ] - receives the new object, holding one reference, on
 *  success; it is not written on failure.
 *
 *  Returns 0 on success, rcMemory/rcExhausted if the object cannot be
 *  allocated, otherwise the rc from BGZFileInit or ProcessHeader.
 */
LIB_EXPORT rc_t CC BAMFileMakeWithKFile(const BAMFile **cself, const KFile *file)
{
    /* calloc hands back zero-filled memory; no further clearing is needed */
    BAMFile *self = calloc(1, sizeof(*self));
    rc_t rc;
    
    if (self == NULL)
        return RC(rcAlign, rcFile, rcConstructing, rcMemory, rcExhausted);
    
    atomic32_set(&self->refcount, 1);
    rc = BGZFileInit(&self->file, file);
    if (rc == 0) {
        rc = ProcessHeader(self);
        if (rc == 0) {
            *cself = self;
            return 0;
        }
    }
    /* BAMFileWhack only releases what the object owns; the object itself
     * must be freed here or it leaks on every failed open */
    BAMFileWhack(self);
    free(self);
    return rc;
}

/* BAMFileVMakeWithDir
 *  Opens the file named by the printf-style "path"/"args" for reading
 *  relative to "dir" and builds a BAMFile on top of it.
 *
 *  "result" [ OUT ] - receives the new BAMFile; cleared to NULL before
 *  anything else is attempted.
 *
 *  Returns rcParam/rcNull when "result" is NULL, otherwise the rc of the
 *  open or of BAMFileMakeWithKFile.
 */
LIB_EXPORT rc_t CC BAMFileVMakeWithDir(const BAMFile **result,
                                         const KDirectory *dir,
                                         const char *path,
                                         va_list args
                                         )
{
    const KFile *bamFile;
    rc_t rc;

    if (result == NULL)
        return RC(rcAlign, rcFile, rcOpening, rcParam, rcNull);
    *result = NULL;

    rc = KDirectoryVOpenFileRead(dir, &bamFile, path, args);
    if (rc != 0)
        return rc;

    rc = BAMFileMakeWithKFile(result, bamFile);
    /* drop the local reference whether or not construction succeeded */
    KFileRelease(bamFile);
    return rc;
}

/* BAMFileMakeWithDir
 *  Variadic front end: packages the arguments following "path" into a
 *  va_list and forwards everything to BAMFileVMakeWithDir.
 */
LIB_EXPORT rc_t CC BAMFileMakeWithDir(const BAMFile **result,
                                        const KDirectory *dir,
                                        const char *path, ...
                                        )
{
    rc_t rc;
    va_list ap;

    va_start(ap, path);
    rc = BAMFileVMakeWithDir(result, dir, path, ap);
    va_end(ap);

    return rc;
}

/* BAMFileMake
 *  Opens the BAM file named by the printf-style "path" using the
 *  directory obtained from KDirectoryNativeDir.
 *
 *  "cself" [ OUT ] - receives the new BAMFile; cleared to NULL first.
 *
 *  Returns rcParam/rcNull when "cself" is NULL, the rc from
 *  KDirectoryNativeDir if that fails, otherwise the rc from
 *  BAMFileVMakeWithDir.
 */
LIB_EXPORT rc_t CC BAMFileMake(const BAMFile **cself, const char *path, ...)
{
    KDirectory *native;
    va_list ap;
    rc_t rc;

    if (cself == NULL)
        return RC(rcAlign, rcFile, rcOpening, rcParam, rcNull);
    *cself = NULL;

    rc = KDirectoryNativeDir(&native);
    if (rc == 0) {
        va_start(ap, path);
        rc = BAMFileVMakeWithDir(cself, native, path, ap);
        va_end(ap);
        KDirectoryRelease(native);
    }
    return rc;
}

/* BAMFileMakeWithKPath
 *  Reads the path text out of "kpath" into a local 4096-byte buffer and
 *  opens that path with BAMFileMake.
 *
 *  Returns the rc from KPathReadPath if it fails, otherwise the rc from
 *  BAMFileMake.
 */
LIB_EXPORT rc_t CC BAMFileMakeWithKPath(const BAMFile **cself, const KPath *kpath)
{
    char buffer[4096];
    size_t used;
    rc_t const rc = KPathReadPath(kpath, buffer, sizeof(buffer), &used);

    if (rc != 0)
        return rc;

    /* the path is passed as a counted string; "used" bounds what is printed */
    return BAMFileMake(cself, "%.*s", (int)used, buffer);
}

/* BAMFileAddRef
 *  Adds one reference to the file object; a NULL object is ignored.
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileAddRef(const BAMFile *cself) {
    BAMFile *const self = (BAMFile *)cself;

    if (self == NULL)
        return 0;

    atomic32_inc(&self->refcount);
    return 0;
}

/* BAMFileRelease
 *  Drops one reference.  When the last reference goes away the object is
 *  whacked and freed; if whacking reports an error the object is kept
 *  alive with its reference count put back to 1 and that rc is returned.
 *  A NULL object is ignored.
 */
LIB_EXPORT rc_t CC BAMFileRelease(const BAMFile *cself) {
    BAMFile *const self = (BAMFile *)cself;
    rc_t rc;

    if (self == NULL)
        return 0;

    if (!atomic32_dec_and_test(&self->refcount))
        return 0;

    rc = BAMFileWhack(self);
    if (rc == 0)
        free(self);
    else
        atomic32_set(&self->refcount, 1);
    return rc;
}

/* BAMFileGetProportionalPosition
 *  Returns whatever BGZFileProPos reports for the underlying BGZF file.
 */
LIB_EXPORT float CC BAMFileGetProportionalPosition(const BAMFile *self)
{
    float const fraction = BGZFileProPos(&self->file);

    return fraction;
}

/* BAMFileGetPosition
 *  Packs the current location into "pos": the block file position
 *  shifted left 16 bits, OR-ed with the offset inside the current buffer.
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetPosition(const BAMFile *self, BAMFilePosition *pos) {
    *pos = self->bufCurrent | (self->fpos_cur << 16);
    return 0;
}

/* BAMFileSetPositionInt
 *  Moves the read position to block file offset "fpos", buffer offset
 *  "bpos".
 *
 *  The target must not precede the first record (fpos_first/ucfirst) nor
 *  lie beyond the file size.  If "fpos" is the block already loaded only
 *  the buffer offset is changed (it must not exceed bufSize); otherwise
 *  the BGZF file is repositioned and the buffer is marked empty so that
 *  the next read reloads it.
 *
 *  Returns 0 on success, rcParam/rcInvalid for a rejected position.
 */
static rc_t BAMFileSetPositionInt(const BAMFile *cself, uint64_t fpos, uint16_t bpos)
{
    BAMFile *const self = (BAMFile *)cself;

    /* outside the range [first record, end of file] */
    if (fpos < cself->fpos_first || fpos > cself->file.fsize)
        return RC(rcAlign, rcFile, rcPositioning, rcParam, rcInvalid);

    /* inside the first block but before the first record */
    if (fpos == cself->fpos_first && bpos < cself->ucfirst)
        return RC(rcAlign, rcFile, rcPositioning, rcParam, rcInvalid);

    if (fpos != cself->fpos_cur) {
        BGZFileSetPos(&self->file, fpos);
        self->eof = false;
        self->bufSize = 0; /* force re-read */
        self->bufCurrent = bpos;
        self->fpos_cur = fpos;
        return 0;
    }

    /* same block: just move within the data that is already loaded */
    if (bpos > cself->bufSize)
        return RC(rcAlign, rcFile, rcPositioning, rcParam, rcInvalid);

    self->eof = false;
    self->bufCurrent = bpos;
    return 0;
}

/* BAMFileSetPosition
 *  Splits "*pos" into its two parts (everything above the low 16 bits is
 *  the block file offset, the low 16 bits are the offset in the buffer)
 *  and repositions via BAMFileSetPositionInt.
 */
LIB_EXPORT rc_t CC BAMFileSetPosition(const BAMFile *cself, const BAMFilePosition *pos)
{
    BAMFilePosition const where = *pos;

    return BAMFileSetPositionInt(cself, where >> 16, (uint16_t)where);
}

/* BAMFileRewind
 *  Repositions the file at the location stored in fpos_first/ucfirst
 *  and returns the rc from BAMFileSetPositionInt.
 */
LIB_EXPORT rc_t CC BAMFileRewind(const BAMFile *cself)
{
    return BAMFileSetPositionInt(cself, cself->fpos_first, cself->ucfirst);
}

/* BAMFileUnlockBuffer
 *  If an alignment currently points into the file's buffer, copy its
 *  record into the alignment's own storage, repoint the alignment at
 *  that copy and clear the file's bufLocker.
 */
static void BAMFileUnlockBuffer(BAMFile *self) {
    BAMAlignment *const holder = self->bufLocker;

    if (holder == NULL)
        return;

    memcpy(holder->storage, holder->data, holder->datasize);
    holder->data = (bam_alignment *)&holder->storage[0];
    self->bufLocker = NULL;
}

/* BAMAlignmentParse
 *  Points "y" at the raw record "data" of "datasize" bytes and computes
 *  the offsets of its variable-length sections (cigar, seq, qual, extra).
 *
 *  Each offset is checked against "datasize" as soon as it is known;
 *  "extra" alone may equal "datasize" (no optional fields).  The cached
 *  read-group and color-space offsets are cleared on success.
 *
 *  Returns 0 on success, rcRow/rcInvalid if any section would start
 *  beyond the end of the record.
 */
static
rc_t BAMAlignmentParse(BAMAlignment *y, const uint8_t data[], uint32_t datasize)
{
    y->datasize = datasize;
    y->data = (bam_alignment *)&data[0];

    y->cigar = sizeof(struct bam_alignment_s) + getReadNameLength(y) - 1;
    if (y->cigar < datasize) {
        /* each CIGAR operation occupies 4 bytes */
        y->seq = y->cigar + 4 * getCigarCount(y);
        if (y->seq < datasize) {
            /* bases are packed two per byte */
            y->qual = y->seq + ((getReadLen(y) + 1) >> 1);
            if (y->qual < datasize) {
                /* one quality byte per base */
                y->extra = y->qual + getReadLen(y);
                if (y->extra <= datasize) {
                    y->readGroupName = 0;
                    y->csread = 0;
                    return 0;
                }
            }
        }
    }
    return RC(rcAlign, rcFile, rcReading, rcRow, rcInvalid);
}

/* BAMFileRead
 *  Reads the next alignment record from the current position.
 *
 *  "rhs" [ OUT ] - receives a new BAMAlignment with a reference count of 1
 *  that itself holds a reference to this file; set to NULL up front, so it
 *  stays NULL on every failure path.
 *
 *  Returns rcParam/rcNull for a NULL file, rcRow/rcNotFound once the end
 *  of the data has been reached, rcData/rcTooBig if the record length does
 *  not fit in 16 bits, rcMemory/rcExhausted on allocation failure, or
 *  whatever rc the read/parse helpers produce.
 */
LIB_EXPORT rc_t CC BAMFileRead(const BAMFile *cself, const BAMAlignment **rhs)
{
    BAMFile *self = (BAMFile *)cself;
    BAMAlignment *y;
    uint32_t datasize;
    rc_t rc;
    
    *rhs = NULL;

    if (cself == NULL)
        return RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
    
    /* buffer exhausted and end of file already seen: nothing more to read */
    if (cself->bufCurrent >= cself->bufSize && cself->eof)
        return RC(rcAlign, rcFile, rcReading, rcRow, rcNotFound);

    /* a previously returned alignment may still point into the buffer;
     * give it a private copy before the buffer is consumed any further */
    BAMFileUnlockBuffer(self);

    /* every record is preceded by its 32-bit length */
    rc = BAMFileReadUI32(self, &datasize);
    if (rc) {
        /* running out of data while reading the length is treated as EOF */
        if (GetRCObject(rc) == rcData && GetRCState(rc) == rcInsufficient) {
            self->eof = true;
            rc = RC(rcAlign, rcFile, rcReading, rcRow, rcNotFound);
        }
        return rc;
    }
    /* it should never be bigger than 64K */
    if ((datasize & 0xFFFF) != datasize) {
        /* probably a seek to a bad position */
        return RC(rcAlign, rcFile, rcReading, rcData, rcTooBig);
    }
    /* the trailing storage member is sized to hold exactly this record */
    y = malloc(sizeof(*y) - sizeof(y->storage) + datasize);
    if (y == NULL)
        return RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);

    y->datasize = datasize;
    
    if (self->bufCurrent + datasize <= self->bufSize) {
        /* all of the data needed for this record is in the buffer
         * so instead of copying it, we'll take the pointer and
         * place a reservation on the buffer to prevent it from
         * going away on us.  Note that enough storage was allocated
         * that if the reservation needs to be broken there is space
         * to copy the data into.
         */
        rc = BAMAlignmentParse(y, &self->buffer[self->bufCurrent], datasize);
        if (rc) {
            free(y);
            return rc;
        }
        self->bufLocker = y;
        self->bufCurrent += datasize;
    }
    else {
        /* the record is not entirely in the buffer: read it straight into
         * the alignment's own storage */
        rc = BAMFileReadn(self, datasize, y->storage);
        if (rc == 0)
            rc = BAMAlignmentParse(y, y->storage, datasize);
        if (rc) {
            free(y);
            return rc;
        }
    }

    /* the alignment keeps its parent file alive */
    BAMFileAddRef(y->parent = self);
    atomic32_set(&y->refcount, 1);
    
    *rhs = y;
    return 0;
}

/* BAMFileGetRefSeqById
 *  Looks up a reference sequence by its index in the refSeq table.
 *
 *  "rhs" [ OUT ] - the table entry, or NULL when "id" is negative or not
 *  below the number of reference sequences.
 *
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetRefSeqById(const BAMFile *cself, int32_t id, const BAMRefSeq **rhs)
{
    if (id < 0 || id >= cself->refSeqs)
        *rhs = NULL;
    else
        *rhs = &cself->refSeq[id];
    return 0;
}

/* BAMFileGetReadGroupByName
 *  Searches the read-group table with kbsearch, using comp_ReadGroup and
 *  a key whose name is "name".
 *
 *  "rhs" [ OUT ] - the search result; NULL when "name" is NULL.
 *
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetReadGroupByName(const BAMFile *cself, const char *name, const BAMReadGroup **rhs)
{
    BAMReadGroup key;

    if (name == NULL) {
        *rhs = NULL;
        return 0;
    }

    /* only the name member of the key is initialized */
    key.name = name;
    *rhs = kbsearch(&key, cself->readGroup, cself->readGroups, sizeof(key), comp_ReadGroup, NULL);
    return 0;
}

/* BAMFileGetRefSeqCount
 *  Stores the number of reference sequences in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetRefSeqCount(const BAMFile *cself, unsigned *rhs)
{
    *rhs = cself->refSeqs;
    return 0;
}

/* BAMFileGetRefSeq
 *  Returns the i-th reference sequence in "*rhs", or NULL when "i" is
 *  not below the number of reference sequences.  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetRefSeq(const BAMFile *cself, unsigned i, const BAMRefSeq **rhs)
{
    *rhs = (i < cself->refSeqs) ? &cself->refSeq[i] : NULL;
    return 0;
}

/* BAMFileGetReadGroupCount
 *  Stores the number of read groups in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetReadGroupCount(const BAMFile *cself, unsigned *rhs)
{
    *rhs = cself->readGroups;
    return 0;
}

/* BAMFileGetReadGroup
 *  Returns the i-th read group in "*rhs", or NULL when "i" is not below
 *  the number of read groups.  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetReadGroup(const BAMFile *cself, unsigned i, const BAMReadGroup **rhs)
{
    *rhs = (i < cself->readGroups) ? &cself->readGroup[i] : NULL;
    return 0;
}

/* BAMFileGetHeaderText
 *  Returns the header text pointer and its length.
 *
 *  "header" [ OUT ] - the file's header pointer (may be NULL)
 *  "header_len" [ OUT ] - strlen of the header, or 0 when there is none
 *
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMFileGetHeaderText(BAMFile const *cself, char const **header, size_t *header_len)
{
    char const *const text = cself->header;

    *header = text;
    /* strlen(NULL) is undefined; report an absent header as empty */
    *header_len = (text != NULL) ? strlen(text) : 0;
    return 0;
}

/* MARK: BAM Alignment Stuff */

/* BAMAlignmentAddRef
 *  Adds one reference to the alignment; a NULL alignment is ignored.
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentAddRef(const BAMAlignment *cself)
{
    BAMAlignment *const self = (BAMAlignment *)cself;

    if (self == NULL)
        return 0;

    atomic32_inc(&self->refcount);
    return 0;
}

/* BAMAlignmentWhack
 *  Destroys an alignment: clears the parent's bufLocker if it names this
 *  alignment, releases the reference held on the parent file and frees
 *  the alignment.  Always returns 0.
 */
static rc_t BAMAlignmentWhack(BAMAlignment *self)
{
    BAMFile *const owner = self->parent;

    if (owner->bufLocker == self)
        owner->bufLocker = NULL;

    BAMFileRelease(owner);
    free(self);
    return 0;
}

/* BAMAlignmentRelease
 *  Drops one reference and destroys the alignment when it was the last.
 *  A NULL alignment is ignored.  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentRelease(const BAMAlignment *cself)
{
    BAMAlignment *const self = (BAMAlignment *)cself;

    if (self != NULL && atomic32_dec_and_test(&self->refcount))
        BAMAlignmentWhack(self);
    return 0;
}

#if 0
LIB_EXPORT uint16_t CC BAMAlignmentIffyFields(const BAMAlignment *self)
{
}

LIB_EXPORT uint16_t CC BAMAlignmentBadFields(const BAMAlignment *self)
{
}
#endif

/* BAMAlignmentGetCigarElement
 *  Returns the i-th 32-bit CIGAR word of the record, converted from
 *  little-endian to host order.  The word is copied out with memcpy
 *  rather than read through a uint32_t pointer.  "i" is not range-checked.
 */
static uint32_t BAMAlignmentGetCigarElement(const BAMAlignment *self, unsigned i)
{
    const void *const src = &self->data->raw[self->cigar + i * 4];
    uint32_t packed;

    memcpy(&packed, src, sizeof(packed));
    return LE2HUI32(&packed);
}

/* BAMAlignmentGetRefSeqId
 *  Stores the record's reference sequence id in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetRefSeqId(const BAMAlignment *cself, int32_t *rhs)
{
    int32_t const id = getRefSeqId(cself);

    *rhs = id;
    return 0;
}

/* BAMAlignmentGetPosition
 *  Stores the record's position in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetPosition(const BAMAlignment *cself, int64_t *rhs)
{
    int64_t const pos = getPosition(cself);

    *rhs = pos;
    return 0;
}

/* BAMAlignmentIsMapped
 *  An alignment counts as mapped when its "self is unmapped" flag is
 *  clear and both its reference sequence id and its position are
 *  non-negative.
 */
LIB_EXPORT bool CC BAMAlignmentIsMapped(const BAMAlignment *cself)
{
    if ((getFlags(cself) & BAMFlags_SelfIsUnmapped) != 0)
        return false;
    if (getRefSeqId(cself) < 0)
        return false;
    return getPosition(cself) >= 0;
}

/* BAMAlignmentGetAlignmentDetail
 *  Expands the record's CIGAR into one BAMAlignmentDetail per operation.
 *
 *  "rslt" [ OUT ] and "count" [ IN ] - caller's array and its capacity.
 *  "rslt" may be NULL only if "actual" is not.
 *
 *  "actual" [ OUT, NULL OKAY ] - receives the number of CIGAR operations
 *  (0 when the record's position is negative).
 *
 *  "pfirst", "plast" [ OUT, NULL OKAY ] - receive the index of the first
 *  and last Match/Equal operation, or -1 if there is none.
 *
 *  Returns rcSelf/rcNull for a NULL alignment, rcParam/rcNull when both
 *  "rslt" and "actual" are NULL, rcBuffer/rcInsufficient when "count" is
 *  smaller than the number of operations (always the case for a NULL
 *  "rslt", which makes that call a size query), and rcData/rcInvalid for
 *  an undefined operation code or a read position beyond the read length.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetAlignmentDetail(
                                                  const BAMAlignment *self,
                                                  BAMAlignmentDetail *rslt, uint32_t count, uint32_t *actual,
                                                  int32_t *pfirst, int32_t *plast
                                                  )
{
    unsigned i;
    unsigned ccnt; /* cigar count */
    int32_t  gpos; /* refSeq pos in global coordinates */
    unsigned rpos; /* read pos (always local coordinates) */
    uint32_t rlen; /* read length */
    int32_t first = -1;
    int32_t last = -1;

    if (!self)
        return RC(rcAlign, rcFile, rcReading, rcSelf, rcNull);

    rlen = getReadLen(self);
    ccnt = getCigarCount(self);
    gpos = getPosition(self);
    
    /* a record without a position reports no alignment detail at all */
    if (gpos < 0)
        ccnt = 0;
    
    if (actual)
        *actual = ccnt;
    
    if (pfirst)
        *pfirst = -1;

    if (plast)
        *plast = -1;

    if (ccnt == 0)
        return 0;
    
    if (rslt == NULL) {
        if (actual == NULL)
            return RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
        /* size query: forces the "insufficient" return below */
        count = 0;
    }
    
    if (count < ccnt)
        return RC(rcAlign, rcFile, rcReading, rcBuffer, rcInsufficient);
        
    for (rpos = 0, i = 0; i != ccnt; ++i) {
        uint32_t len = BAMAlignmentGetCigarElement(self, i);
        int op = len & 0x0F;
        
        /* valid indices are 0 .. sizeof(cigarChars) - 1; the previous test
         * used '>' and let op == sizeof(cigarChars) index past the table */
        if (op >= sizeof(cigarChars))
            return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
        
        op = cigarChars[op];
        len >>= 4;
        
        rslt[i].refSeq_pos = gpos;
        rslt[i].read_pos = rpos;
        rslt[i].length = len;
        rslt[i].type = (BAMCigarType)op;
        
        /* NOTE(review): Insert/SoftClip advance gpos and Delete/Skip advance
         * rpos, which looks swapped relative to how ReferenceLengthFromCIGAR
         * counts reference bases; behavior is left unchanged here - confirm
         * against callers before altering. */
        switch ((BAMCigarType)op) {
        case ct_Match:
        case ct_Equal:
            if (first == -1)
                first = i;
            last = i;
            gpos += len;
            rpos += len;
            break;
        case ct_Insert:
        case ct_SoftClip:
            gpos += len;
            break;
        case ct_Delete:
        case ct_Skip:
            rpos += len;
            break;
        case ct_HardClip:
        case ct_Padded:
            rslt[i].refSeq_pos = -1;
            rslt[i].read_pos = -1;
            break;
        default:
            break;
        }
        
        if (rslt[i].read_pos > rlen)
            return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
    }
    if (pfirst)
        *pfirst = first;
    
    if (plast)
        *plast = last;
    
    return 0;
}

/* ReferenceLengthFromCIGAR
 *  Sums the lengths of the CIGAR operations that are counted against the
 *  reference: Match, Equal, NotEqual, Delete and Skip.  Every other
 *  operation, including an operation code that has no entry in
 *  cigarChars, contributes nothing.
 */
static
unsigned ReferenceLengthFromCIGAR(const BAMAlignment *self)
{
    unsigned i;
    unsigned n = getCigarCount(self);
    unsigned y;
    
    for (i = 0, y = 0; i != n; ++i) {
        uint32_t const len = BAMAlignmentGetCigarElement(self, i);
        unsigned const op = len & 0x0F;
        
        /* the low nibble can encode more values than cigarChars has
         * entries; never index past the end of the table */
        if (op >= sizeof(cigarChars))
            continue;
        
        switch (cigarChars[op]) {
        case ct_Match:
        case ct_Equal:
        case ct_NotEqual:
        case ct_Delete:
        case ct_Skip:
            y += len >> 4;
            break;
        default:
            break;
        }
    }
    return y;
}
#if 0
Ken: don't delete it! It's documentation!
static
unsigned SequenceLengthFromCIGAR(const BAMAlignment *self)
{
    unsigned i;
    unsigned n = getCigarCount(self);
    unsigned y;
    
    for (i = 0, y = 0; i != n; ++i) {
        uint32_t const len = BAMAlignmentGetCigarElement(self, i);
        
        switch (cigarChars[len & 0x0F]) {
        case ct_Match:
        case ct_Equal:
        case ct_NotEqual:
        case ct_Insert:
        case ct_SoftClip:
            y += len >> 4;
            break;
        default:
            break;
        }
    }
    return y;
}
#endif

/* BAMAlignmentGetPosition2
 *  Stores the record's position in "*rhs" and, only when that position
 *  is non-negative, the reference length derived from the CIGAR in
 *  "*length" ("*length" is left untouched otherwise).  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetPosition2(const BAMAlignment *cself, int64_t *rhs, uint32_t *length)
{
    int64_t const pos = getPosition(cself);

    *rhs = pos;
    if (pos >= 0)
        *length = ReferenceLengthFromCIGAR(cself);
    return 0;
}

/* BAMAlignmentGetReadGroupName
 *  Returns the value of the record's "RG" string field.
 *
 *  The optional fields are scanned once; the offset of the RG value is
 *  cached in the alignment.  Scanning continues over all remaining
 *  fields, and any malformed field discards the cached offset.
 *
 *  "rhs" [ OUT ] - pointer to the NUL-terminated read group name inside
 *  the record, or NULL when there is none or the optional data is
 *  malformed.
 *
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetReadGroupName(const BAMAlignment *cself, const char **rhs)
{
    if (cself->readGroupName == 0) {
        const unsigned n = cself->datasize;
        unsigned i = cself->extra;
        rc_t rc = 0;
        
        while (i + 3 < n) {
            const char *const tag = (const char *)&cself->data->raw[i];
            const int type = cself->data->raw[i + 2];
            const uint8_t *const value = &cself->data->raw[i + 3];
            unsigned size = 3; /* bytes used by this field, tag and type included */
            
            switch (type) {
            case dt_CSTRING:
                if (tag[0] == 'R' && tag[1] == 'G')
                    ((BAMAlignment *)cself)->readGroupName = i + 3;
                /* fall through: both string types are sized the same way */
            case dt_HEXSTRING:
                /* "size" is relative to the field start while "n" is the
                 * record size, so the end test must use i + size */
                while (i + size != n && tag[size] != '\0')
                    ++size;
                if (i + size == n)
                    rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
                else
                    ++size;
                break;
            case dt_INT8:
            case dt_UINT8:
            case dt_ASCII:
                size += 1;
                break;
            case dt_INT16:
            case dt_UINT16:
                size += 2;
                break;
            case dt_FLOAT32:
            case dt_INT:
            case dt_UINT:
                size += 4;
                break;
            case dt_FLOAT64:
                size += 8;
                break;
            case dt_NUM_ARRAY:
                size += 5;
                /* the element-type byte and 4-byte count must lie inside
                 * the record before they are read */
                if (i + size > n) {
                    rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
                    break;
                }
                {
                    uint32_t elem_count;
                    unsigned elem_size = 0;
                    
                    memcpy(&elem_count, &value[1], 4);
                    elem_count = LE2HUI32(&elem_count);
                    switch (value[0]) {
                    case dt_INT8:
                    case dt_UINT8:
                        elem_size = 1;
                        break;
                    case dt_INT16:
                    case dt_UINT16:
                        elem_size = 2;
                        break;
                    case dt_FLOAT32:
                    case dt_INT:
                    case dt_UINT:
                        elem_size = 4;
                        break;
                    case dt_FLOAT64:
                        elem_size = 8;
                        break;
                    default:
                        rc = RC(rcAlign, rcFile, rcReading, rcData, rcUnexpected);
                    }
                    if (rc == 0) {
                        /* division form: the product cannot wrap around and
                         * the array cannot extend past the record */
                        if (elem_count > (n - (i + size)) / elem_size)
                            rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
                        else
                            size += elem_count * elem_size;
                    }
                }
                break;
            default:
                rc = RC(rcAlign, rcFile, rcReading, rcData, rcUnexpected);
            }
            if (rc) {
                ((BAMAlignment *)cself)->readGroupName = 0;
                break;
            }
            i += size;
        }
    }

    if (cself->readGroupName != 0)
        *rhs = (const char *)&cself->data->raw[cself->readGroupName];
    else
        *rhs = NULL;
    return 0;
}

/* BAMAlignmentGetReadName
 *  Stores a pointer to the record's read name in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetReadName(const BAMAlignment *cself, const char **rhs)
{
    const char *const name = getReadName(cself);

    *rhs = name;
    return 0;
}

/* BAMAlignmentGetReadName2
 *  Stores a pointer to the record's read name in "*rhs" and the value of
 *  getReadNameLength in "*length".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetReadName2(const BAMAlignment *cself, const char **rhs, size_t *length)
{
    *rhs = getReadName(cself);
    *length = getReadNameLength(cself);
    return 0;
}

/* BAMAlignmentGetFlags
 *  Stores the record's flag word in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetFlags(const BAMAlignment *cself, uint16_t *rhs)
{
    uint16_t const flags = getFlags(cself);

    *rhs = flags;
    return 0;
}

/* BAMAlignmentGetMapQuality
 *  Stores the record's mapping quality in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetMapQuality(const BAMAlignment *cself, uint8_t *rhs)
{
    uint8_t const quality = getMapQual(cself);

    *rhs = quality;
    return 0;
}

/* BAMAlignmentGetCigarCount
 *  Stores the number of CIGAR operations in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetCigarCount(const BAMAlignment *cself, unsigned *rhs)
{
    unsigned const ops = getCigarCount(cself);

    *rhs = ops;
    return 0;
}

/* BAMAlignmentGetRawCigar
 *  Exposes the CIGAR words exactly as stored in the record: "*rslt"
 *  points at the first word inside the raw data and "*length" is the
 *  number of operations.  No byte-order conversion or copying is done.
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetRawCigar(const BAMAlignment *cself, uint32_t const **rslt, uint32_t *length)
{
    *length = getCigarCount(cself);
    *rslt = (uint32_t*)&cself->data->raw[cself->cigar];
    return 0;
}

/* BAMAlignmentGetCigar
 *  Decodes the i-th CIGAR operation.
 *
 *  "type" [ OUT ] - the operation, translated through cigarChars
 *  "length" [ OUT ] - the operation length (upper 28 bits of the word)
 *
 *  Returns rcParam/rcInvalid when "i" is not below the CIGAR count and
 *  rcData/rcInvalid when the stored operation code has no entry in
 *  cigarChars; the outputs are not written in either case.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetCigar(const BAMAlignment *cself, uint32_t i, BAMCigarType *type, uint32_t *length)
{
    uint32_t x;
    unsigned op;
    
    if (i >= getCigarCount(cself))
        return RC(rcAlign, rcFile, rcReading, rcParam, rcInvalid);

    x = BAMAlignmentGetCigarElement(cself, i);
    op = x & 0x0F;
    
    /* the low nibble can encode more values than cigarChars has entries */
    if (op >= sizeof(cigarChars))
        return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
    
    *type = (BAMCigarType)(cigarChars[op]);
    *length = x >> 4;
    return 0;
}

/* BAMAlignmentGetReadLength
 *  Stores the record's read length in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetReadLength(const BAMAlignment *cself, uint32_t *rhs)
{
    uint32_t const bases = getReadLen(cself);

    *rhs = bases;
    return 0;
}

/* BAMAlignmentGetSequence2
 *  Unpacks bases [start, stop) of the read into "rhs", one character per
 *  base.
 *
 *  "rhs" [ OUT ] - receives stop - start characters; no terminating NUL
 *  is written.
 *
 *  "start", "stop" [ IN ] - zero-based half-open range; a "stop" of 0 or
 *  beyond the read length means "through the end of the read".  If
 *  "start" is not below the effective "stop" nothing is written.
 *
 *  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetSequence2(const BAMAlignment *cself, char *rhs, uint32_t start, uint32_t stop)
{
    /*
     *   =    A    C    M    G    R    S    V    T    W    Y    H    K    D    B    N
     * 0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
     * 1111 1000 0100 1100 0010 1010 0110 1110 0001 1001 0101 1101 0011 1011 0111 0000
     *   N    T    G    K    C    Y    S    B    A    W    R    D    M    H    V    =
     */
    static const char  tr[16] = "=ACMGRSVTWYHKDBN";
    /* static const char ctr[16] = "=TGKCYSBAWRDMHVN"; */
    unsigned const n = getReadLen(cself);
    const uint8_t * const seq = &cself->data->raw[cself->seq];
    unsigned si, di;
    
    if (stop == 0 || stop > n)
        stop = n;
    
    /* '<' rather than '!=': with start > stop the old loop ran until si
     * wrapped around, overrunning both the record and the output buffer */
    for (di = 0, si = start; si < stop; ++si, ++di) {
        /* two bases per byte, the even-numbered base in the high nibble */
        unsigned const b4na2 = seq[si >> 1];
        unsigned const b4na = (si & 1) == 0 ? (b4na2 >> 4) : (b4na2 & 0x0F);
        
        rhs[di] = tr[b4na];
    }
    return 0;
}

/* BAMAlignmentGetSequence
 *  Unpacks the whole read into "rhs" by calling BAMAlignmentGetSequence2
 *  with start 0 and stop 0 (a stop of 0 selects the full read length).
 */
LIB_EXPORT rc_t CC BAMAlignmentGetSequence(const BAMAlignment *cself, char *rhs)
{
    return BAMAlignmentGetSequence2(cself, rhs, 0, 0);
}

static rc_t BAMAlignmentGetCSRead(BAMAlignment const *cself);

/* BAMAlignmentHasColorSpace
 *  True when the record has a usable color-space read.  The offset of
 *  that read is looked up on first use; it stays 0 when none is found.
 */
LIB_EXPORT bool CC BAMAlignmentHasColorSpace(BAMAlignment const *cself)
{
    if (cself->csread == 0)
        BAMAlignmentGetCSRead(cself);

    return cself->csread != 0;
}

/* BAMAlignmentGetCSKey
 *  Stores the first character of the color-space read in "rhs[0]".
 *
 *  Returns rcData/rcNotFound when the record has no usable color-space
 *  read, 0 otherwise.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetCSKey(BAMAlignment const *cself, char rhs[1])
{
    if (cself->csread == 0)
        BAMAlignmentGetCSRead(cself);

    if (cself->csread == 0)
        return RC(rcAlign, rcFile, rcReading, rcData, rcNotFound);

    *rhs = cself->data->raw[cself->csread];
    return 0;
}

/* BAMAlignmentGetCSSequence
 *  Copies the color-space read, without its leading key character, into
 *  "rhs": exactly read-length bytes, no terminating NUL.
 *
 *  Returns rcData/rcNotFound when the record has no usable color-space
 *  read, 0 otherwise.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetCSSequence(BAMAlignment const *cself, char rhs[])
{
    if (cself->csread == 0)
        BAMAlignmentGetCSRead(cself);

    if (cself->csread == 0)
        return RC(rcAlign, rcFile, rcReading, rcData, rcNotFound);

    /* + 1 skips the key character that precedes the colors */
    memcpy(rhs, &cself->data->raw[cself->csread + 1], getReadLen(cself));
    return 0;
}

/* BAMAlignmentGetQuality
 *  Points "*rhs" at the quality bytes stored inside the record; nothing
 *  is copied.  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetQuality(const BAMAlignment *cself, const uint8_t **rhs)
{
    const uint8_t *const quality = &cself->data->raw[cself->qual];

    *rhs = quality;
    return 0;
}

/* i_OptData_f
 *  Callback invoked by BAMAlignmentParseOptData once per optional field.
 *
 *  "ctx" is passed through from the caller of BAMAlignmentParseOptData.
 *  "tag" points at the two-character field tag inside the record.
 *  For numeric arrays "type" is the element type, "count" the number of
 *  elements and "size" the size of one element; for all other fields
 *  "count" is 1 and "size" is the value size (string length without the
 *  terminating NUL for the string types).
 *  "value" points at the first value byte inside the record.
 *
 *  Returning true stops the iteration.
 */
typedef bool (* i_OptData_f)(BAMAlignment const *cself, void *ctx, char const tag[2], BAMOptDataValueType type, unsigned count,
                             void const *value, unsigned size);

/* BAMAlignmentParseOptData
 *  Walks the optional fields of a record and calls "f" for each one.
 *
 *  Each field is a two-byte tag, a one-byte type code and a value whose
 *  size depends on the type.  Iteration stops when fewer than four bytes
 *  remain, when "f" returns true, or on the first malformed field.
 *
 *  "ctx" [ IN, OPAQUE ] - handed to "f" unchanged
 *
 *  Returns 0 on normal completion, rcData/rcInvalid when a value would
 *  extend past the end of the record (including an unterminated string),
 *  rcData/rcUnexpected for an unknown type or array element type.
 */
static rc_t BAMAlignmentParseOptData(BAMAlignment const *cself, void *ctx, i_OptData_f f)
{
    unsigned const n = cself->datasize;
    unsigned i = cself->extra;
    rc_t rc = 0;
    
    while (rc == 0 && i + 3 < n) {
        char const *const tag = (char const *)&cself->data->raw[i];
        int type = cself->data->raw[i + 2];
        const uint8_t *const vp = &cself->data->raw[i + 3];
        unsigned len = 0;    /* bytes occupied by the value in the record */
        unsigned size = 0;   /* element size reported to the callback */
        unsigned count = 1;  /* element count reported to the callback */
        unsigned offset = 0; /* where the payload starts within the value */
        
        switch (type) {
        case dt_CSTRING:
        case dt_HEXSTRING:
            len = 0;
            while (i + len + 3 != n && vp[len] != '\0')
                ++len;
            if (i + len + 3 == n) {
                /* ran off the end without finding the terminator */
                rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
                break;
            }
            size = len;
            ++len;
            break;
        case dt_INT8:
        case dt_UINT8:
        case dt_ASCII:
            size = len = 1;
            break;
        case dt_INT16:
        case dt_UINT16:
            size = len = 2;
            break;
        case dt_INT:
        case dt_FLOAT32:
        case dt_UINT:
            size = len = 4;
            break;
        case dt_FLOAT64:
            size = len = 8;
            break;
        case dt_NUM_ARRAY:
            /* array header: one element-type byte and a 32-bit count */
            offset = len = 5;
            {
                unsigned elem_size = 0;
                uint32_t elem_count = 0;
                
                switch (vp[0]) {
                case dt_INT8:
                case dt_UINT8:
                    elem_size = 1;
                    break;
                case dt_INT16:
                case dt_UINT16:
                    elem_size = 2;
                    break;
                case dt_FLOAT32:
                case dt_INT:
                case dt_UINT:
                    elem_size = 4;
                    break;
                case dt_FLOAT64:
                    elem_size = 8;
                    break;
                default:
                    rc = RC(rcAlign, rcFile, rcReading, rcData, rcUnexpected);
                    break;
                }
                if (rc)
                    break;
                /* the whole header must be inside the record before the
                 * count is read from it */
                if (i + len + 3 > n) {
                    rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
                    break;
                }
                memcpy(&elem_count, &vp[1], 4);
                elem_count = LE2HUI32(&elem_count);
                /* division form: elem_size * elem_count can neither wrap
                 * around nor extend past the end of the record */
                if (elem_count > (n - (i + len + 3)) / elem_size) {
                    rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
                    break;
                }
                len += elem_size * elem_count;
                type = vp[0];
                count = elem_count;
                size = elem_size;
            }
            break;
        default:
            rc = RC(rcAlign, rcFile, rcReading, rcData, rcUnexpected);
            break;
        }
        if (rc)
            break;
        /* fixed-size values must also fit, otherwise the callback would
         * be handed a pointer to data beyond the record */
        if (i + len + 3 > n) {
            rc = RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
            break;
        }
        i += len + 3;
        if (f(cself, ctx, tag, type, count, &vp[offset], size))
            break;
    }
    return rc;
}

/* BAMAlignmentGetCS
 *  Optional-field callback that looks for a "CS" field whose size is the
 *  read length plus one.  On a match the value pointer is stored through
 *  "ctx" (a uint8_t const **) and true is returned to stop the scan.
 */
static bool BAMAlignmentGetCS(BAMAlignment const *cself, void *ctx, char const tag[2],
                              BAMOptDataValueType type, unsigned count,
                              void const *value, unsigned size)
{
    if (tag[0] != 'C' || tag[1] != 'S')
        return false;
    if (size != getReadLen(cself) + 1)
        return false;

    *(uint8_t const **)ctx = value;
    return true;
}

/* BAMAlignmentGetCSRead
 *  Scans the optional fields for a color-space read and, if one is found,
 *  caches its offset from the start of the raw record in the alignment.
 *
 *  Returns the rc from BAMAlignmentParseOptData.
 */
static rc_t BAMAlignmentGetCSRead(BAMAlignment const *cself)
{
    uint8_t const *found = NULL;
    rc_t const rc = BAMAlignmentParseOptData(cself, &found, BAMAlignmentGetCS);

    if (rc != 0 || found == NULL)
        return rc;

    ((BAMAlignment *)cself)->csread = found - cself->data->raw;
    return rc;
}

/* BAMAlignmentGetOQ
 *  Optional-field callback that looks for an "OQ" field.  On a match the
 *  value pointer is stored through "ctx" (a uint8_t const **) and true
 *  is returned to stop the scan.
 */
static bool BAMAlignmentGetOQ(BAMAlignment const *cself, void *ctx, char const tag[2],
                              BAMOptDataValueType type, unsigned count,
                              void const *value, unsigned size)
{
    bool const isOQ = (tag[0] == 'O' && tag[1] == 'Q');

    if (isOQ)
        *(uint8_t const **)ctx = value;
    return isOQ;
}

/* BAMAlignmentGetQuality2
 *  Returns the "OQ" optional field when the record has one, otherwise
 *  the record's own quality bytes.
 *
 *  "rhs" [ OUT ] - pointer into the record
 *  "offset" [ OUT ] - 33 when "rhs" is the OQ value, 0 when it is the
 *  record's quality section
 *
 *  Always returns 0; a failure while scanning the optional fields simply
 *  selects the fallback.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetQuality2(BAMAlignment const *cself, uint8_t const **rhs, uint8_t *offset)
{
    rc_t rc;

    *offset = 33;
    *rhs = NULL;
    rc = BAMAlignmentParseOptData(cself, ( void* ) rhs, BAMAlignmentGetOQ);
    if (rc != 0 || *rhs == NULL) {
        *offset = 0;
        *rhs = &cself->data->raw[cself->qual];
    }
    return 0;
}

/* BAMAlignmentGetMateRefSeqId
 *  Stores the mate's reference sequence id in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetMateRefSeqId(const BAMAlignment *cself, int32_t *rhs)
{
    int32_t const mateId = getMateRefSeqId(cself);

    *rhs = mateId;
    return 0;
}

/* BAMAlignmentGetMatePosition
 *  Stores the mate's position in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetMatePosition(const BAMAlignment *cself, int64_t *rhs)
{
    int64_t const matePos = getMatePos(cself);

    *rhs = matePos;
    return 0;
}

/* BAMAlignmentGetInsertSize
 *  Stores the record's insert size in "*rhs".  Always returns 0.
 */
LIB_EXPORT rc_t CC BAMAlignmentGetInsertSize(const BAMAlignment *cself, int64_t *rhs)
{
    int64_t const insert = getInsertSize(cself);

    *rhs = insert;
    return 0;
}

/* OptForEach_ctx_t
 *  State shared between BAMAlignmentOptDataForEach and its per-field
 *  callback i_OptDataForEach.
 */
typedef struct OptForEach_ctx_s {
    BAMOptData *val;                 /* buffer the current field value is copied into */
    BAMOptData **alloced;            /* handed to realloc()/free(); NULL until the buffer has to grow */
    size_t valsize;                  /* capacity, in bytes, of the buffer behind val */
    rc_t rc;                         /* allocation failure or the user callback's last result */
    BAMOptionalDataFunction user_f;  /* user callback invoked for every field */
    void *user_ctx;                  /* passed to user_f unchanged */
} OptForEach_ctx_t;

/* i_OptDataForEach
 *  Per-field callback used by BAMAlignmentOptDataForEach: copies the
 *  field value into the context's BAMOptData buffer (growing it on the
 *  heap when needed), converts multi-byte elements to host byte order on
 *  big-endian hosts and hands the result to the user's callback.
 *
 *  "Ctx" [ IN ] - an OptForEach_ctx_t
 *
 *  Returns true to stop the iteration, i.e. when memory is exhausted or
 *  the user's callback returned a non-zero rc; that rc is left in
 *  ctx->rc.
 */
static bool i_OptDataForEach(BAMAlignment const *cself, void *Ctx, char const tag[2], BAMOptDataValueType type, unsigned count, void const *value, unsigned size)
{
    OptForEach_ctx_t *ctx = (OptForEach_ctx_t *)Ctx;
    /* offset of the end of a u.f64 array big enough for count * size
     * bytes, i.e. the total number of bytes the BAMOptData needs */
    size_t const need = (size_t)&((BAMOptData const *)NULL)->u.f64[(count * size + sizeof(double))/sizeof(double)];
    
    if (need > ctx->valsize) {
        void *const temp = realloc(ctx->alloced, need);
        if (temp == NULL) {
            ctx->rc = RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);
            return true;
        }
        /* "alloced" is the handle later given to realloc()/free(), so it
         * must hold the block itself; writing through it dereferenced a
         * NULL pointer the first time the buffer had to grow */
        ctx->alloced = temp;
        ctx->val = temp;
        ctx->valsize = need;
    }
    ctx->val->type = type;
    ctx->val->element_count = (type == dt_CSTRING || type == dt_HEXSTRING) ? size : count;
    
    memcpy(ctx->val->u.u8, value, size * count);
#if __BYTE_ORDER == __BIG_ENDIAN
    {{
        unsigned di;
        
        /* the record stores little-endian values; swap each element in place */
        switch (size) {
        case 2:
            for (di = 0; di != count; ++di)
                ctx->val->u.u16[di] = LE2HUI16(&ctx->val->u.u16[di]);
            break;
        case 4:
            for (di = 0; di != count; ++di)
                ctx->val->u.u32[di] = LE2HUI32(&ctx->val->u.u32[di]);
            break;
        case 8:
            for (di = 0; di != count; ++di)
                ctx->val->u.u64[di] = LE2HUI64(&ctx->val->u.u64[di]);
            break;
        }
    }}
#endif
    ctx->rc = ctx->user_f(ctx->user_ctx, tag, ctx->val);
    return ctx->rc != 0;
}

#if USE_OLD_BAM_OPT_FOR_EACH
/* BAMAlignmentOptDataForEach (legacy variant)
 *  Compiled only when USE_OLD_BAM_OPT_FOR_EACH is set.  Walks the
 *  optional fields and calls "f" with the tag, type code, value pointer
 *  and value size of each.  Numeric arrays are not supported.
 *
 *  Returns 0 when all fields were visited, the first non-zero rc from
 *  "f", rcData/rcInvalid for an unterminated string or a value that
 *  extends past the record, rcData/rcUnexpected for any other type.
 */
LIB_EXPORT rc_t CC BAMAlignmentOptDataForEach(const BAMAlignment *cself, void *ctx, BAMOptionalDataFunction f)
{
    const char *const auxData = (const char *)&cself->data->raw[cself->extra];
    const unsigned n = cself->datasize - cself->extra;
    unsigned i;
    rc_t rc;
    
    if (n < 4)
        return 0;
    for (i = 0; i < n - 3; ) {
        const char *const tag = &auxData[i];
        const int type = ((const uint8_t *)auxData)[i + 2];
        const uint8_t *const value = (const uint8_t *)&auxData[i + 3];
        unsigned size = 3; /* bytes used by this field, tag and type included */
        
        switch (type) {
        case dt_CSTRING:
        case dt_HEXSTRING:
            /* "size" is relative to this field while "n" covers the whole
             * optional area, so the end test must use i + size */
            while (i + size != n && tag[size] != '\0')
                ++size;
            if (i + size == n)
                return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
            ++size;
            break;
        case dt_INT8:
        case dt_UINT8:
        case dt_ASCII:
            size += 1;
            break;
        case dt_INT16:
        case dt_UINT16:
            size += 2;
            break;
        case dt_FLOAT32:
        case dt_INT:
        case dt_UINT:
            size += 4;
            break;
        case dt_FLOAT64:
            size += 8;
            break;
        case dt_NUM_ARRAY:
            /* TODO: implement */
        default:
            return RC(rcAlign, rcFile, rcReading, rcData, rcUnexpected);
        }
        /* never hand the callback a value that runs past the record */
        if (i + size > n)
            return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
        rc = f(ctx, tag, type, value, size - 3);
        if (rc)
            return rc;
        i += size;
    }
    return 0;
}

LIB_EXPORT rc_t CC BAMAlignmentOptDataForEach2(const BAMAlignment *cself, void *ctx, BAMOptionalDataFunction2 f)
#else

LIB_EXPORT rc_t CC BAMAlignmentOptDataForEach(const BAMAlignment *cself, void *user_ctx, BAMOptionalDataFunction f)
#endif
{
    /* Calls "f" once for every optional field of the record, passing each
     * value as a BAMOptData.  Values are staged in a 4096-byte automatic
     * buffer; i_OptDataForEach moves to heap storage for anything larger.
     * Returns the parser's rc if it failed, otherwise the rc left in the
     * context by the callback.
     */
    union u {
        BAMOptData value;
        uint8_t storage[4096];
    } stackbuf;
    OptForEach_ctx_t state;
    rc_t rc;

    state.user_f = f;
    state.user_ctx = user_ctx;
    state.rc = 0;
    state.val = &stackbuf.value;
    state.valsize = sizeof(stackbuf);
    state.alloced = NULL;

    rc = BAMAlignmentParseOptData(cself, &state, i_OptDataForEach);
    if (rc == 0)
        rc = state.rc;

    /* free(NULL) is a no-op, so no guard is needed when nothing was allocated */
    free(state.alloced);
    return rc;
}

/* MARK: BAMIndex stuff */

/* get_pos
 *  Reads a little-endian 64-bit value from the 8 bytes at "buf" and
 *  returns it in host byte order.  The bytes are copied out first, so
 *  "buf" does not need to be aligned.
 */
static uint64_t get_pos(const uint8_t *buf)
{
    uint64_t raw;

    memcpy(&raw, buf, sizeof(raw));
    return LE2HUI64(&raw);
}

#define MAX_BIN 37449
/* bin2ival
 *  Maps a bin number to a 16-bit interval number: the bin's offset from
 *  the first bin of its range, shifted left by that range's shift.
 *  Bin numbers of MAX_BIN and above map to 0.
 */
static uint16_t bin2ival(uint16_t bin)
{
    /* each row covers the bins first .. limit - 1 */
    static const struct {
        uint16_t first;
        uint16_t limit;
        unsigned shift;
    } level[] = {
        {    0,       1, 15 },
        {    1,       9, 12 },
        {    9,      73,  9 },
        {   73,     585,  6 },
        {  585,    4681,  3 },
        { 4681, MAX_BIN,  0 }
    };
    unsigned lvl;

    for (lvl = 0; lvl != sizeof(level) / sizeof(level[0]); ++lvl) {
        if (bin < level[lvl].limit)
            return (bin - level[lvl].first) << level[lvl].shift;
    }
    return 0;
}

/* bin_ival_count
 *  Returns the number of intervals assigned to a bin, which depends only
 *  on which range of bin numbers it falls in: 1 << 15 for bin 0, then
 *  1 << 12, 1 << 9, 1 << 6, 1 << 3 and 1 for the following ranges.
 *  Bin numbers of 37449 and above yield 0.
 */
static uint16_t bin_ival_count(uint16_t bin)
{
    /* exclusive upper bin number of each range, with its interval count */
    static const struct {
        uint16_t limit;
        uint16_t intervals;
    } level[] = {
        {     1, 1 << 15 },
        {     9, 1 << 12 },
        {    73, 1 <<  9 },
        {   585, 1 <<  6 },
        {  4681, 1 <<  3 },
        { 37449, 1       }
    };
    unsigned lvl;

    for (lvl = 0; lvl != sizeof(level) / sizeof(level[0]); ++lvl) {
        if (bin < level[lvl].limit)
            return level[lvl].intervals;
    }
    return 0;
}

/* Identifies which part of the index a WalkIndexStructure callback is being
 * shown.
 */
enum BAMIndexStructureTypes {
    bai_StartStopPairs, /* the chunks of one bin; 16 bytes per element */
    bai_16kIntervals    /* the interval list of one reference; 8 bytes per element */
};

/* Callback invoked by WalkIndexStructure once per bin and once per interval
 * list of every reference in the index.
 *
 *  data, dlen  the raw elements and their total size in bytes
 *  refNo       zero-based number of the reference being walked
 *  refs        reference count read from the index header
 *  type        which kind of elements 'data' holds
 *  binNo       bin number read from the index; ~0u for an interval list
 *  bins        number of bins the index declares for this reference
 *  elements    number of elements in 'data'
 *  ctx         the context pointer given to WalkIndexStructure
 *
 * A non-zero return stops the walk and is returned by WalkIndexStructure.
 */
typedef rc_t (*WalkIndexStructureCallBack)(const uint8_t data[], size_t dlen,
                                           unsigned refNo,
                                           unsigned refs,
                                           enum BAMIndexStructureTypes type,
                                           unsigned binNo,
                                           unsigned bins,
                                           unsigned elements,
                                           void *ctx);

/* Walks an in-memory BAI index and reports every bin and every interval
 * list to 'func'.
 *
 *  buf, blen   the complete index image
 *  func        called once per bin (bai_StartStopPairs) and once per
 *              reference for its interval list (bai_16kIntervals)
 *  ctx         passed through to 'func'
 *
 * Returns 0 on success, the first non-zero rc from 'func', or an rc
 * describing why the image is not a usable index (bad magic, no references,
 * negative counts, or data running past 'blen').
 *
 * All counts come from the file and are therefore untrusted: they are
 * rejected when negative and compared against the bytes remaining by
 * division, so no size computation can overflow or wrap.
 */
static
rc_t WalkIndexStructure(const uint8_t buf[], size_t blen,
                        WalkIndexStructureCallBack func,
                        void *ctx
                        )
{
    size_t cp = 0;      /* invariant: cp <= blen */
    int32_t nrefs;
    unsigned i;
    rc_t rc;
    
    if (blen - cp < 4)
        return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
    if (memcmp(buf + cp, "BAI\1", 4) != 0)
        return RC(rcAlign, rcIndex, rcReading, rcFormat, rcUnknown);
    cp += 4;
    
    if (blen - cp < 4)
        return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
    memcpy(&nrefs, buf + cp, 4);
    nrefs = LE2HI32(&nrefs);
    cp += 4;
    
    if (nrefs == 0)
        return RC(rcAlign, rcIndex, rcReading, rcData, rcEmpty);
    if (nrefs < 0)
        return RC(rcAlign, rcIndex, rcReading, rcData, rcInvalid);
    
    for (i = 0; i < (unsigned)nrefs; ++i) {
        int32_t bins;
        int32_t chunks;
        int32_t intervals;
        unsigned di;
        size_t dlen;
        
        if (blen - cp < 4)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
        memcpy(&bins, buf + cp, 4); cp += 4;
        bins = LE2HI32(&bins);
        if (bins < 0)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcInvalid);
        
        for (di = 0; di < (unsigned)bins; ++di) {
            uint32_t binNo;
            
            if (blen - cp < 8)
                return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
            memcpy(&binNo, buf + cp, 4); cp += 4;
            memcpy(&chunks, buf + cp, 4); cp += 4;
            binNo = LE2HUI32(&binNo);
            chunks = LE2HI32(&chunks);
            if (chunks < 0)
                return RC(rcAlign, rcIndex, rcReading, rcData, rcInvalid);
            /* each chunk is a 16 byte start/stop pair */
            if ((size_t)chunks > (blen - cp) / 16)
                return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
            dlen = (size_t)chunks * 16;
            rc = func(&buf[cp], dlen, i, nrefs, bai_StartStopPairs, binNo, bins, chunks, ctx);
            if (rc)
                return rc;
            cp += dlen;
        }
        if (blen - cp < 4)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
        memcpy(&intervals, buf + cp, 4); cp += 4;
        intervals = LE2HI32(&intervals);
        if (intervals < 0)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcInvalid);
        /* each interval is an 8 byte file position */
        if ((size_t)intervals > (blen - cp) / 8)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcInsufficient);
        dlen = (size_t)intervals * 8;
        rc = func(&buf[cp], dlen, i, nrefs, bai_16kIntervals, ~(unsigned)0, bins, intervals, ctx);
        if (rc)
            return rc;
        cp += dlen;
    }
    return 0;
}

/* State for LoadIndex1, the sizing pass over the index. */
struct LoadIndex1_s {
    const BAMFile *self;            /* supplies refSeqs and refSeq[].length */
    int refNo;                      /* reference seen by the previous callback; LoadIndex starts it at -1 */
    unsigned refs;                  /* reference count reported by the walker */
    unsigned intervals;             /* 16k interval count for the current reference, 0 until it has data */
    unsigned total_interval_count;  /* sum of 'intervals' over the references already completed */
};

/* WalkIndexStructure callback for the sizing pass of LoadIndex.
 *
 * Accumulates, in the struct LoadIndex1_s passed as Ctx, the number of 16k
 * intervals needed for every reference that has any index data.  The count
 * of the last reference is left in ctx->intervals for the caller to add.
 *
 * Returns rcInvalid if the index has data for a reference number that the
 * BAM file does not have, rcExcessive if an interval list is longer than the
 * reference allows, otherwise 0.
 */
static
rc_t LoadIndex1(const uint8_t data[], size_t dlen, unsigned refNo,
                unsigned refs, enum BAMIndexStructureTypes type,
                unsigned binNo, unsigned bins,
                unsigned elements, void *Ctx)
{
    struct LoadIndex1_s *ctx = (struct LoadIndex1_s *)Ctx;
    
    ctx->refs = refs;
    if (refNo != (unsigned)ctx->refNo) {
        /* moved on to a new reference; bank the previous one's count */
        ctx->total_interval_count += ctx->intervals;
        ctx->intervals = 0;
        ctx->refNo = refNo;
    }
    if (elements != 0) {
        /* refNo indexes refSeq[], so refNo == refSeqs is already out of range */
        if (refNo >= ctx->self->refSeqs)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcInvalid);
        ctx->intervals = (ctx->self->refSeq[refNo].length + 16383) >> 14;
        if (type == bai_16kIntervals && elements > ctx->intervals)
            return RC(rcAlign, rcIndex, rcReading, rcData, rcExcessive);
    }
    return 0;
}

/* State for LoadIndex2 / LoadIndex2a, the filling pass over the index. */
struct LoadIndex2_s {
    const BAMFile *self;        /* supplies refSeq[].length */
    BAMFilePosition **refSeq;   /* per-reference pointers into the position array being filled */
    BAMFilePosition *cur;       /* next unused slot of the position array */
#if _DEBUGGING
    BAMFilePosition *end;       /* one past the last slot; only used in assertions */
#endif
    const uint8_t *base;        /* start of the index image */
    unsigned bins[MAX_BIN + 1]; /* per bin: offset from 'base' of the bin's header, 0 if not seen */
    bool hasData;               /* true once a non-empty bin was recorded for the current reference */
};

/* Builds the interval -> file position table for one reference.
 *
 * Called by LoadIndex2 when the interval list of reference 'refNo' is
 * reached; by then ctx->bins[] holds the offsets of the bins seen for that
 * reference.
 *
 *  data      the interval list, 'elements' 8-byte file positions
 *  elements  number of entries in 'data'
 *  ctx       filling-pass state; ctx->cur is advanced by the number of
 *            intervals of this reference and ctx->bins / ctx->hasData are
 *            reset for the next reference
 *
 * The remaining parameters mirror the walker callback and are unused.
 * Always returns 0.
 *
 * Bins come from the file and may lie (partly) beyond the end of the
 * reference; such bins are skipped or clipped so that nothing outside this
 * reference's slice of the table, or outside minOffset[], is touched.
 */
static
rc_t LoadIndex2a(const uint8_t data[], size_t dlen, unsigned refNo,
                 unsigned refs, enum BAMIndexStructureTypes type,
                 unsigned binNo, unsigned bins,
                 unsigned elements, struct LoadIndex2_s *ctx)
{
    const unsigned max_ival = (ctx->self->refSeq[refNo].length + 16383) >> 14;
    unsigned i;
    unsigned cp;
    unsigned k;
    unsigned n_update;
    uint32_t chunk_count;
    uint64_t minOffset[1u << 15];

    assert(ctx->refSeq[refNo] == NULL);
    ctx->refSeq[refNo] = ctx->cur;
    ctx->cur += max_ival;
    
#if _DEBUGGING
    assert(refNo < ctx->self->refSeqs);
    assert(ctx->cur <= ctx->end);
    assert(elements <= max_ival);
#endif
    /* get the positions of the first records in the 16kbp intervals */
    for (cp = i = 0; i != elements; ++i, cp += 8)
        ctx->refSeq[refNo][i] = get_pos(&data[cp]);
    /* get the positions of the first records in the 16kbp bins */
    for (i = MAX_BIN; i != 0; ) {
        const unsigned ival = bin2ival(--i);
        const unsigned n_ival = bin_ival_count(i);
        uint64_t found;
        
        cp = ctx->bins[i];
        if (cp == 0)
            continue;
        if (n_ival > 1)
            break;
        /* a bin past the end of the reference has no slot in the table */
        if (ival >= max_ival)
            continue;
        
#if _DEBUGGING
        {{
            uint32_t bin_no;

            memcpy(&bin_no, ctx->base + cp, 4);
            assert(i == LE2HI32(&bin_no));
        }}
#endif
        cp += 4;
        memcpy(&chunk_count, ctx->base + cp, 4); cp += 4;
        chunk_count = LE2HI32(&chunk_count);
        found = ctx->refSeq[refNo][ival];
        for (k = 0; k < chunk_count; ++k) {
            const uint64_t start = get_pos(ctx->base + cp);
            
            cp += 16;
            if (found == 0 || start < found)
                found = start;
        }
        ctx->refSeq[refNo][ival] = found;
    }
    /* The interval list now contains the offsets to the first alignment
     * that starts at or after the interval's starting position.
     * An interval's starting position is 16kbp * interval number.
     *
     * We will now use the information from the bigger bins to find the
     * offsets of the first chunk of alignments that ends after an
     * interval's first alignment.
     */
    memset(minOffset, 0, sizeof(minOffset));
    for (i = 0; i != MAX_BIN; ++i) {
        const unsigned ival = bin2ival(i);
        unsigned n_ival = bin_ival_count(i);
        
        cp = ctx->bins[i];
        if (cp == 0)
            continue;
        if (n_ival <= 1)
            break;
        
        /* skip bins wholly past the reference; this also keeps the
         * clipping below from underflowing
         */
        if (ival >= max_ival)
            continue;
        if (ival + n_ival > max_ival)
            n_ival = max_ival - ival;
        
        memcpy(&chunk_count, ctx->base + cp + 4, 4); cp += 8;
        chunk_count = LE2HI32(&chunk_count);
        for (k = 0; k < chunk_count; ++k) {
            const uint64_t start = get_pos(ctx->base + cp);
            const uint64_t end   = get_pos(ctx->base + cp + 8);
            unsigned l;
            
            cp += 16;
            for (l = 0; l != n_ival; ++l) {
                if (start < ctx->refSeq[refNo][ival + l] &&
                    ctx->refSeq[refNo][ival + l] <= end &&
                    (start < minOffset[ival + l] ||
                     minOffset[ival + l] == 0
                     )
                    )
                {
                    minOffset[ival + l] = start;
                }
            }
        }
    }
    /* update the intervals to the new earlier offsets if any;
     * bins never address more than 1 << 15 intervals, so nothing beyond
     * that can have been recorded in minOffset
     */
    n_update = max_ival;
    if (n_update > sizeof(minOffset) / sizeof(minOffset[0]))
        n_update = sizeof(minOffset) / sizeof(minOffset[0]);
    for (i = 0; i != n_update; ++i) {
        if (minOffset[i] != 0)
            ctx->refSeq[refNo][i] = minOffset[i];
    }
    memset(ctx->bins, 0, sizeof(ctx->bins));
    ctx->hasData = false;
    return 0;
}

/* WalkIndexStructure callback for the filling pass of LoadIndex.
 *
 * For a bin: remembers where the bin's header sits in the index image
 * (8 bytes before its chunk data), provided the bin number is below MAX_BIN
 * and the bin is not empty.
 * For an interval list: builds the reference's table via LoadIndex2a, but
 * only if the list is non-empty or a bin was remembered for the reference.
 */
static
rc_t LoadIndex2(const uint8_t data[], size_t dlen, unsigned refNo,
                unsigned refs, enum BAMIndexStructureTypes type,
                unsigned binNo, unsigned bins,
                unsigned elements, void *Ctx)
{
    struct LoadIndex2_s *const state = (struct LoadIndex2_s *)Ctx;

    if (type != bai_StartStopPairs) {
        if (elements == 0 && !state->hasData)
            return 0;
        return LoadIndex2a(data, dlen, refNo, refs, type, binNo, bins,
                           elements, state);
    }

    if (elements != 0 && binNo < MAX_BIN) {
        /* the bin number and chunk count precede the chunk data */
        state->bins[binNo] = (data - 8) - state->base;
        state->hasData = true;
    }
    return 0;
}

/* Builds a BAMIndex from an in-memory BAI image and installs it in 'self'.
 *
 *  self        the BAM file the index belongs to
 *  buf, blen   the complete index image
 *
 * The image is walked twice: once with LoadIndex1 to validate it and size
 * the table, once with LoadIndex2 to fill the table.  The index object is a
 * single allocation: one pointer per reference followed by the file
 * positions of all indexed intervals.
 *
 * Returns 0 on success.  On any failure the index previously attached to
 * 'self' (if any) is left untouched and nothing is leaked.
 */
static
rc_t LoadIndex(BAMFile *self, const uint8_t buf[], size_t blen)
{
    BAMIndex *idx;
    rc_t rc;
    struct LoadIndex1_s loadIndex1ctx;
    struct LoadIndex2_s *loadIndex2ctx;
    /* number of per-reference pointers at the head of the index object */
    unsigned const posArray = self->refSeqs;

    memset(&loadIndex1ctx, 0, sizeof(loadIndex1ctx));
    loadIndex1ctx.refNo = -1;
    loadIndex1ctx.self = self;
    
    rc = WalkIndexStructure(buf, blen, LoadIndex1, &loadIndex1ctx);
    if (rc)
        return rc;
    /* the walker never signals the end of the last reference */
    loadIndex1ctx.total_interval_count += loadIndex1ctx.intervals;

    idx = calloc(1, posArray * sizeof(BAMFilePosition *) +
                 loadIndex1ctx.total_interval_count * sizeof(BAMFilePosition));
    if (idx == NULL)
        return RC(rcAlign, rcIndex, rcReading, rcMemory, rcExhausted);

    /* too big for the stack, see bins[] */
    loadIndex2ctx = malloc(sizeof(*loadIndex2ctx));
    if (loadIndex2ctx == NULL) {
        free(idx);
        return RC(rcAlign, rcIndex, rcReading, rcMemory, rcExhausted);
    }

    memset(loadIndex2ctx->bins, 0, sizeof(loadIndex2ctx->bins));
    loadIndex2ctx->self = self;
    loadIndex2ctx->refSeq = &idx->refSeq[0];
    loadIndex2ctx->base = buf;
    loadIndex2ctx->hasData = false;
    /* the positions start right after the pointer array */
    loadIndex2ctx->cur = (BAMFilePosition *)&idx->refSeq[posArray];
#if _DEBUGGING
    loadIndex2ctx->end = loadIndex2ctx->cur + loadIndex1ctx.total_interval_count;
#endif

    rc = WalkIndexStructure(buf, blen, LoadIndex2, loadIndex2ctx);
    free(loadIndex2ctx);
    if (rc) {
        free(idx);
        return rc;
    }

    /* only now replace the old index */
    if (self->ndx)
        BAMIndexWhack(self->ndx);
    self->ndx = idx;
    return 0;
}

/* Reads the index file at 'path' into memory and loads it into 'self'.
 *
 *  self  the BAM file to attach the index to
 *  path  file system path of the index, used literally
 *
 * Returns 0 on success; otherwise the rc of the failing file operation,
 * rcExcessive if the file does not fit in memory, rcExhausted if the buffer
 * cannot be allocated, rcInvalid if the file ends before its reported size,
 * or the rc of LoadIndex.  The file handle and the buffer are released on
 * every path.
 */
static
rc_t BAMFileOpenIndexInternal(const BAMFile *self, const char *path)
{
    const KFile *kf;
    rc_t rc;
    size_t fsize;
    size_t total = 0;
    uint64_t u64 = 0;
    uint8_t *buf;
    KDirectory *dir;
    
    rc = KDirectoryNativeDir(&dir);
    if (rc) return rc;
    /* the path argument is a format string, so never pass 'path' directly */
    rc = KDirectoryOpenFileRead(dir, &kf, "%s", path);
    KDirectoryRelease(dir);
    if (rc) return rc;

    rc = KFileSize(kf, &u64);
    if (rc) {
        KFileRelease(kf);
        return rc;
    }
    if (sizeof(size_t) < sizeof(u64) && (size_t)u64 != u64) {
        KFileRelease(kf);
        return RC(rcAlign, rcIndex, rcReading, rcData, rcExcessive);
    }
    fsize = (size_t)u64;

    buf = malloc(fsize);
    if (buf == NULL) {
        KFileRelease(kf);
        return RC(rcAlign, rcIndex, rcReading, rcMemory, rcExhausted);
    }

    /* a single read may return less than was asked for */
    while (total < fsize) {
        size_t nread = 0;

        rc = KFileRead(kf, total, buf + total, fsize - total, &nread);
        if (rc != 0 || nread == 0)
            break;
        total += nread;
    }
    KFileRelease(kf);

    if (rc == 0) {
        if (total == fsize)
            rc = LoadIndex((BAMFile *)self, buf, total);
        else
            rc = RC(rcAlign, rcIndex, rcReading, rcData, rcInvalid);
    }
    free(buf);
    return rc;
}

/* Public entry point: loads the index file at 'path' and attaches it to
 * 'self'.  Returns whatever BAMFileOpenIndexInternal returns.
 */
LIB_EXPORT rc_t CC BAMFileOpenIndex(const BAMFile *self, const char *path)
{
    rc_t const rc = BAMFileOpenIndexInternal(self, path);

    return rc;
}

/* Same as BAMFileOpenIndex, but takes the index location as a KPath.
 *
 * The path text is copied into a local buffer and NUL-terminated here.
 * Returns the rc of KPathReadPath if that fails, rcExcessive if the path
 * does not leave room for the terminator, otherwise the rc of
 * BAMFileOpenIndexInternal.
 */
LIB_EXPORT rc_t CC BAMFileOpenIndexWithKPath(const BAMFile *self, const KPath *kpath)
{
    char path[4096];
    size_t nread = 0;
    /* keep one byte in reserve for the terminator written below */
    rc_t rc = KPathReadPath(kpath, path, sizeof(path) - 1, &nread);

    if (rc == 0) {
        if (nread >= sizeof(path))
            return RC(rcAlign, rcIndex, rcReading, rcData, rcExcessive);
        path[nread] = '\0';
        rc = BAMFileOpenIndexInternal(self, path);
    }
    return rc;
}

/* True when 'self' is not NULL and has an index attached. */
LIB_EXPORT bool CC BAMFileIsIndexed(const BAMFile *self)
{
	if (self == NULL)
		return false;
	return self->ndx ? true : false;
}

/* True when 'self' has an index attached and that index has an interval
 * table for reference 'refSeqId'.  Returns false for a NULL file, a file
 * without index, or a reference id the file does not have.
 */
LIB_EXPORT bool CC BAMFileIndexHasRefSeqId(const BAMFile *self, uint32_t refSeqId)
{
	/* the index holds one entry per reference; do not look past the end */
	if (self && self->ndx && refSeqId < self->refSeqs && self->ndx->refSeq[refSeqId])
		return true;
	return false;
}

static rc_t BAMFileGetAlignPos(const BAMFile *self, int64_t *beg, int64_t *end, int32_t *refSeq)
{
    const BAMAlignment *check;
    rc_t rc;
    
    rc = BAMFileRead(self, &check);
    if (rc)
        return rc;
    BAMAlignmentGetPosition(check, beg);
    BAMAlignmentGetRefSeqId(check, refSeq);
    *end = *beg + ReferenceLengthFromCIGAR(check);
    BAMAlignmentRelease(check);
    return rc;
}

/* Positions 'self' at the first alignment on reference 'refSeqId' that
 * overlaps the range alignStart..alignEnd, using the attached index.
 *
 *  refSeqId    zero-based reference number
 *  alignStart  start of the range on the reference
 *  alignEnd    end of the range; clipped to the reference length
 *
 * Returns 0 with the file positioned on the alignment, or an rc:
 *  rcIndex/rcNotFound  no index is attached
 *  rcData/rcNotFound   the reference or range is unknown to the file or the
 *                      index, or no alignment overlaps the range
 *  rcIndex/rcInvalid   a position taken from the index could not be read
 *  rcData/rcInvalid    alignments were found out of order during the scan
 * Errors from getting or setting the file position are returned as is.
 */
LIB_EXPORT rc_t CC BAMFileSeek(const BAMFile *self, uint32_t refSeqId, uint64_t alignStart, uint64_t alignEnd)
{
    BAMFilePosition rpos = 0;
    rc_t rc;
    int64_t prev_alignPos;
    int64_t alignPos;
    int64_t alignEndPos;
    int32_t refSeq;
    
    if (self->ndx == NULL)
        return RC(rcAlign, rcFile, rcPositioning, rcIndex, rcNotFound);
    if (refSeqId >= self->refSeqs)
        return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
    if (self->ndx->refSeq[refSeqId] == NULL)
        return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
    if (alignStart >= self->refSeq[refSeqId].length)
        return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
    if (alignEnd > self->refSeq[refSeqId].length)
        alignEnd = self->refSeq[refSeqId].length;
    
    {{
        unsigned adjust = 0;
        uint32_t ival_start = (uint32_t)(alignStart >> 14);
        uint32_t ival_end = (uint32_t)((alignEnd + 16383) >> 14);
        
        /* find the first interval >= alignStart that has an alignment;
         * '<' rather than '!=' so that an inverted range (alignEnd before
         * alignStart) cannot walk past the end of the interval table
         */
        while (ival_start < ival_end && (rpos = self->ndx->refSeq[refSeqId][ival_start]) == 0)
            ++ival_start;
        
        if (rpos == 0)
            return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
        do {
            rc = BAMFileSetPosition(self, &rpos);
            if (rc == 0)
                rc = BAMFileGetAlignPos(self, &alignPos, &alignEndPos, &refSeq);
            if (rc)
                return RC(rcAlign, rcFile, rcPositioning, rcIndex, rcInvalid);
            if (refSeq != refSeqId)
                return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
            if (alignPos <= alignEnd)
                break; /* we found the interval we were looking for */
            
            /* we over-shot; step back one interval at a time */
            if (++adjust >= ival_start)
                return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
            if ((rpos = self->ndx->refSeq[refSeqId][ival_start - adjust]) == 0)
                return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
        } while (1);
    }}
    prev_alignPos = alignPos;
    
    do {
        if (alignPos > alignEnd)
            return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
        
        /* if the alignment overlaps the target range then we are done;
         * rpos is the position of the alignment just examined
         */
        if (alignPos >= alignStart || alignEndPos >= alignStart)
            return BAMFileSetPosition(self, &rpos);
        
        /* start linear scan */
        rc = BAMFileGetPosition(self, &rpos);
        if (rc)
            return rc;
        rc = BAMFileGetAlignPos(self, &alignPos, &alignEndPos, &refSeq);
        if (rc) return rc;
        if (refSeq != refSeqId)
            return RC(rcAlign, rcFile, rcPositioning, rcData, rcNotFound);
        
        /*  indexed BAM must be sorted by position
         *  so verify that we are not out of order
         *  whether this means that the index is bad
         *  or the file is bad, likely both
         *  fix the file and regenerate the index
         */
        if (prev_alignPos > alignPos)
            return RC(rcAlign, rcFile, rcPositioning, rcData, rcInvalid);
        prev_alignPos = alignPos;
    } while (1);
}

/* Destroys an index object.  The object is a single allocation, so one
 * free() releases it.  Always returns 0.
 */
static rc_t BAMIndexWhack(const BAMIndex *cself) {
    void *const mem = (void *)cself;    /* free() wants a non-const pointer */

    free(mem);
    return 0;
}

/* MARK: BAM Validation Stuff */

/* Opens the file named by 'path' for reading via the native directory.
 *
 *  fp    receives the opened file
 *  path  the location to open; at most 4096 bytes of path text are read
 *
 * Returns the rc of the first step that fails, 0 on success.
 */
static rc_t OpenKPathRead(const KFile **fp, struct KPath const *path)
{
    char text[4096];
    size_t len;
    KDirectory *native;
    rc_t rc;

    rc = KPathReadPath(path, text, sizeof(text), &len);
    if (rc != 0)
        return rc;

    rc = KDirectoryNativeDir(&native);
    if (rc != 0)
        return rc;

    /* the text is not NUL-terminated here, so bound it by its length */
    rc = KDirectoryOpenFileRead(native, fp, "%.*s", (int)len, text);
    KDirectoryRelease(native);
    return rc;
}

/* Reads the whole file named by 'path' into a newly allocated buffer.
 *
 *  data   receives the buffer; the caller owns it and must free() it
 *  dsize  receives the number of bytes actually read
 *  path   the file to read
 *
 * Returns 0 on success; otherwise the rc of the failing file operation,
 * rcTooBig if the file size does not fit in size_t, or rcExhausted if the
 * buffer cannot be allocated.  On failure the outputs are not written.
 * The file is closed again on every path.
 */
static rc_t ReadKPath(void **data, size_t *dsize, struct KPath const *path)
{
    const KFile *fp;
    uint8_t *buff;
    uint64_t fsz = 0;
    size_t bsz = 0;
    rc_t rc = OpenKPathRead(&fp, path);
    
    if (rc)
        return rc;

    rc = KFileSize(fp, &fsz);
    if (rc) {
        KFileRelease(fp);
        return rc;
    }
    if ((size_t)fsz != fsz) {
        KFileRelease(fp);
        return RC(rcAlign, rcFile, rcReading, rcFile, rcTooBig);
    }
    buff = malloc((size_t)fsz);
    if (buff == NULL) {
        KFileRelease(fp);
        return RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);
    }
    while (bsz < (size_t)fsz) {
        size_t nread = 0;
        
        /* continue where the previous read stopped */
        rc = KFileRead(fp, bsz, buff + bsz, (size_t)fsz - bsz, &nread);
        if (rc)
            break;
        /* end of file before the reported size; return what there is
         * rather than spin forever
         */
        if (nread == 0)
            break;
        bsz += nread;
    }
    KFileRelease(fp);
    if (rc) {
        free(buff);
        return rc;
    }
    *data = buff;
    *dsize = bsz;
    return 0;
}

/* Opens the file named by 'path' and initializes 'bgzf' from it.
 * The local file reference is released once BGZFileInit has returned.
 * Returns the rc of the open or of BGZFileInit.
 */
static rc_t KPath2BGZF(BGZFile *bgzf, struct KPath const *path)
{
    const KFile *kf;
    rc_t rc = OpenKPathRead(&kf, path);

    if (rc != 0)
        return rc;

    rc = BGZFileInit(bgzf, kf);
    KFileRelease(kf);
    return rc;
}

/* One file position taken from the index, remembered for validation. */
struct index_data {
    uint64_t position;  /* as stored in the index: upper 48 bits are the offset of the
                         * block in the BAM file, lower 16 bits the offset in the block */
    unsigned refNo;     /* reference the position was listed under */
    unsigned binNo;     /* bin the position was listed under; ~0u if from an interval list */
    bool found;         /* NOTE(review): not read or written in the code visible here */
};

/* A position/size pair.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the meaning of the fields should be confirmed against its users.
 */
struct buffer_data {
    uint64_t position;
    size_t size;
};

/* State shared by the BAM/BAI validation callbacks. */
typedef struct BAMValidate_ctx_s BAMValidate_ctx_t;
struct BAMValidate_ctx_s {
    BAMValidateCallback callback;   /* progress/result callback supplied by the caller */
    void *ctx;                      /* passed as first argument to 'callback' */
    BAMValidateStats *stats;        /* statistics being accumulated and reported */
    const uint8_t *bai;             /* start of the index image; used to compute offsets into it */
    int32_t *refLen;                /* reference lengths, 'nrefs' entries */
    struct index_data *position;    /* positions collected from the index, 'npositions' used */
    uint8_t *buf;                   /* data being validated; presumably decompressed BAM data - confirm */
    uint8_t *nxt;                   /* NOTE(review): use not visible here */
    size_t bsize;                   /* number of bytes accumulated in 'buf' */
    size_t alloced;                 /* presumably the allocated size of 'buf' - confirm */
    size_t dnext;                   /* NOTE(review): use not visible here */
    uint32_t options;               /* BAMValidateOption bits selecting the checks to run */
    uint32_t lastRefId;             /* reference id of the previous aligned record (sort check) */
    uint32_t lastRefPos;            /* position of the previous aligned record (sort check) */
    unsigned npositions;            /* entries used in 'position' */
    unsigned mpositions;            /* entries allocated in 'position' */
    unsigned nrefs;                 /* number of entries in 'refLen' */
    bool cancelled;                 /* set when 'callback' returned non-zero */
};

/* WalkIndexStructure callback used when only the structure of the index is
 * validated: records how far into the index image the walk has come and
 * reports progress through the user's callback.  A non-zero result from the
 * user's callback marks the validation as cancelled and stops the walk.
 */
static
rc_t IndexValidateStructure(const uint8_t data[], size_t dlen,
                            unsigned refNo,
                            unsigned refs,
                            enum BAMIndexStructureTypes type,
                            unsigned binNo,
                            unsigned bins,
                            unsigned elements,
                            void *Ctx)
{
    BAMValidate_ctx_t *const state = Ctx;
    rc_t result;

    state->stats->baiFilePosition = data - state->bai;
    result = state->callback(state->ctx, 0, state->stats);
    if (result != 0)
        state->cancelled = true;
    return result;
}

/* ksort comparison function: orders struct index_data by ascending
 * 'position'.  Returns -1, 0 or 1; the third argument is unused.
 */
static int CC comp_index_data(const void *A, const void *B, void *ignored)
{
    uint64_t const lhs = ((const struct index_data *)A)->position;
    uint64_t const rhs = ((const struct index_data *)B)->position;

    return (lhs > rhs) - (lhs < rhs);
}

static
rc_t BAMValidateLoadIndex(const uint8_t data[], size_t dlen,
                          unsigned refNo,
                          unsigned refs,
                          enum BAMIndexStructureTypes type,
                          unsigned binNo,
                          unsigned bins,
                          unsigned elements,
                          void *Ctx)
{
    BAMValidate_ctx_t *ctx = Ctx;
    unsigned const n = type == bai_16kIntervals ? elements : elements * 2;
    unsigned i;
    unsigned j;
    
    if (type == bai_StartStopPairs && binNo >= MAX_BIN)
        return 0;
    
    if (ctx->npositions + elements > ctx->mpositions) {
        void *temp;
        
        do { ctx->mpositions <<= 1; } while (ctx->npositions + elements > ctx->mpositions);
        temp = realloc(ctx->position, ctx->mpositions * sizeof(ctx->position[0]));
        if (temp == NULL)
            return RC(rcAlign, rcIndex, rcReading, rcMemory, rcExhausted);
        ctx->position = temp;
    }
    for (j = i = 0; i != n; ++i) {
        uint64_t const pos = get_pos(&data[i * 8]);
        
        if (type == bai_StartStopPairs && (i & 1) != 0)
            continue;
        
        if (pos) {
            ctx->position[ctx->npositions + j].refNo = refNo;
            ctx->position[ctx->npositions + j].binNo = binNo;
            ctx->position[ctx->npositions + j].position = pos;
            ++j;
        }
    }
    ctx->npositions += j;
    return 0;
}

/* Checks that 'data' starts with a complete BAM header and locates its
 * parts.
 *
 *  data, dsize  the start of the decompressed BAM data
 *  header_len   receives the length of the header text
 *  refs_start   receives the offset of the first reference entry
 *  nrefs        receives the number of reference entries
 *  data_start   receives the offset just past the last reference entry
 *
 * Layout checked: magic "BAM\1", int32 text length, the text, int32
 * reference count, then per reference an int32 name length, the name and an
 * int32 reference length.
 *
 * Returns 0 and fills the outputs on success; rcIncomplete if 'dsize' is too
 * small to hold the header (more data is needed), rcUnrecognized if the
 * magic is wrong, rcInvalid if a length or count is negative.
 *
 * All lengths are untrusted; they are compared against the bytes remaining
 * so that the arithmetic cannot overflow.
 */
static
rc_t BAMValidateHeader(const uint8_t data[],
                       unsigned dsize,
                       unsigned *header_len,
                       unsigned *refs_start,
                       unsigned *nrefs,
                       unsigned *data_start
                       )
{
    int32_t hlen;
    int32_t refs;
    unsigned i;
    unsigned cp;    /* invariant: cp <= dsize */
    
    if (dsize < 8)
        return RC(rcAlign, rcFile, rcValidating, rcData, rcIncomplete);
    
    if (memcmp(data, "BAM\1", 4) != 0)
        return RC(rcAlign, rcFile, rcValidating, rcFormat, rcUnrecognized);
    
    memcpy(&hlen, &data[4], 4);
    hlen = LE2HI32(&hlen);
    if (hlen < 0)
        return RC(rcAlign, rcFile, rcValidating, rcData, rcInvalid);
    
    /* magic + text length + text + reference count */
    if (dsize < 12 || (unsigned)hlen > dsize - 12)
        return RC(rcAlign, rcFile, rcValidating, rcData, rcIncomplete);
    
    memcpy(&refs, &data[hlen + 8], 4);
    refs = LE2HI32(&refs);
    if (refs < 0)
        return RC(rcAlign, rcFile, rcValidating, rcData, rcInvalid);
    
    for (cp = (unsigned)hlen + 12, i = 0; i != (unsigned)refs; ++i) {
        int32_t nlen;
        
        if (dsize - cp < 4)
            return RC(rcAlign, rcFile, rcValidating, rcData, rcIncomplete);
        
        memcpy(&nlen, &data[cp], 4);
        nlen = LE2HI32(&nlen);
        if (nlen < 0)
            return RC(rcAlign, rcFile, rcValidating, rcData, rcInvalid);
        
        /* an entry is: name length (4) + name (nlen) + reference length (4) */
        if (dsize - cp < 8 || (unsigned)nlen > dsize - cp - 8)
            return RC(rcAlign, rcFile, rcValidating, rcData, rcIncomplete);
        
        cp += (unsigned)nlen + 8;
    }
    
    *nrefs = refs;
    *refs_start = 12 + (*header_len = hlen);
    *data_start = cp;
    return 0;
}

/* Validates a BAI index, optionally against the BAM file it belongs to.
 *
 *  bampath          the BAM file; only opened when offsets are checked
 *  baipath          the index file
 *  options          bits within bvo_IndexOptions select the depth of the
 *                   check: structure only, block offsets, record offsets,
 *                   or record contents
 *  callback         receives progress and results in a BAMValidateStats
 *  callbackContext  passed through to 'callback'
 *
 * Returns the rc of reading the index, the rc of the structure walk when
 * only the structure is checked, rcExhausted if the position array cannot
 * be allocated, and otherwise whatever the final call to 'callback'
 * returns.  The index image, the position array and the BAM file are
 * released on every path.
 */
static rc_t BAMValidateIndex(struct KPath const *bampath,
                             struct KPath const *baipath,
                             BAMValidateOption options,
                             BAMValidateCallback callback,
                             void *callbackContext
                             )
{
    rc_t rc = 0;
    BGZFile bam;
    uint8_t *bai = NULL;
    size_t bai_size;
    BAMValidateStats stats;
    BAMValidate_ctx_t ctx;
    uint8_t data[2 * ZLIB_BLOCK_SIZE];
    uint32_t dsize = 0;
    uint64_t pos = 0;
    uint32_t temp;
    int32_t ref = -1;
    int32_t rpos = -1;
    
    /* the callback must never be shown uninitialized statistics */
    memset(&stats, 0, sizeof(stats));
    
    if ((options & bvo_IndexOptions) == 0)
        return callback(callbackContext, 0, &stats);

    rc = ReadKPath((void **)&bai, &bai_size, baipath);
    if (rc)
        return rc;
    
    memset(&ctx, 0, sizeof(ctx));
    
    ctx.bai = bai;
    ctx.stats = &stats;
    ctx.options = options;
    ctx.ctx = callbackContext;
    ctx.callback = callback;
    
    if ((options & bvo_IndexOptions) == bvo_IndexStructure) {
        rc = WalkIndexStructure(bai, bai_size, IndexValidateStructure, &ctx);
        free(bai);
        return rc;
    }

    rc = KPath2BGZF(&bam, bampath);
    if (rc == 0) {
        ctx.mpositions = 1024 * 32;
        ctx.position = malloc(ctx.mpositions * sizeof(ctx.position[0]));
        if (ctx.position == NULL) {
            BGZFileWhack(&bam);
            free(bai);
            return RC(rcAlign, rcIndex, rcReading, rcMemory, rcExhausted);
        }
        
        rc = WalkIndexStructure(bai, bai_size, BAMValidateLoadIndex, &ctx);
        free(bai);
        if (rc) {
            stats.indexStructureIsBad = true;
            rc = callback(callbackContext, rc, &stats);
        }
        else {
            unsigned i = 0;
            
            stats.indexStructureIsGood = true;
            stats.baiFileSize = ctx.npositions;
            
            /* visit the positions in file order so each block is read once */
            ksort(ctx.position, ctx.npositions, sizeof(ctx.position[0]), comp_index_data, 0);
            
            stats.bamFileSize = bam.fsize;
            
            while (i < ctx.npositions) {
                uint64_t const ifpos = ctx.position[i].position >> 16;
                uint16_t const bpos = (uint16_t)ctx.position[i].position;
                
                stats.baiFilePosition = i;
                if (i == 0 || ifpos != pos) {
                    bool blockFailed;
                    
                    stats.bamFilePosition = pos = ifpos;
                    rc = BGZFileSetPos(&bam, pos);
                    if (rc == 0)
                        rc = BGZFileRead(&bam, data, &dsize);
                    blockFailed = (rc != 0);
                    if (rc) {
                        /* every index entry pointing into this block is bad */
                        ++stats.indexFileOffset.error;
                        do {
                            ++i;
                            if (i == ctx.npositions)
                                break;
                            if (ctx.position[i].position >> 16 != pos)
                                break;
                            ++stats.indexFileOffset.error;
                        } while (1);
                    }
                    else
                        ++stats.indexFileOffset.good;

                    rc = callback(callbackContext, rc, &stats);
                    if (rc)
                        break;
                    /* 'i' already names the first entry of the next block
                     * (or the end); there is no block data to look into
                     */
                    if (blockFailed)
                        continue;
                }
                else
                    ++stats.indexFileOffset.good;
                if ((options & bvo_IndexOptions) > bvo_IndexOffsets1) {
                    int32_t rsize = 0;
                    BAMAlignment algn;
                    
                    if (bpos >= dsize)
                        goto BAD_BLOCK_OFFSET;
                    if (dsize - bpos < 4) {
                    READ_MORE:
                        /* 'data' holds at most two blocks */
                        if (dsize > ZLIB_BLOCK_SIZE)
                            goto BAD_BLOCK_OFFSET;

                        rc = BGZFileRead(&bam, data + dsize, &temp);
                        if (rc) {
                            ++stats.blockCompression.error;
                            goto BAD_BLOCK_OFFSET;
                        }
                        dsize += temp;
                        if (dsize - bpos < 4 || dsize - bpos < rsize)
                            goto BAD_BLOCK_OFFSET;
                    }
                    memcpy(&temp, data + bpos, 4);
                    rsize = LE2HI32(&temp);
                    if (rsize <= 0)
                        goto BAD_BLOCK_OFFSET;
                    if (rsize > 0xFFFF) {
                        ++stats.indexBlockOffset.warning;
                        ++i;
                        continue;
                    }
                    if (dsize - bpos < rsize)
                        goto READ_MORE;
                    rc = BAMAlignmentParse(&algn, data + bpos + 4, rsize);
                    if (rc)
                        goto BAD_BLOCK_OFFSET;
                    ++stats.indexBlockOffset.good;
                    if ((options & bvo_IndexOptions) > bvo_IndexOffsets2) {
                        int32_t const refSeqId = getRefSeqId(&algn);
                        uint16_t const binNo = getBin(&algn);
                        int32_t const position = getPosition(&algn);
                        
                        if (ctx.position[i].refNo == refSeqId &&
                            (ctx.position[i].binNo == binNo ||
                             ctx.position[i].binNo == ~((unsigned)0)
                        ))
                            ++stats.indexBin.good;
                        else if (ctx.position[i].refNo == refSeqId)
                            ++stats.indexBin.warning;
                        else
                            ++stats.indexBin.error;
                        
                        if (refSeqId < ref || position < rpos)
                            ++stats.inOrder.error;
                        
                        ref = refSeqId;
                        rpos = position;
                    }
                }
                if (0) {
                BAD_BLOCK_OFFSET:
                    ++stats.indexBlockOffset.error;
                }
                ++i;
            }
        }
        
        free(ctx.position);
        BGZFileWhack(&bam);
    }
    else
        free(bai);
    stats.bamFilePosition = stats.bamFileSize;
    return callback(callbackContext, rc, &stats);
}

/* Per-record checks of the BAM validation.
 *
 * A record counts as aligned when it is not flagged unmapped, names a known
 * reference, lies within that reference and has a mapping quality above 0.
 * For aligned records, when bvo_Sorted is requested, the record is counted
 * as in or out of order relative to the previous aligned record.
 *
 * The checks for bvo_ExtraFields, bvo_CIGARConsistency, bvo_BinConsistency,
 * bvo_FlagsConsistency, bvo_QualityValues, bvo_MissingSequence,
 * bvo_MissingQuality and bvo_FlagsStats are not implemented.
 *
 * Always returns 0.
 */
static rc_t BAMValidate3(BAMValidate_ctx_t *ctx,
                         BAMAlignment const *algn
                         )
{
    uint16_t const flags = getFlags(algn);
    int32_t const refSeqId = getRefSeqId(algn);
    int32_t const refPos = getPosition(algn);
    unsigned const mapQ = getMapQual(algn);
    bool aligned = false;

    /* the order of the tests matters: refLen[] is only indexed once the
     * reference id is known to be in range
     */
    if ((flags & BAMFlags_SelfIsUnmapped) == 0 &&
        refSeqId >= 0 && refSeqId < ctx->nrefs &&
        refPos >= 0 && refPos < ctx->refLen[refSeqId] &&
        mapQ > 0)
    {
        aligned = true;
    }

    if (aligned && (ctx->options & bvo_Sorted) != 0) {
        bool const sameRefInOrder =
            ctx->lastRefId == refSeqId && ctx->lastRefPos <= refPos;

        if (ctx->lastRefId < refSeqId || sameRefInOrder)
            ++ctx->stats->inOrder.good;
        else
            ++ctx->stats->inOrder.error;

        ctx->lastRefId = refSeqId;
        ctx->lastRefPos = refPos;
    }
    return 0;
}

/* BAMValidate2
 *  Block callback handed to BGZFileWalkBlocks by BAMValidateBAM.
 *
 *  Ctx    [ IN ]  - the BAMValidate_ctx_t for this validation run
 *  file   [ IN ]  - the file being walked (unused here)
 *  rc     [ IN ]  - result of reading (and possibly inflating) the block
 *  fpos   [ IN ]  - file position, recorded in stats->bamFilePosition
 *  data   [ IN ]  - address of the block's data; compared against ctx->buf
 *                   to decide whether consumed bytes may be compacted away
 *  dsize  [ IN ]  - number of data bytes delivered for this block
 *
 *  Depending on ctx->options this only counts block header / compression
 *  results, or additionally accumulates the data in ctx->buf, validates the
 *  BAM header once enough bytes are present, and then walks the
 *  length-prefixed alignment records, updating ctx->stats as it goes.
 *
 *  The user callback is invoked once per call with the current rc and stats.
 *  Returns the user callback's result, except for out-of-memory conditions,
 *  which are returned directly.
 */
static rc_t BAMValidate2(void *Ctx, const BGZFile *file,
                         rc_t rc, uint64_t fpos,
                         const zlib_block_t data, unsigned dsize)
{
    BAMValidate_ctx_t *ctx = Ctx;
    rc_t rc2;
    bool fatal = false;
    
    ctx->stats->bamFilePosition = fpos;
    if (rc) {
        /* the block itself was bad; attribute the error to whichever
         * level of checking was requested */
        if (ctx->options == bvo_BlockHeaders)
            ++ctx->stats->blockHeaders.error;
        else
            ++ctx->stats->blockCompression.error;
    }
    else if (ctx->options == bvo_BlockHeaders) {
        ++ctx->stats->blockHeaders.good;
    }
    else if (ctx->options == bvo_BlockCompression) {
        ++ctx->stats->blockHeaders.good;
        ++ctx->stats->blockCompression.good;
    }
    else if (dsize) {
        /* the new block's bytes are accounted for as appended to the
         * end of the buffered data */
        ctx->bsize += dsize;
        if (!ctx->stats->bamHeaderIsBad && !ctx->stats->bamHeaderIsGood) {
            unsigned header_len;
            unsigned refs_start;
            unsigned nrefs;
            unsigned data_start;
            
            rc2 = BAMValidateHeader(ctx->buf, ctx->bsize,
                                       &header_len, &refs_start,
                                       &nrefs, &data_start);
            
            if (rc2 == 0) {
                ctx->stats->bamHeaderIsGood = true;
                if (ctx->options & bvo_BinConsistency) {
                    ctx->refLen = malloc(nrefs * sizeof(ctx->refLen[0]));
                    if (ctx->refLen == NULL) {
                        rc = RC(rcAlign, rcFile, rcValidating, rcMemory, rcExhausted);
                        fatal = true;
                    }
                    else {
                        unsigned cp;
                        unsigned i;
                        
                        ctx->nrefs = nrefs;
                        /* each entry is: int32 name length, name bytes,
                         * int32 reference length.
                         * never store more entries than were allocated,
                         * and decode the little-endian fields the same
                         * way record sizes are decoded below */
                        for (i = 0, cp = refs_start; i < nrefs && cp != data_start; ++i) {
                            int32_t len;
                            int32_t rlen;
                            
                            memcpy(&len, &ctx->buf[cp], 4);
                            len = LE2HI32(&len);
                            memcpy(&rlen, &ctx->buf[cp + 4 + len], 4);
                            ctx->refLen[i] = LE2HI32(&rlen);
                            cp += len + 8;
                        }
                    }
                }
                /* records start right after the reference list */
                ctx->dnext = data_start;
            }
            else if (GetRCState(rc2) != rcIncomplete || GetRCObject(rc2) != rcData) {
                /* a real header error (not just "need more bytes"):
                 * fall back to block-level checking for the rest of the file */
                ctx->stats->bamHeaderIsBad = true;
                ctx->options = bvo_BlockCompression;
                rc = rc2;
            }
            else
                ctx->dnext = ctx->bsize; /* header incomplete; keep accumulating */
        }
        if (rc == 0) {
            if (ctx->stats->bamHeaderIsGood) {
                unsigned cp = ctx->dnext;
                
                while (cp + 4 < ctx->bsize) {
                    int32_t rsize;
                    
                    memcpy(&rsize, &ctx->buf[cp], 4);
                    rsize = LE2HI32(&rsize);
                    if (rsize < 0) {
                        ++ctx->stats->blockStructure.error;
                        ctx->options = bvo_BlockStructure;
                        
                        /* throw away the rest of the current buffer */
                        if (cp >= ctx->bsize - dsize)
                            cp = ctx->bsize;
                        else
                            cp = ctx->bsize - dsize;
                        
                        rc = RC(rcAlign, rcFile, rcValidating, rcData, rcInvalid);
                        break;
                    }
                    /* '<=': a record that ends exactly at the end of the
                     * buffered data is complete. With '<' the final record
                     * of the stream was never counted, because no further
                     * non-empty block arrives to push the buffer past it. */
                    else if (cp + 4 + rsize <= ctx->bsize) {
                        if (rsize > UINT16_MAX)
                            ++ctx->stats->blockStructure.warning;
                        else
                            ++ctx->stats->blockStructure.good;
                        if (ctx->options > bvo_BlockStructure) {
                            BAMAlignment algn;
                            
                            rc = BAMAlignmentParse(&algn, &ctx->buf[cp + 4], rsize);
                            if (rc == 0) {
                                ++ctx->stats->recordStructure.good;
                                if (ctx->options > bvo_RecordStructure)
                                    rc = BAMValidate3(ctx, &algn);
                            }
                            else
                                ++ctx->stats->recordStructure.error;
                        }
                        cp += 4 + rsize;
                    }
                    else
                        break; /* record continues in a later block */
                }
                /* once parsing has reached the newest block, everything
                 * before cp is consumed and can be dropped */
                if (&ctx->buf[cp] >= data) {
                    if (cp < ctx->bsize) {
                        ctx->bsize -= cp;
                        memmove(ctx->buf, &ctx->buf[cp], ctx->bsize);
                        cp = ctx->bsize;
                    }
                    else {
                        assert(cp == ctx->bsize);
                        cp = ctx->bsize = 0;
                    }
                }
                ctx->dnext = cp;
            }
            /* keep room for one more full block after the buffered data */
            if (ctx->alloced < ctx->bsize + ZLIB_BLOCK_SIZE) {
                void *temp;
                
                temp = realloc(ctx->buf, ctx->alloced + ZLIB_BLOCK_SIZE);
                if (temp == NULL) {
                    rc = RC(rcAlign, rcFile, rcValidating, rcMemory, rcExhausted);
                    fatal = true;
                }
                else {
                    ctx->buf = temp;
                    ctx->alloced += ZLIB_BLOCK_SIZE;
                }
            }
            /* the next block must be appended after ALL buffered bytes.
             * dnext is the parse cursor and can trail bsize when a record
             * spans more than the buffered blocks; writing at dnext in that
             * case would overwrite the unconsumed part of that record.
             * NOTE(review): assumes BGZFileWalkBlocks writes each block at
             * ctx->nxt, as the comparison of 'data' with ctx->buf implies. */
            ctx->nxt = &ctx->buf[ctx->bsize];
        }
    }
    rc2 = ctx->callback(ctx->ctx, rc, ctx->stats);
    ctx->cancelled |= rc2 != 0;
    return fatal ? rc : rc2;
}

/* BAMValidateBAM
 *  Validate a BAM file without an index.
 *
 *  bampath          [ IN ]  - path of the BAM file; must not be NULL
 *  options          [ IN ]  - requested validation level / checks
 *  callback         [ IN ]  - progress callback, invoked by BAMValidate2
 *  callbackContext  [ IN ]  - opaque pointer handed back to callback
 *
 *  Sets up the validation context, opens the file and walks its blocks with
 *  BAMValidate2 as the per-block handler. Blocks are decompressed only when
 *  the requested level is above bvo_BlockHeaders.
 *
 *  Returns rcParam/rcNull for a NULL path, rcMemory/rcExhausted if the
 *  working buffer cannot be allocated, otherwise the result of opening or
 *  walking the file.
 */
static rc_t BAMValidateBAM(struct KPath const *bampath,
                           BAMValidateOption options,
                           BAMValidateCallback callback,
                           void *callbackContext
                           )
{
    rc_t rc;
    BGZFile bam;
    BAMValidate_ctx_t ctx;
    BAMValidateStats stats;

    if (bampath == NULL)
        return RC(rcAlign, rcFile, rcValidating, rcParam, rcNull);
    
    memset(&ctx, 0, sizeof(ctx));
    memset(&stats, 0, sizeof(stats));
    
    ctx.callback = callback;
    ctx.ctx = callbackContext;
    ctx.options = options;
    ctx.stats = &stats;
    
    /* a working buffer is only needed when block contents are examined */
    if (options > bvo_BlockCompression) {
        ctx.alloced = ZLIB_BLOCK_SIZE * 2;
        ctx.nxt = ctx.buf = malloc(ctx.alloced);
        
        if (ctx.buf == NULL)
            return RC(rcAlign, rcFile, rcValidating, rcMemory, rcExhausted);
    }
    
    /* only the local copy is normalized; ctx.options keeps the value
     * the caller passed in */
    if (options > bvo_RecordStructure)
        options = bvo_RecordStructure | (options & 0xFFF0);
    
    rc = KPath2BGZF(&bam, bampath);
    if (rc == 0) {
        stats.bamFileSize = bam.fsize;
        if ((options & 7) > bvo_BlockHeaders)
            rc = BGZFileWalkBlocks(&bam, true, (zlib_block_t *)&ctx.nxt, BAMValidate2, &ctx);
        else
            rc = BGZFileWalkBlocks(&bam, false, NULL, BAMValidate2, &ctx);
    }
    BGZFileWhack(&bam);

    /* release what this run allocated: the reference-length table
     * (allocated by BAMValidate2) and the working buffer.
     * both are NULL when unused, which free() accepts */
    free(ctx.refLen);
    free(ctx.buf);
    return rc;
}

/* dummy_cb
 *  No-op validation callback, substituted by BAMValidate when the caller
 *  passes no callback. Ignores all arguments and always returns 0.
 */
static rc_t CC dummy_cb(void *ctx, rc_t result, const BAMValidateStats *stats)
{
    (void)ctx;
    (void)result;
    (void)stats;

    return 0;
}

/* BAMValidate
 *  Public entry point for BAM validation.
 *
 *  bampath          [ IN ]            - path of the BAM file; required
 *  baipath          [ IN, NULL OKAY ] - path of the index; when given, the
 *                                       index validator is used
 *  options          [ IN ]            - requested checks
 *  callback         [ IN, NULL OKAY ] - progress callback; a no-op is
 *                                       substituted when NULL
 *  callbackContext  [ IN ]            - opaque pointer handed to callback
 *
 *  Returns rcParam/rcNull when bampath is NULL, or when index checks are
 *  requested (bvo_IndexOptions) without an index path; otherwise the result
 *  of the selected validator.
 */
LIB_EXPORT rc_t CC BAMValidate(struct KPath const *bampath,
                               struct KPath const *baipath,
                               BAMValidateOption options,
                               BAMValidateCallback callback,
                               void *callbackContext
                               )
{
    BAMValidateCallback const cb = (callback != NULL) ? callback : dummy_cb;

    if (bampath == NULL)
        return RC(rcAlign, rcFile, rcValidating, rcParam, rcNull);

    /* with an index available, validate the pair */
    if (baipath != NULL)
        return BAMValidateIndex(bampath, baipath, options, cb, callbackContext);

    /* index checks cannot be honored without an index */
    if (options & bvo_IndexOptions)
        return RC(rcAlign, rcFile, rcValidating, rcParam, rcNull);

    return BAMValidateBAM(bampath, options, cb, callbackContext);
}
