Skip to content

Commit

Permalink
Complete testing for num_records
Browse files Browse the repository at this point in the history
Remove return value check, as it wasn't diagnostic
  • Loading branch information
jeromekelleher authored and brentp committed Jan 19, 2024
1 parent 5c81a7a commit 8b5e1c0
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 15 deletions.
20 changes: 15 additions & 5 deletions cyvcf2/cyvcf2.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -698,19 +698,29 @@ cdef class VCF(HTSFile):
assert self.hidx == NULL

if idx == NULL:
raise ValueError("File must be indexed to compute num_records")
raise ValueError(
"File must be indexed to compute num_records (tip: use bcftools index)")

nseq = hts_idx_nseq(idx)
total = 0;
for tid in range(nseq):
ret = hts_idx_get_stat(idx, tid, &records, &v);
if ret < 0:
raise Exception("Error in `htslib::hts_idx_get_stat` ret: %d" % (ret))
# NOTE: the return value here doesn't seem to indicate an error
# condition, and correct values are computed when it returns < 0.
# bcftools index -n doesn't strictly check the output.
hts_idx_get_stat(idx, tid, &records, &v);
total += records
return total

property num_records:
"Number of records in the file derived from the index"
"""
The number of VCF records in the file, computed from the index.
If the file is not indexed (or an index has not been set using
``set_index``) a ValueError is raised.
Note that incorrect values may be returned if a mismatched
index file (i.e., the index for a different VCF file) is used.
This is not detected as an error condition.
"""
def __get__(self):
return self._num_records()

Expand Down
20 changes: 10 additions & 10 deletions cyvcf2/tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,7 @@ def test_issue17_no_gt():
"test-strict-gt-option-flag.vcf.gz",
"multi-contig.vcf.gz",
"multi-contig.bcf",
"test.snpeff.bcf",
])
def test_num_records_indexed(path):
vcf = VCF(os.path.join(HERE, path))
Expand Down Expand Up @@ -1392,6 +1393,15 @@ def test_num_records_set_index_multiple_times():
vcf.set_index(tbi_index)
assert n == vcf.num_records

def test_num_records_set_wrong_index():
path = os.path.join(HERE, "multi-contig.vcf.gz")
index = os.path.join(HERE, "test.vcf.gz.tbi")
vcf = VCF(path)
vcf.set_index(index)
# We compute the number of records from the index, and don't report an
# error
assert vcf.num_records == 115
assert vcf.num_records != len(list(vcf))

@pytest.mark.parametrize("path", [
"test-genotypes.vcf",
Expand All @@ -1400,13 +1410,3 @@ def test_num_records_no_index(path):
vcf = VCF(os.path.join(HERE, path))
with pytest.raises(ValueError, match="must be indexed"):
vcf.num_records

def test_num_records_incompatible_index():
b = VCF('{}/test.snpeff.bcf'.format(HERE))
with pytest.raises(Exception, match="hts_idx_get_stat"):
b.num_records
assert len(list(b)) == 10

b.set_index("{}/test-diff.csi".format(HERE))
print(b.num_records)
# Not sure why this is giving an error - is it an old file?

0 comments on commit 8b5e1c0

Please sign in to comment.