19 Create count-matrix from multiple PyRanges
If you have multiple PyRanges that you want to create a count-matrix from, you
can use pr.count_overlaps(grs, features=None, how=None, nb_cpu=1,
strandedness=None).
grs is a dictionary of PyRanges, and features is the PyRanges in which you want
to count overlaps. If no features argument is provided, one is created from the grs.
from io import StringIO
import pyranges as pr
import pandas as pd
a = """Chromosome Start End
chr1 6 12
chr1 10 20
chr1 22 27
chr1 24 30"""
b = """Chromosome Start End
chr1 12 32
chr1 14 30"""
c = """Chromosome Start End
chr1 8 15
chr1 10 14
chr1 32 34"""
grs = [pr.PyRanges(pd.read_table(StringIO(x), sep=r"\s+")) for x in [a, b, c]]
grs = {k: v for k, v in zip("abc", grs)}
features = pr.concat(grs.values()).split()
print(features)
## +--------------+-----------+-----------+
## | Chromosome | Start | End |
## | (object) | (int32) | (int32) |
## |--------------+-----------+-----------|
## | chr1 | 6 | 8 |
## | chr1 | 8 | 10 |
## | chr1 | 10 | 12 |
## | chr1 | 12 | 14 |
## | ... | ... | ... |
## | chr1 | 24 | 27 |
## | chr1 | 27 | 30 |
## | chr1 | 30 | 32 |
## | chr1 | 32 | 34 |
## +--------------+-----------+-----------+
## Unstranded PyRanges object has 12 rows and 3 columns from 1 chromosomes.
## For printing, the PyRanges was sorted on Chromosome.
print(pr.count_overlaps(grs, features))
## +--------------+-----------+-----------+-----------+-----------+-----------+
## | Chromosome | Start | End | a | b | c |
## | (object) | (int32) | (int32) | (int32) | (int32) | (int32) |
## |--------------+-----------+-----------+-----------+-----------+-----------|
## | chr1 | 6 | 8 | 1 | 0 | 0 |
## | chr1 | 8 | 10 | 1 | 0 | 1 |
## | chr1 | 10 | 12 | 2 | 0 | 2 |
## | chr1 | 12 | 14 | 1 | 1 | 2 |
## | ... | ... | ... | ... | ... | ... |
## | chr1 | 24 | 27 | 2 | 2 | 0 |
## | chr1 | 27 | 30 | 1 | 2 | 0 |
## | chr1 | 30 | 32 | 0 | 1 | 0 |
## | chr1 | 32 | 34 | 0 | 0 | 1 |
## +--------------+-----------+-----------+-----------+-----------+-----------+
## Unstranded PyRanges object has 12 rows and 6 columns from 1 chromosomes.
## For printing, the PyRanges was sorted on Chromosome.
print(pr.count_overlaps(grs))
## +--------------+-----------+-----------+-----------+-----------+-----------+
## | Chromosome | Start | End | a | b | c |
## | (object) | (int32) | (int32) | (int32) | (int32) | (int32) |
## |--------------+-----------+-----------+-----------+-----------+-----------|
## | chr1 | 6 | 8 | 1 | 0 | 0 |
## | chr1 | 8 | 10 | 1 | 0 | 1 |
## | chr1 | 10 | 12 | 2 | 0 | 2 |
## | chr1 | 12 | 14 | 1 | 1 | 2 |
## | ... | ... | ... | ... | ... | ... |
## | chr1 | 24 | 27 | 2 | 2 | 0 |
## | chr1 | 27 | 30 | 1 | 2 | 0 |
## | chr1 | 30 | 32 | 0 | 1 | 0 |
## | chr1 | 32 | 34 | 0 | 0 | 1 |
## +--------------+-----------+-----------+-----------+-----------+-----------+
## Unstranded PyRanges object has 12 rows and 6 columns from 1 chromosomes.
## For printing, the PyRanges was sorted on Chromosome.