9 Finding the closest intervals
With the nearest method, you can find, for each interval in self, the interval in other that is closest to it.
import pyranges as pr
gr = pr.load_dataset("chipseq")
gr2 = pr.load_dataset("chipseq_background")
print(gr.nearest(gr2, suffix="_Input"))
## +--------------+-----------+-----------+------------+-----------+--------------+---------------+-------------+--------------+---------------+----------------+------------+
## | Chromosome | Start | End | Name | Score | Strand | Start_Input | End_Input | Name_Input | Score_Input | Strand_Input | Distance |
## | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) |
## |--------------+-----------+-----------+------------+-----------+--------------+---------------+-------------+--------------+---------------+----------------+------------|
## | chr1 | 226987592 | 226987617 | U0 | 0 | + | 226987603 | 226987628 | U0 | 0 | - | 0 |
## | chr15 | 26105515 | 26105540 | U0 | 0 | + | 26105493 | 26105518 | U0 | 0 | + | 0 |
## | chr8 | 38747226 | 38747251 | U0 | 0 | - | 38747236 | 38747261 | U0 | 0 | + | 0 |
## | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
## | chrY | 8316773 | 8316798 | U0 | 0 | + | 20557165 | 20557190 | U0 | 0 | + | 2312314 |
## | chrY | 7463444 | 7463469 | U0 | 0 | + | 20557165 | 20557190 | U0 | 0 | + | 3165643 |
## | chrY | 7405376 | 7405401 | U0 | 0 | - | 20557165 | 20557190 | U0 | 0 | + | 3223711 |
## +--------------+-----------+-----------+------------+-----------+--------------+---------------+-------------+--------------+---------------+----------------+------------+
## PyRanges object has 10000 sequences from 24 chromosomes.
The nearest method takes a strandedness option, which can be either "same", "opposite" or False/None.
print(gr.nearest(gr2, suffix="_Input", strandedness="opposite"))
## +--------------+-----------+-----------+------------+-----------+--------------+---------------+-------------+--------------+---------------+----------------+------------+
## | Chromosome | Start | End | Name | Score | Strand | Start_Input | End_Input | Name_Input | Score_Input | Strand_Input | Distance |
## | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) |
## |--------------+-----------+-----------+------------+-----------+--------------+---------------+-------------+--------------+---------------+----------------+------------|
## | chr1 | 226987592 | 226987617 | U0 | 0 | + | 226987603 | 226987628 | U0 | 0 | - | 0 |
## | chr8 | 38747226 | 38747251 | U0 | 0 | - | 38747236 | 38747261 | U0 | 0 | + | 0 |
## | chr1 | 212609534 | 212609559 | U0 | 0 | + | 212410559 | 212410584 | U0 | 0 | - | 198951 |
## | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
## | chrY | 13517892 | 13517917 | U0 | 0 | - | 11776321 | 11776346 | U0 | 0 | + | 1741547 |
## | chrY | 8010951 | 8010976 | U0 | 0 | - | 20557165 | 20557190 | U0 | 0 | + | 2632551 |
## | chrY | 7405376 | 7405401 | U0 | 0 | - | 20557165 | 20557190 | U0 | 0 | + | 3238126 |
## +--------------+-----------+-----------+------------+-----------+--------------+---------------+-------------+--------------+---------------+----------------+------------+
## PyRanges object has 10000 sequences from 24 chromosomes.
The nearest method also takes two other arguments, namely how and overlap. The how argument can take the values None, "next" and "previous". The default is None, which means that PyRanges looks in both directions. The overlap argument is a bool which indicates whether you want to include overlaps or not.
f1 = pr.load_dataset("f1")
print(f1)
## +--------------+-----------+-----------+------------+-----------+--------------+
## | Chromosome | Start | End | Name | Score | Strand |
## | (category) | (int64) | (int64) | (object) | (int64) | (category) |
## |--------------+-----------+-----------+------------+-----------+--------------|
## | chr1 | 3 | 6 | interval1 | 0 | + |
## | chr1 | 5 | 7 | interval2 | 0 | - |
## | chr1 | 8 | 9 | interval3 | 0 | + |
## +--------------+-----------+-----------+------------+-----------+--------------+
## PyRanges object has 3 sequences from 1 chromosomes.
f2 = pr.load_dataset("f2")
print(f2)
## +--------------+-----------+-----------+------------+-----------+--------------+
## | Chromosome | Start | End | Name | Score | Strand |
## | (category) | (int64) | (int64) | (object) | (int64) | (category) |
## |--------------+-----------+-----------+------------+-----------+--------------|
## | chr1 | 1 | 2 | a | 0 | + |
## | chr1 | 6 | 7 | b | 0 | - |
## +--------------+-----------+-----------+------------+-----------+--------------+
## PyRanges object has 2 sequences from 1 chromosomes.
print(f2.nearest(f1, strandedness="opposite", how="next"))
## +--------------+-----------+-----------+------------+-----------+--------------+-----------+-----------+------------+-----------+--------------+------------+
## | Chromosome | Start | End | Name | Score | Strand | Start_b | End_b | Name_b | Score_b | Strand_b | Distance |
## | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) |
## |--------------+-----------+-----------+------------+-----------+--------------+-----------+-----------+------------+-----------+--------------+------------|
## | chr1 | 1 | 2 | a | 0 | + | 5 | 7 | interval2 | 0 | - | 4 |
## | chr1 | 6 | 7 | b | 0 | - | 8 | 9 | interval3 | 0 | + | 2 |
## +--------------+-----------+-----------+------------+-----------+--------------+-----------+-----------+------------+-----------+--------------+------------+
## PyRanges object has 2 sequences from 1 chromosomes.
print(f2.nearest(f1, strandedness="opposite", how="next", overlap=False))
## +--------------+-----------+-----------+------------+-----------+--------------+-----------+-----------+------------+-----------+--------------+------------+
## | Chromosome | Start | End | Name | Score | Strand | Start_b | End_b | Name_b | Score_b | Strand_b | Distance |
## | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) | (int64) | (object) | (int64) | (category) | (int64) |
## |--------------+-----------+-----------+------------+-----------+--------------+-----------+-----------+------------+-----------+--------------+------------|
## | chr1 | 1 | 2 | a | 0 | + | 5 | 7 | interval2 | 0 | - | 4 |
## | chr1 | 6 | 7 | b | 0 | - | 8 | 9 | interval3 | 0 | + | 2 |
## +--------------+-----------+-----------+------------+-----------+--------------+-----------+-----------+------------+-----------+--------------+------------+
## PyRanges object has 2 sequences from 1 chromosomes.