11 An introduction to RLEs

Rles are run-length encodings - a data structure that compactly describes the coverage of a genome (or any other score associated with each nucleotide). They are used for efficient genome-wide arithmetic operations on these scores.

from pyrle import Rle # or: from pyranges import Rle
# Build an Rle from two parallel lists: runs[i] is the length of the i-th run
# and values[i] is the value repeated over that run.
runs = [10, 10, 10, 10]
values = [0, 1, 0, 0]
r1 = Rle(runs, values)
# In the output below, the two trailing runs of 10 that share the value 0 are
# shown merged into a single run of 20 (4 input runs -> 3 elements).
print(r1)
## +--------+------+------+------+
## | Runs   |   10 |   10 |   20 |
## |--------+------+------+------|
## | Values |    0 |    1 |    0 |
## +--------+------+------+------+
## Rle of length 40 containing 3 elements
# A second Rle with the same total length (11 + 9 + 20 = 40) but different
# run boundaries, used as the other operand in the arithmetic examples.
runs2 = [11, 9, 20]
values2 = [100, 0, 100]
r2 = Rle(runs2, values2)
print(r2)
## +--------+------+-----+------+
## | Runs   |   11 |   9 |   20 |
## |--------+------+-----+------|
## | Values |  100 |   0 |  100 |
## +--------+------+-----+------+
## Rle of length 40 containing 3 elements
# Adding two Rles sums their values position by position. The run of length 1
# in the result is the single position where r1's second run (value 1)
# overlaps r2's first run (value 100), giving 101.
print(r1 + r2)
## +--------+------+-----+-----+------+
## | Runs   |   10 |   1 |   9 |   20 |
## |--------+------+-----+-----+------|
## | Values |  100 | 101 |   1 |  100 |
## +--------+------+-----+-----+------+
## Rle of length 40 containing 4 elements
# Multiplication is position-wise as well. Everything after the single
# non-zero position multiplies to 0 and is shown as one run of 29.
print(r1 * r2)
## +--------+------+-----+------+
## | Runs   |   10 |   1 |   29 |
## |--------+------+-----+------|
## | Values |    0 | 100 |    0 |
## +--------+------+-----+------+
## Rle of length 40 containing 3 elements
# The run lengths and the values are available as the .runs and .values
# attributes. Note the values print as floating-point numbers even though
# integers were passed in.
print(r1.runs)
## [10 10 20]
print(r1.values)
## [0. 1. 0.]
# Adding a scalar shifts every value by that amount and leaves the runs as
# they were. r1 is rebound here, so the division below uses the shifted Rle.
r1 = r1 + 5
print(r1)
## +--------+------+------+------+
## | Runs   |   10 |   10 |   20 |
## |--------+------+------+------|
## | Values |    5 |    6 |    5 |
## +--------+------+------+------+
## Rle of length 40 containing 3 elements
# Division is position-wise too: 100 / 5 = 20, 100 / 6 is displayed as
# 16.6667, and 0 / 6 = 0.
print(r2 / r1)
## +--------+------+---------+-----+------+
## | Runs   |   10 |       1 |   9 |   20 |
## |--------+------+---------+-----+------|
## | Values |   20 | 16.6667 |   0 |   20 |
## +--------+------+---------+-----+------+
## Rle of length 40 containing 4 elements