24 Statistics: rowbased operations
pyranges includes fast row-based Pearson and Spearman correlations, as well as a fast row-based version of rankdata.
# Example: row-based correlation and ranking with pyranges.stats.
import pyranges as pr

# Random PyRanges of 1000 intervals; not referenced again in this snippet.
gr = pr.random(1000)

import numpy as np

# Two random integer matrices of shape (1000, 5) with values drawn from [0, 20).
size = (int(1e3), 5)
d1 = np.random.randint(0, high=20, size=size)
d2 = np.random.randint(0, high=20, size=size)

# Correlate d1 and d2 row by row; only the first five results are shown.
spearman = pr.stats.rowbased_spearman(d1, d2) # or rowbased_pearson(d1, d2)
print(spearman[:5])
## [ 0.15389675 -0.76315789 0.68421053 -0.4 0. ]

# Rank the values of d1; the printed result below has 1000 rows x 5 columns.
ranks = pr.stats.rowbased_rankdata(d1) # similar to scipy.stats.rankdata
print(ranks)
## 0 1 2 3 4
## 0 5.0 4.0 1.0 3.0 2.0
## 1 2.0 1.0 5.0 3.5 3.5
## 2 5.0 2.5 2.5 1.0 4.0
## 3 4.0 5.0 2.0 3.0 1.0
## 4 5.0 3.0 1.0 4.0 2.0
## .. ... ... ... ... ...
## 995 4.5 2.5 4.5 1.0 2.5
## 996 5.0 1.0 3.0 4.0 2.0
## 997 3.0 4.5 1.0 4.5 2.0
## 998 4.0 4.0 4.0 1.0 2.0
## 999 4.0 1.0 5.0 2.5 2.5
##
## [1000 rows x 5 columns]