24 Statistics: rowbased operations
pyranges provides fast row-based Pearson and Spearman correlations, as well as a fast row-based version of rankdata (similar to scipy.stats.rankdata).
import pyranges as pr
# NOTE(review): `gr` is not used by the statistics calls in this excerpt.
gr = pr.random(1000)
import numpy as np
# Two integer matrices with 1000 rows and 5 columns, values drawn from [0, 20).
# The random generator is not seeded, so the printed values shown below will
# differ from run to run.
size = (int(1e3), 5)
d1 = np.random.randint(0, high=20, size=size)
d2 = np.random.randint(0, high=20, size=size)
# Row-based correlation of d1 against d2; presumably one coefficient per row
# (row i of d1 vs. row i of d2) -- confirm against the pyranges.stats docs.
spearman = pr.stats.rowbased_spearman(d1, d2) # or rowbased_pearson(d1, d2)
print(spearman[:5])
## [ 0.15389675 -0.76315789 0.68421053 -0.4 0. ]
# Ranks are computed within each row; in the printed output tied values share
# a fractional rank (e.g. 3.5 3.5).
ranks = pr.stats.rowbased_rankdata(d1) # similar to scipy.stats.rankdata
print(ranks)
## 0 1 2 3 4
## 0 5.0 4.0 1.0 3.0 2.0
## 1 2.0 1.0 5.0 3.5 3.5
## 2 5.0 2.5 2.5 1.0 4.0
## 3 4.0 5.0 2.0 3.0 1.0
## 4 5.0 3.0 1.0 4.0 2.0
## .. ... ... ... ... ...
## 995 4.5 2.5 4.5 1.0 2.5
## 996 5.0 1.0 3.0 4.0 2.0
## 997 3.0 4.5 1.0 4.5 2.0
## 998 4.0 4.0 4.0 1.0 2.0
## 999 4.0 1.0 5.0 2.5 2.5
##
## [1000 rows x 5 columns]