24 Statistics: rowbased operations

pyranges includes fast row-based Pearson and Spearman correlations, as well as a fast row-based version of rankdata.

# Example: row-wise statistics from pr.stats on two random integer matrices.
# No seed is set in this snippet, so the recorded output below comes from one
# sample run and will differ between runs.
import numpy as np
import pyranges as pr

# NOTE(review): presumably a PyRanges of 1000 random intervals; it is not
# used by the statistics calls shown in this snippet.
gr = pr.random(1000)

# Two independent 1000 x 5 matrices of integers drawn from [0, 20).
size = (1000, 5)
d1 = np.random.randint(0, 20, size=size)
d2 = np.random.randint(0, 20, size=size)

# Correlate d1 and d2 row by row; the recorded output shows one coefficient
# per row. pr.stats.rowbased_pearson(d1, d2) is called the same way.
spearman = pr.stats.rowbased_spearman(d1, d2)
print(spearman[:5])
## [ 0.15389675 -0.76315789  0.68421053 -0.4         0.        ]

# Rank the values within each row (similar to scipy.stats.rankdata). In the
# recorded output tied values share an averaged rank, e.g. 3.5 / 3.5 in row 1.
ranks = pr.stats.rowbased_rankdata(d1)
print(ranks)
##        0    1    2    3    4
## 0    5.0  4.0  1.0  3.0  2.0
## 1    2.0  1.0  5.0  3.5  3.5
## 2    5.0  2.5  2.5  1.0  4.0
## 3    4.0  5.0  2.0  3.0  1.0
## 4    5.0  3.0  1.0  4.0  2.0
## ..   ...  ...  ...  ...  ...
## 995  4.5  2.5  4.5  1.0  2.5
## 996  5.0  1.0  3.0  4.0  2.0
## 997  3.0  4.5  1.0  4.5  2.0
## 998  4.0  4.0  4.0  1.0  2.0
## 999  4.0  1.0  5.0  2.5  2.5
## 
## [1000 rows x 5 columns]