Skip to content
This repository was archived by the owner on Oct 21, 2025. It is now read-only.
Merged

Dev #104

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions diffxpy/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from . import test
from . import enrich
from . import fit
from . import stats
from . import utils
from .. import pkg_constants
1 change: 1 addition & 0 deletions diffxpy/api/fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from diffxpy.fit import model, residuals, partition
2 changes: 1 addition & 1 deletion diffxpy/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from diffxpy.testing.utils import constraint_matrix_from_string, constraint_matrix_from_dict, \
constraint_system_from_star
from diffxpy.testing.utils import design_matrix, design_matrix_from_xarray, design_matrix_from_anndata
from diffxpy.testing.utils import design_matrix
from diffxpy.testing.utils import view_coef_names, preview_coef_names
1 change: 1 addition & 0 deletions diffxpy/fit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .fit import model, residuals, partition
3 changes: 3 additions & 0 deletions diffxpy/fit/external.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from diffxpy.testing.tests import _fit
from diffxpy.testing.utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
constraint_system_from_star
811 changes: 811 additions & 0 deletions diffxpy/fit/fit.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions diffxpy/models/batch_bfgs/optim.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@
class Estim_BFGS_Model():

def __init__(self, Estim_BFGS, nproc):
self._num_observations = Estim_BFGS.X.shape[0]
self._num_features = Estim_BFGS.X.shape[1]
self._num_observations = Estim_BFGS.x.shape[0]
self._num_features = Estim_BFGS.x.shape[1]
self._features = Estim_BFGS.feature_names
self._observations = Estim_BFGS.X.shape[0]
self._observations = Estim_BFGS.x.shape[0]
self._design_loc = Estim_BFGS.design_loc
self._design_scale = Estim_BFGS.design_scale
self._loss = xr.DataArray(Estim_BFGS.full_loss(nproc))
self._log_probs = -self._loss
self._probs = np.exp(self._log_probs)
self._mles = xr.DataArray(np.transpose(Estim_BFGS.mles()))
self._gradient = xr.DataArray(np.zeros([Estim_BFGS.X.shape[1]]))
self._gradient = xr.DataArray(np.zeros([Estim_BFGS.x.shape[1]]))
self._fisher_inv = xr.DataArray(Estim_BFGS.fisher_inv)
self._idx_loc = np.arange(0, Estim_BFGS.design_loc.shape[1])
self._idx_scale = np.arange(Estim_BFGS.design_loc.shape[1],
Expand Down
2 changes: 0 additions & 2 deletions diffxpy/pkg_constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
DE_TREAT_ZEROVAR_TT_AS_SIG = True

BATCHGLM_OPTIM_GD = False
BATCHGLM_OPTIM_ADAM = False
BATCHGLM_OPTIM_ADAGRAD = False
Expand Down
31 changes: 14 additions & 17 deletions diffxpy/stats/stats.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from typing import Union

import numpy as np
import numpy.linalg
import scipy.sparse
import scipy.stats
import xarray as xr
from typing import Union


def likelihood_ratio_test(
Expand Down Expand Up @@ -34,42 +33,40 @@ def likelihood_ratio_test(
delta_df = df_full - df_reduced
# Compute the deviance test statistic.
delta_dev = 2 * (ll_full - ll_reduced)
# Compute the p-values based on the deviance and its expection based on the chi-square distribution.
# Compute the p-values based on the deviance and its expectation based on the chi-square distribution.
pvals = 1 - scipy.stats.chi2(delta_df).cdf(delta_dev)
return pvals


def mann_whitney_u_test(
x0: np.ndarray,
x1: np.ndarray,
x0: Union[np.ndarray, scipy.sparse.csr_matrix],
x1: Union[np.ndarray, scipy.sparse.csr_matrix]
):
"""
Perform Wilcoxon rank sum test (Mann-Whitney U test) along second axis
(for each gene).
Perform Wilcoxon rank sum test (Mann-Whitney U test) along second axis, i.e. for each gene.

The Wilcoxon rank sum test is a non-parametric test
to compare two groups of observations.

:param x0: np.array (observations x genes)
:param x0: (observations x genes)
Observations in first group by gene
:param x1: np.array (observations x genes)
:param x1: (observations x genes)
Observations in second group by gene.
"""
axis = 1
if np.any(np.ndim(x0) != np.ndim(x1)):
raise ValueError('stats.wilcoxon(): number of dimensions is not allowed to differ between x0 and x1!')
raise ValueError('number of dimensions is not allowed to differ between x0 and x1')
# Reshape into 2D array if only one test is performed.
if np.ndim(x0) == 1:
x0 = x0.reshape([x0.shape[0], 1])
x1 = x1.reshape([x1.shape[0], 1])
if np.any(x0.shape[axis] != x1.shape[axis]):
raise ValueError(
'stats.wilcoxon(): the first axis (number of tests) is not allowed to differ between x0 and x1!')
raise ValueError('the first axis (number of tests) is not allowed to differ between x0 and x1')

pvals = np.asarray([
scipy.stats.mannwhitneyu(
x=x0[:, i].flatten(),
y=x1[:, i].flatten(),
x=np.asarray(x0[:, i].todense()).flatten() if isinstance(x0, scipy.sparse.csr_matrix) else x0[:, i],
y=np.asarray(x1[:, i].todense()).flatten() if isinstance(x0, scipy.sparse.csr_matrix) else x1[:, i],
use_continuity=True,
alternative="two-sided"
).pvalue for i in range(x0.shape[1])
Expand Down Expand Up @@ -252,6 +249,8 @@ def wald_test_chisq(
if theta_mle.shape[0] != theta_covar.shape[1]:
raise ValueError(
'stats.wald_test(): theta_mle and theta_covar have to contain the same number of parameters')
if len(theta_covar.shape) != 3:
raise ValueError('stats.wald_test(): theta_covar should have 3 dimensions but has %i' % len(theta_covar.shape))
if theta_mle.shape[1] != theta_covar.shape[0]:
raise ValueError('stats.wald_test(): theta_mle and theta_covar have to contain the same number of genes')
if theta_covar.shape[1] != theta_covar.shape[2]:
Expand All @@ -264,8 +263,6 @@ def wald_test_chisq(

theta_diff = theta_mle - theta0
# Convert to nd.array to avoid gufunc error.
if isinstance(theta_diff, xr.DataArray):
theta_diff = theta_diff.values
wald_statistic = np.array([
np.matmul(
np.matmul(
Expand Down
Loading