From e472ef6bf0f63deb15853f73556f78b3e05c70a9 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 27 Aug 2019 09:31:13 -0700
Subject: [PATCH 01/25] intermediate commit towards new pairwise API

- all pairwise DETs have the same methods
- shared methods are in a new pairwiseDET base class
---
 diffxpy/testing/det.py             | 710 ++++++++++++-----------------
 diffxpy/testing/tests.py           |   4 +-
 diffxpy/unit_test/test_pairwise.py |  19 +-
 3 files changed, 307 insertions(+), 426 deletions(-)

diff --git a/diffxpy/testing/det.py b/diffxpy/testing/det.py
index 8c01ce6..24b8363 100644
--- a/diffxpy/testing/det.py
+++ b/diffxpy/testing/det.py
@@ -1819,7 +1819,7 @@ def __init__(self, correction_type: str):
             - "global": correct all p-values in one operation
             - "by_test": correct the p-values of each test individually
         """
-        super().__init__()
+        _DifferentialExpressionTest.__init__(self=self)
         self._correction_type = correction_type
 
     def _correction(self, method):
@@ -1851,7 +1851,7 @@ def summary(self, **kwargs) -> pd.DataFrame:
         assert self.gene_ids is not None
 
         # calculate maximum logFC of lower triangular fold change matrix
-        raw_logfc = self.log2_fold_change()
+        raw_logfc = self.log_fold_change(base=2.)
 
         # first flatten all dimensions up to the last 'gene' dimension
         flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1])
@@ -1875,137 +1875,139 @@ def summary(self, **kwargs) -> pd.DataFrame:
         return res
 
 
-class DifferentialExpressionTestPairwise(_DifferentialExpressionTestMulti):
+class _DifferentialExpressionTestPairwiseBase(_DifferentialExpressionTestMulti):
     """
-    Pairwise unit_test between more than 2 groups per gene.
+    Pairwise differential expression tests base class.
+
+    Defines API of accessing test results of pairs of groups. The underlaying accessors depend
+    on the type of test and are defined in the subclasses.
     """
 
-    def __init__(self, gene_ids, pval, logfc, ave, groups, tests, correction_type: str):
-        super().__init__(correction_type=correction_type)
-        self._gene_ids = np.asarray(gene_ids)
-        self._logfc = logfc
-        self._pval = pval
-        self._mean = np.asarray(ave).flatten()
-        self.groups = list(np.asarray(groups))
-        self._tests = tests
+    groups: List[str]
 
-        _ = self.qval
+    def _get_group_idx(self, groups0, groups1):
+        if np.any([x not in self.groups for x in groups0]):
+            raise ValueError('element of groups1 not recognized; not in groups %s' % self.groups)
+        if np.any([x not in self.groups for x in groups1]):
+            raise ValueError('element of groups2 not recognized; not in groups %s' % self.groups)
 
-    @property
-    def gene_ids(self) -> np.ndarray:
-        return self._gene_ids
-
-    @property
-    def x(self):
-        return None
+        return np.array([self.groups.index(x) for x in groups0]), \
+               np.array([self.groups.index(x) for x in groups1])
 
-    @property
-    def tests(self):
+    @abc.abstractmethod
+    def _pval_pairs(self, idx0, idx1):
         """
-        If `keep_full_test_objs` was set to `True`, this will return a matrix of differential expression tests.
+        Test-specific p-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
         """
-        if self._tests is None:
-            raise ValueError("Individual tests were not kept!")
+        pass
 
-        return self._tests
+    @abc.abstractmethod
+    def _qval_pairs(self, idx0, idx1, method):
+        """
+        Test-specific q-values accessor for the comparison of groups1 to groups2.
 
-    def log_fold_change(self, base=np.e, **kwargs):
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: q-values
         """
-        Returns matrix of fold changes per gene
+        pass
+
+    @abc.abstractmethod
+    def _log_fold_change_pairs(self, idx0, idx1, base):
         """
-        if base == np.e:
-            return self._logfc
-        else:
-            return self._logfc / np.log(base)
+        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
 
-    def _check_groups(self, group1, group2):
-        if group1 not in self.groups:
-            raise ValueError('group1 not recognized')
-        if group2 not in self.groups:
-            raise ValueError('group2 not recognized')
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        pass
 
-    def pval_pair(self, group1, group2):
+    def pval_pairs(self, groups0, groups1):
         """
-        Get p-values of the comparison of group1 and group2.
+        Get p-values of the comparison of groups1 to groups2.
 
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
         :return: p-values
         """
-        assert self._pval is not None
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._pval_pairs(idx0=idx0, idx1=idx1)
 
-        self._check_groups(group1, group2)
-        return self._pval[self.groups.index(group1), self.groups.index(group2), :]
-
-    def qval_pair(self, group1, group2):
+    def qval_pairs(self, groups0, groups1, method="fdr_bh"):
         """
-        Get q-values of the comparison of group1 and group2.
+        Get q-values of the comparison of groups1 to groups2.
 
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
         :return: q-values
         """
-        assert self._qval is not None
-
-        self._check_groups(group1, group2)
-        return self._qval[self.groups.index(group1), self.groups.index(group2), :]
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._qval_pairs(idx0=idx0, idx1=idx1, method=method)
 
-    def log10_pval_pair_clean(self, group1, group2, log10_threshold=-30):
+    def log10_pval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
         """
         Return log10 transformed and cleaned p-values.
 
         NaN p-values are set to one and p-values below log10_threshold
         in log10 space are set to log10_threshold.
 
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
         :param log10_threshold: minimal log10 p-value to return.
         :return: Cleaned log10 transformed p-values.
         """
-        pvals = np.reshape(self.pval_pair(group1=group1, group2=group2), -1)
+        pvals = np.reshape(self.pval_pairs(groups0=groups0, groups1=groups1), -1)
         pvals = np.nextafter(0, 1, out=pvals, where=pvals == 0)
         log10_pval_clean = np.log(pvals) / np.log(10)
         log10_pval_clean[np.isnan(log10_pval_clean)] = 1
         log10_pval_clean = np.clip(log10_pval_clean, log10_threshold, 0, log10_pval_clean)
         return log10_pval_clean
 
-    def log10_qval_pair_clean(self, group1, group2, log10_threshold=-30):
+    def log10_qval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
         """
         Return log10 transformed and cleaned q-values.
 
         NaN p-values are set to one and q-values below log10_threshold
         in log10 space are set to log10_threshold.
 
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
         :param log10_threshold: minimal log10 q-value to return.
         :return: Cleaned log10 transformed q-values.
         """
-        qvals = np.reshape(self.qval_pair(group1=group1, group2=group2), -1)
+        qvals = np.reshape(self.qval_pairs(groups0=groups0, groups1=groups1), -1)
         qvals = np.nextafter(0, 1, out=qvals, where=qvals == 0)
         log10_qval_clean = np.log(qvals) / np.log(10)
         log10_qval_clean[np.isnan(log10_qval_clean)] = 1
         log10_qval_clean = np.clip(log10_qval_clean, log10_threshold, 0, log10_qval_clean)
         return log10_qval_clean
 
-    def log_fold_change_pair(
+    def log_fold_change_pairs(
             self,
-            group1,
-            group2,
+            groups0,
+            groups1,
             base=np.e
     ):
         """
         Get log fold changes of the comparison of group1 and group2.
 
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
         :param base: Base of logarithm.
         :return: log fold changes
         """
-        assert self._logfc is not None
-
-        self._check_groups(group1, group2)
-        return self.log_fold_change(base=base)[self.groups.index(group1), self.groups.index(group2), :]
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._log_fold_change_pairs(idx0=idx0, idx1=idx1, base=base)
 
     def summary(
             self,
@@ -2036,39 +2038,54 @@ def summary(
 
         return res
 
-    def summary_pair(
+    def summary_pairs(
             self,
-            group1,
-            group2,
+            groups0,
+            groups1,
             qval_thres=None,
             fc_upper_thres=None,
             fc_lower_thres=None,
-            mean_thres=None
+            mean_thres=None,
+            method="fdr_bh"
     ) -> pd.DataFrame:
         """
         Summarize differential expression results into an output table.
 
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
         :param qval_thres: Upper bound of corrected p-values for gene to be included.
         :param fc_upper_thres: Upper bound of fold-change for gene to be included.
         :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
         :param mean_thres: Lower bound of average expression for gene to be included.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
         :return: pandas.DataFrame with the following columns:
-
             - gene: the gene id's
-            - pval: the per-gene p-value of the selected test
-            - qval: the per-gene q-value of the selected test
-            - log2fc: the per-gene log2 fold change of the selected test
+            - pval: the minimum per-gene p-value of all tests
+            - qval: the minimum per-gene q-value of all tests
+            - log2fc: the maximal/minimal (depending on which one is higher) log2 fold change of the genes
             - mean: the mean expression of the gene across all groups
         """
         assert self.gene_ids is not None
 
+        pval = self.pval_pairs(groups0=groups0, groups1=groups1)
+        qval = self.qval_pairs(groups0=groups0, groups1=groups1, method=method)
+
+        # calculate maximum logFC of lower triangular fold change matrix
+        raw_logfc = self.log_fold_change_pairs(groups0=groups0, groups1=groups1, base=2)
+
+        # first flatten all dimensions up to the last 'gene' dimension
+        flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1])
+        # next, get argmax of flattened logfc and unravel the true indices from it
+        r, c = np.unravel_index(flat_logfc.argmax(0), raw_logfc.shape[:2])
+        # if logfc is maximal in the lower triangular matrix, multiply it with -1
+        logfc = raw_logfc[r, c, np.arange(raw_logfc.shape[-1])] * np.where(r <= c, 1, -1)
+
         res = pd.DataFrame({
             "gene": self.gene_ids,
-            "pval": self.pval_pair(group1=group1, group2=group2),
-            "qval": self.qval_pair(group1=group1, group2=group2),
-            "log2fc": self.log_fold_change_pair(group1=group1, group2=group2, base=2),
+            "pval": np.min(pval, axis=(0, 1)),
+            "qval": np.min(qval, axis=(0, 1)),
+            "log2fc": np.asarray(logfc),
             "mean": np.asarray(self.mean)
         })
 
@@ -2083,7 +2100,105 @@ def summary_pair(
         return res
 
 
-class DifferentialExpressionTestZTest(_DifferentialExpressionTestMulti):
+class DifferentialExpressionTestPairwiseStandard(_DifferentialExpressionTestPairwiseBase):
+    """
+    Pairwise differential expression tests class for tests other than z-tests.
+    """
+
+    def __init__(
+            self,
+            gene_ids,
+            pval,
+            logfc,
+            ave,
+            groups,
+            tests,
+            correction_type: str
+    ):
+        super().__init__(correction_type=correction_type)
+        self._gene_ids = np.asarray(gene_ids)
+        self._logfc = logfc
+        self._pval = pval
+        self._mean = np.asarray(ave).flatten()
+        self.groups = list(np.asarray(groups))
+        self._tests = tests
+
+        _ = self.qval
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return self._gene_ids
+
+    @property
+    def x(self):
+        return None
+
+    @property
+    def tests(self):
+        """
+        If `keep_full_test_objs` was set to `True`, this will return a matrix of differential expression tests.
+        """
+        if self._tests is None:
+            raise ValueError("Individual tests were not kept!")
+
+        return self._tests
+
+    def _correction_pairs(self, idx0, idx1, method):
+        if self._correction_type.lower() == "global":
+            pvals = np.reshape(self._pval(idx0=idx0, idx1=idx1), -1)
+            qvals = correction.correct(pvals=pvals, method=method)
+            qvals = np.reshape(qvals, self._pval(idx0=idx0, idx1=idx1).shape)
+            return qvals
+        elif self._correction_type.lower() == "by_test":
+            qvals = np.apply_along_axis(
+                func1d=lambda pv: correction.correct(pvals=pv, method=method),
+                axis=-1,
+                arr=self.pval(idx0=idx0, idx1=idx1)
+            )
+            return qvals
+
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        assert np.all([x < self._pval.shape[1] for x in idx0])
+        assert np.all([x < self._pval.shape[1] for x in idx1])
+        return self._pval[idx0, idx1]
+
+    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
+        """
+        Test-specific q-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: q-values
+        """
+        return self._correction_pairs(idx0=idx0, idx1=idx1, method=method)
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        assert np.all([x < self._pval.shape[1] for x in idx0])
+        assert np.all([x < self._pval.shape[1] for x in idx1])
+        if base == np.e:
+            return self._logfc[idx0, idx1]
+        else:
+            return self._logfc[idx0, idx1] / np.log(base)
+
+
+class DifferentialExpressionTestZTest(_DifferentialExpressionTestPairwiseBase):
     """
     Pairwise unit_test between more than 2 groups per gene.
     """
@@ -2160,11 +2275,43 @@ def _ave(self):
         :return: np.ndarray
         """
 
-        return np.mean(self.model_estim.x, axis=0)
+        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
 
-    def log_fold_change(self, base=np.e, **kwargs):
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        assert np.all([x < self.pval.shape[1] for x in idx0])
+        assert np.all([x < self.pval.shape[1] for x in idx1])
+        return self.pval[idx0, idx1]
+
+    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
+        """
+        Test-specific q-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            TODO not used yet.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: q-values
+        """
+        assert np.all([x < self.qval.shape[1] for x in idx0])
+        assert np.all([x < self.qval.shape[1] for x in idx1])
+        return self.qval[idx0, idx1]
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
         """
-        Returns matrix of fold changes per gene
+        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
         """
         if self._logfc is None:
             groups = self.groups
@@ -2185,105 +2332,12 @@ def log_fold_change(self, base=np.e, **kwargs):
             self._logfc = logfc
 
         if base == np.e:
-            return self._logfc
+            return self._logfc[idx0, idx1]
         else:
-            return self._logfc / np.log(base)
-
-    def _check_groups(self, group1, group2):
-        if group1 not in self.groups:
-            raise ValueError('group1 not recognized')
-        if group2 not in self.groups:
-            raise ValueError('group2 not recognized')
-
-    def pval_pair(self, group1, group2):
-        self._check_groups(group1, group2)
-        return self.pval[self.groups.index(group1), self.groups.index(group2), :]
-
-    def qval_pair(self, group1, group2):
-        self._check_groups(group1, group2)
-        return self.qval[self.groups.index(group1), self.groups.index(group2), :]
-
-    def log_fold_change_pair(self, group1, group2, base=np.e):
-        self._check_groups(group1, group2)
-        return self.log_fold_change(base=base)[self.groups.index(group1), self.groups.index(group2), :]
-
-    def summary(
-            self,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None,
-            **kwargs
-    ) -> pd.DataFrame:
-        """
-        Summarize differential expression results into an output table.
-
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: Summary table of differential expression test.
-        """
-        res = super().summary(**kwargs)
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
-
-    def summary_pair(
-            self,
-            group1,
-            group2,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None
-    ) -> pd.DataFrame:
-        """
-        Summarize differential expression results into an output table.
-
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: pandas.DataFrame with the following columns:
-
-            - gene: the gene id's
-            - pval: the per-gene p-value of the selected test
-            - qval: the per-gene q-value of the selected test
-            - log2fc: the per-gene log2 fold change of the selected test
-            - mean: the mean expression of the gene across all groups
-        """
-        assert self.gene_ids is not None
-
-        res = pd.DataFrame({
-            "gene": self.gene_ids,
-            "pval": self.pval_pair(group1=group1, group2=group2),
-            "qval": self.qval_pair(group1=group1, group2=group2),
-            "log2fc": self.log_fold_change_pair(group1=group1, group2=group2, base=2),
-            "mean": np.asarray(self.mean)
-        })
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
+            return self._logfc[idx0, idx1] / np.log(base)
 
 
-class DifferentialExpressionTestZTestLazy(_DifferentialExpressionTestMulti):
+class DifferentialExpressionTestZTestLazy(_DifferentialExpressionTestPairwiseBase):
     """
     Pairwise unit_test between more than 2 groups per gene with lazy evaluation.
 
@@ -2303,7 +2357,10 @@ def __init__(
             grouping, groups,
             correction_type="global"
     ):
-        super().__init__(correction_type=correction_type)
+        _DifferentialExpressionTestMulti.__init__(
+            self=self,
+            correction_type=correction_type
+        )
         self.model_estim = model_estim
         self.grouping = grouping
         if isinstance(groups, list):
@@ -2319,7 +2376,7 @@ def __init__(
         theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
         self._theta_sd = np.sqrt(theta_sd)
 
-    def _correction(self, pvals, method="fdr_bh") -> np.ndarray:
+    def _correction_pairs(self, pvals, method="fdr_bh") -> np.ndarray:
         """
         Performs multiple testing corrections available in statsmodels.stats.multitest.multipletests().
 
@@ -2353,18 +2410,22 @@ def _test(self, **kwargs):
         """
         pass
 
-    def _test_pairs(self, groups0, groups1):
-        num_features = self.model_estim.x.shape[1]
-        pvals = np.tile(np.NaN, [len(groups0), len(groups1), num_features])
+    def _test_pairs(self, idx0, idx1):
+        """
 
-        for i, g0 in enumerate(groups0):
-            for j, g1 in enumerate(groups1):
-                if g0 != g1:
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        pvals = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
+        for i in idx0:
+            for j in idx1:
+                if i != j:
                     pvals[i, j] = stats.two_coef_z_test(
-                        theta_mle0=self._theta_mle[g0],
-                        theta_mle1=self._theta_mle[g1],
-                        theta_sd0=self._theta_sd[g0],
-                        theta_sd1=self._theta_sd[g1]
+                        theta_mle0=self._theta_mle[i],
+                        theta_mle1=self._theta_mle[j],
+                        theta_sd0=self._theta_sd[i],
+                        theta_sd1=self._theta_sd[j]
                     )
                 else:
                     pvals[i, j] = 1
@@ -2394,7 +2455,7 @@ def _ave(self):
         :return: np.ndarray
         """
 
-        return np.mean(self.model_estim.x, axis=0)
+        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
 
     @property
     def pval(self, **kwargs):
@@ -2419,243 +2480,58 @@ def log_fold_change(self, base=np.e, **kwargs):
         This function is not available in lazy results evaluation as it would
         require all pairwise tests to be performed.
         """
-        pass
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
 
-    def summary(
-            self,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None,
-            **kwargs
-    ) -> pd.DataFrame:
+    @property
+    def summary(self, **kwargs):
         """
-        Summarize differential expression results into an output table.
-
         This function is not available in lazy results evaluation as it would
         require all pairwise tests to be performed.
-
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: Summary table of differential expression test.
         """
-        pass
-
-    def _check_groups(self, groups0, groups1):
-        if not isinstance(groups0, list):
-            groups0 = [groups0]
-        if not isinstance(groups1, list):
-            groups1 = [groups1]
-        for g in groups0:
-            if g not in self.groups:
-                raise ValueError('groups0 element '+str(g)+' not recognized')
-        for g in groups1:
-            if g not in self.groups:
-                raise ValueError('groups1 element '+str(g)+' not recognized')
-
-    def _groups_idx(self, groups):
-        if not isinstance(groups, list):
-            groups = [groups]
-        return np.array([self.groups.index(x) for x in groups])
-
-    def pval_pairs(self, groups0=None, groups1=None):
-        """
-        Return p-values for all pairwise comparisons of groups0 and groups1.
-
-        If you want to test one group (such as a control) against all other groups
-        (one test for each other group), give the control group id in groups0
-        and leave groups1=None, groups1 is then set to the full set of all groups.
-
-        :param groups0: First set of groups in pair-wise comparison.
-        :param groups1: Second set of groups in pair-wise comparison.
-        :return: P-values of pair-wise comparison.
-        """
-        if groups0 is None:
-            groups0 = self.groups
-        if groups1 is None:
-            groups1 = self.groups
-        self._check_groups(groups0, groups1)
-        groups0 = self._groups_idx(groups0)
-        groups1 = self._groups_idx(groups1)
-        return self._test_pairs(groups0=groups0, groups1=groups1)
-
-    def qval_pairs(self, groups0=None, groups1=None, method="fdr_bh", **kwargs):
-        """
-        Return multiple testing-corrected p-values for all
-        pairwise comparisons of groups0 and groups1.
-
-        If you want to test one group (such as a control) against all other groups
-        (one test for each other group), give the control group id in groups0
-        and leave groups1=None, groups1 is then set to the full set of all groups.
-
-        :param groups0: First set of groups in pair-wise comparison.
-        :param groups1: Second set of groups in pair-wise comparison.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: Multiple testing-corrected p-values of pair-wise comparison.
-        """
-        if groups0 is None:
-            groups0 = self.groups
-        if groups1 is None:
-            groups1 = self.groups
-        self._check_groups(groups0, groups1)
-        groups0 = self._groups_idx(groups0)
-        groups1 = self._groups_idx(groups1)
-        pval = self.pval_pair(groups0=groups0, groups1=groups1)
-        return self._correction(pval=pval, method=method, **kwargs)
-
-    def log_fold_change_pairs(self, groups0=None, groups1=None, base=np.e):
-        """
-        Return log-fold changes for all pairwise comparisons of groups0 and groups1.
-
-        If you want to test one group (such as a control) against all other groups
-        (one test for each other group), give the control group id in groups0
-        and leave groups1=None, groups1 is then set to the full set of all groups.
-
-        :param groups0: First set of groups in pair-wise comparison.
-        :param groups1: Second set of groups in pair-wise comparison.
-        :param base: Base of logarithm of log-fold change.
-        :return: P-values of pair-wise comparison.
-        """
-        if groups0 is None:
-            groups0 = self.groups
-        if groups1 is None:
-            groups1 = self.groups
-        self._check_groups(groups0, groups1)
-        groups0 = self._groups_idx(groups0)
-        groups1 = self._groups_idx(groups1)
-
-        num_features = self._theta_mle.shape[1]
-
-        logfc = np.zeros(shape=(len(groups0), len(groups1), num_features))
-        for i, g0 in enumerate(groups0):
-            for j, g1 in enumerate(groups1):
-                logfc[i, j, :] = self._theta_mle[g0, :] - self._theta_mle[g1, :]
-
-        if base == np.e:
-            return logfc
-        else:
-            return logfc / np.log(base)
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
 
-    def summary_pair(
-            self,
-            group0,
-            group1,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None,
-            **kwargs
-    ) -> pd.DataFrame:
+    def _pval_pairs(self, idx0, idx1):
         """
-        Summarize differential expression results of single pairwose comparison
-        into an output table.
-
-        :param group0: First set of groups in pair-wise comparison.
-        :param group1: Second set of groups in pair-wise comparison.
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: pandas.DataFrame with the following columns:
+        Test-specific p-values accessor for the comparison of groups0 to groups1.
 
-            - gene: the gene id's
-            - pval: the per-gene p-value of the selected test
-            - qval: the per-gene q-value of the selected test
-            - log2fc: the per-gene log2 fold change of the selected test
-            - mean: the mean expression of the gene across all groups
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
         """
-        assert self.gene_ids is not None
-
-        if len(group0) != 1:
-            raise ValueError("group0 should only contain one entry in summary_pair()")
-        if len(group1) != 1:
-            raise ValueError("group1 should only contain one entry in summary_pair()")
-
-        pval = self.pval_pairs(groups0=group0, groups1=group1)
-        qval = self._correction(pvals=pval, **kwargs)
-        res = pd.DataFrame({
-            "gene": self.gene_ids,
-            "pval": pval.flatten(),
-            "qval": qval.flatten(),
-            "log2fc": self.log_fold_change_pairs(groups0=group0, groups1=group1, base=2).flatten(),
-            "mean": np.asarray(self.mean)
-        })
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
+        return self._test_pairs(idx0=idx0, idx1=idx1)
 
-        return res
-
-    def summary_pairs(
-            self,
-            groups0,
-            groups1=None,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None,
-            **kwargs
-    ) -> pd.DataFrame:
+    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
         """
-        Summarize differential expression results of a set of
-        pairwise comparisons into an output table.
-
-        :param groups0: First set of groups in pair-wise comparison.
-        :param groups1: Second set of groups in pair-wise comparison.
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: pandas.DataFrame with the following columns:
+        Test-specific q-values accessor for the comparison of both sets of groups.
 
-            - gene: the gene id's
-            - pval: the minimum per-gene p-value of all tests
-            - qval: the minimum per-gene q-value of all tests
-            - log2fc: the maximal/minimal (depending on which one is higher) log2 fold change of the genes
-            - mean: the mean expression of the gene across all groups
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: q-values
         """
-        assert self.gene_ids is not None
-        if groups1 is None:
-            groups1 = self.groups
-
-        pval = self.pval_pairs(groups0=groups0, groups1=groups1)
-        qval = self._correction(pvals=pval, **kwargs)
-
-        # calculate maximum logFC of lower triangular fold change matrix
-        raw_logfc = self.log_fold_change_pairs(groups0=groups0, groups1=groups1, base=2)
-
-        # first flatten all dimensions up to the last 'gene' dimension
-        flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1])
-        # next, get argmax of flattened logfc and unravel the true indices from it
-        r, c = np.unravel_index(flat_logfc.argmax(0), raw_logfc.shape[:2])
-        # if logfc is maximal in the lower triangular matrix, multiply it with -1
-        logfc = raw_logfc[r, c, np.arange(raw_logfc.shape[-1])] * np.where(r <= c, 1, -1)
+        return self._correction_pairs(pvals=self._pval_pairs(idx0=idx0, idx1=idx1), method=method)
 
-        res = pd.DataFrame({
-            "gene": self.gene_ids,
-            "pval": np.min(pval, axis=(0, 1)),
-            "qval": np.min(qval, axis=(0, 1)),
-            "log2fc": np.asarray(logfc),
-            "mean": np.asarray(self.mean)
-        })
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of both sets of groups.
 
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        logfc = np.zeros(shape=(len(idx0), len(idx1), self._theta_mle.shape[1]))
+        for i in idx0:
+            for j in idx1:
+                logfc[i, j, :] = self._theta_mle[i, :] - self._theta_mle[j, :]
 
-        return res
+        if base == np.e:
+            return logfc
+        else:
+            return logfc / np.log(base)
 
 
 class DifferentialExpressionTestVsRest(_DifferentialExpressionTestMulti):
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 3242e09..decfb96 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -15,7 +15,7 @@
 from diffxpy.models.batch_bfgs.optim import Estim_BFGS
 from .det import DifferentialExpressionTestLRT, DifferentialExpressionTestWald, \
     DifferentialExpressionTestTT, DifferentialExpressionTestRank, _DifferentialExpressionTestSingle, \
-    DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, DifferentialExpressionTestPairwise, \
+    DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, DifferentialExpressionTestPairwiseStandard, \
     DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition, \
     DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
 from .utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
@@ -1109,7 +1109,7 @@ def pairwise(
                     tests[i, j] = de_test_temp
                     tests[j, i] = de_test_temp
 
-        de_test = DifferentialExpressionTestPairwise(
+        de_test = DifferentialExpressionTestPairwiseStandard(
             gene_ids=gene_names,
             pval=pvals,
             logfc=logfc,
diff --git a/diffxpy/unit_test/test_pairwise.py b/diffxpy/unit_test/test_pairwise.py
index 7907999..055873c 100644
--- a/diffxpy/unit_test/test_pairwise.py
+++ b/diffxpy/unit_test/test_pairwise.py
@@ -37,7 +37,7 @@ def _prepate_data(
         sim.generate_data()
 
         random_sample_description = pd.DataFrame({
-            "condition": np.random.randint(n_groups, size=sim.nobs)
+            "condition": [str(x) for x in np.random.randint(n_groups, size=sim.nobs)]
         })
         return sim, random_sample_description
 
@@ -64,7 +64,7 @@ def _test_null_distribution_basic(
             n_genes=n_genes,
             n_groups=n_groups
         )
-        test = de.test.pairwise(
+        det = de.test.pairwise(
             data=sim.input_data,
             sample_description=sample_description,
             grouping="condition",
@@ -73,13 +73,18 @@ def _test_null_distribution_basic(
             quick_scale=quick_scale,
             noise_model=self.noise_model
         )
-        _ = test.summary()
+        if not lazy:
+            _ = det.summary()
+        # Single pair accessors:
+        _ = det.pval_pairs(groups0="0", groups1="1")
+        _ = det.qval_pairs(groups0="0", groups1="1")
+        _ = det.log10_pval_pair_cleans(groups0="0", groups1="1")
+        _ = det.log10_qval_pair_cleans(groups0="0", groups1="1")
+        _ = det.log_fold_change_pairs(groups0="0", groups1="1")
+        _ = det.summary_pairs(groups0="0", groups1="1")
 
         # Compare p-value distribution under null model against uniform distribution.
-        if lazy:
-            pval_h0 = stats.kstest(test.pval_pairs(groups0=0, groups1=1).flatten(), 'uniform').pvalue
-        else:
-            pval_h0 = stats.kstest(test.pval[0, 1, :].flatten(), 'uniform').pvalue
+        pval_h0 = stats.kstest(det.pval_pairs(groups0="0", groups1="1").flatten(), 'uniform').pvalue
 
         logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
         assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)

From 3439ecd3219a77a2e55e3400b87dc10c6cd1842e Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 28 Aug 2019 08:22:35 -0700
Subject: [PATCH 02/25] finished new pairwise interface

accessors are not throwing errors anymore
---
 diffxpy/testing/det.py             | 659 -----------------------------
 diffxpy/testing/det_pair.py        | 624 +++++++++++++++++++++++++++
 diffxpy/testing/tests.py           |   3 +-
 diffxpy/unit_test/test_pairwise.py |   7 +-
 4 files changed, 631 insertions(+), 662 deletions(-)
 create mode 100644 diffxpy/testing/det_pair.py

diff --git a/diffxpy/testing/det.py b/diffxpy/testing/det.py
index 24b8363..1d692dc 100644
--- a/diffxpy/testing/det.py
+++ b/diffxpy/testing/det.py
@@ -1875,665 +1875,6 @@ def summary(self, **kwargs) -> pd.DataFrame:
         return res
 
 
-class _DifferentialExpressionTestPairwiseBase(_DifferentialExpressionTestMulti):
-    """
-    Pairwise differential expression tests base class.
-
-    Defines API of accessing test results of pairs of groups. The underlaying accessors depend
-    on the type of test and are defined in the subclasses.
-    """
-
-    groups: List[str]
-
-    def _get_group_idx(self, groups0, groups1):
-        if np.any([x not in self.groups for x in groups0]):
-            raise ValueError('element of groups1 not recognized; not in groups %s' % self.groups)
-        if np.any([x not in self.groups for x in groups1]):
-            raise ValueError('element of groups2 not recognized; not in groups %s' % self.groups)
-
-        return np.array([self.groups.index(x) for x in groups0]), \
-               np.array([self.groups.index(x) for x in groups1])
-
-    @abc.abstractmethod
-    def _pval_pairs(self, idx0, idx1):
-        """
-        Test-specific p-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :return: p-values
-        """
-        pass
-
-    @abc.abstractmethod
-    def _qval_pairs(self, idx0, idx1, method):
-        """
-        Test-specific q-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: q-values
-        """
-        pass
-
-    @abc.abstractmethod
-    def _log_fold_change_pairs(self, idx0, idx1, base):
-        """
-        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param base: Base of logarithm.
-        :return: log fold change values
-        """
-        pass
-
-    def pval_pairs(self, groups0, groups1):
-        """
-        Get p-values of the comparison of groups1 to groups2.
-
-        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
-        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
-        :return: p-values
-        """
-        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
-        return self._pval_pairs(idx0=idx0, idx1=idx1)
-
-    def qval_pairs(self, groups0, groups1, method="fdr_bh"):
-        """
-        Get q-values of the comparison of groups1 to groups2.
-
-        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
-        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: q-values
-        """
-        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
-        return self._qval_pairs(idx0=idx0, idx1=idx1, method=method)
-
-    def log10_pval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
-        """
-        Return log10 transformed and cleaned p-values.
-
-        NaN p-values are set to one and p-values below log10_threshold
-        in log10 space are set to log10_threshold.
-
-        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
-        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
-        :param log10_threshold: minimal log10 p-value to return.
-        :return: Cleaned log10 transformed p-values.
-        """
-        pvals = np.reshape(self.pval_pairs(groups0=groups0, groups1=groups1), -1)
-        pvals = np.nextafter(0, 1, out=pvals, where=pvals == 0)
-        log10_pval_clean = np.log(pvals) / np.log(10)
-        log10_pval_clean[np.isnan(log10_pval_clean)] = 1
-        log10_pval_clean = np.clip(log10_pval_clean, log10_threshold, 0, log10_pval_clean)
-        return log10_pval_clean
-
-    def log10_qval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
-        """
-        Return log10 transformed and cleaned q-values.
-
-        NaN p-values are set to one and q-values below log10_threshold
-        in log10 space are set to log10_threshold.
-
-        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
-        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
-        :param log10_threshold: minimal log10 q-value to return.
-        :return: Cleaned log10 transformed q-values.
-        """
-        qvals = np.reshape(self.qval_pairs(groups0=groups0, groups1=groups1), -1)
-        qvals = np.nextafter(0, 1, out=qvals, where=qvals == 0)
-        log10_qval_clean = np.log(qvals) / np.log(10)
-        log10_qval_clean[np.isnan(log10_qval_clean)] = 1
-        log10_qval_clean = np.clip(log10_qval_clean, log10_threshold, 0, log10_qval_clean)
-        return log10_qval_clean
-
-    def log_fold_change_pairs(
-            self,
-            groups0,
-            groups1,
-            base=np.e
-    ):
-        """
-        Get log fold changes of the comparison of group1 and group2.
-
-        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
-        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
-        :param base: Base of logarithm.
-        :return: log fold changes
-        """
-        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
-        return self._log_fold_change_pairs(idx0=idx0, idx1=idx1, base=base)
-
-    def summary(
-            self,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None,
-            **kwargs
-    ) -> pd.DataFrame:
-        """
-        Summarize differential expression results into an output table.
-
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: Summary table of differential expression test.
-        """
-        res = super().summary(**kwargs)
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
-
-    def summary_pairs(
-            self,
-            groups0,
-            groups1,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None,
-            method="fdr_bh"
-    ) -> pd.DataFrame:
-        """
-        Summarize differential expression results into an output table.
-
-        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
-        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: pandas.DataFrame with the following columns:
-            - gene: the gene id's
-            - pval: the minimum per-gene p-value of all tests
-            - qval: the minimum per-gene q-value of all tests
-            - log2fc: the maximal/minimal (depending on which one is higher) log2 fold change of the genes
-            - mean: the mean expression of the gene across all groups
-        """
-        assert self.gene_ids is not None
-
-        pval = self.pval_pairs(groups0=groups0, groups1=groups1)
-        qval = self.qval_pairs(groups0=groups0, groups1=groups1, method=method)
-
-        # calculate maximum logFC of lower triangular fold change matrix
-        raw_logfc = self.log_fold_change_pairs(groups0=groups0, groups1=groups1, base=2)
-
-        # first flatten all dimensions up to the last 'gene' dimension
-        flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1])
-        # next, get argmax of flattened logfc and unravel the true indices from it
-        r, c = np.unravel_index(flat_logfc.argmax(0), raw_logfc.shape[:2])
-        # if logfc is maximal in the lower triangular matrix, multiply it with -1
-        logfc = raw_logfc[r, c, np.arange(raw_logfc.shape[-1])] * np.where(r <= c, 1, -1)
-
-        res = pd.DataFrame({
-            "gene": self.gene_ids,
-            "pval": np.min(pval, axis=(0, 1)),
-            "qval": np.min(qval, axis=(0, 1)),
-            "log2fc": np.asarray(logfc),
-            "mean": np.asarray(self.mean)
-        })
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
-
-
-class DifferentialExpressionTestPairwiseStandard(_DifferentialExpressionTestPairwiseBase):
-    """
-    Pairwise differential expression tests class for tests other than z-tests.
-    """
-
-    def __init__(
-            self,
-            gene_ids,
-            pval,
-            logfc,
-            ave,
-            groups,
-            tests,
-            correction_type: str
-    ):
-        super().__init__(correction_type=correction_type)
-        self._gene_ids = np.asarray(gene_ids)
-        self._logfc = logfc
-        self._pval = pval
-        self._mean = np.asarray(ave).flatten()
-        self.groups = list(np.asarray(groups))
-        self._tests = tests
-
-        _ = self.qval
-
-    @property
-    def gene_ids(self) -> np.ndarray:
-        return self._gene_ids
-
-    @property
-    def x(self):
-        return None
-
-    @property
-    def tests(self):
-        """
-        If `keep_full_test_objs` was set to `True`, this will return a matrix of differential expression tests.
-        """
-        if self._tests is None:
-            raise ValueError("Individual tests were not kept!")
-
-        return self._tests
-
-    def _correction_pairs(self, idx0, idx1, method):
-        if self._correction_type.lower() == "global":
-            pvals = np.reshape(self._pval(idx0=idx0, idx1=idx1), -1)
-            qvals = correction.correct(pvals=pvals, method=method)
-            qvals = np.reshape(qvals, self._pval(idx0=idx0, idx1=idx1).shape)
-            return qvals
-        elif self._correction_type.lower() == "by_test":
-            qvals = np.apply_along_axis(
-                func1d=lambda pv: correction.correct(pvals=pv, method=method),
-                axis=-1,
-                arr=self.pval(idx0=idx0, idx1=idx1)
-            )
-            return qvals
-
-    def _pval_pairs(self, idx0, idx1):
-        """
-        Test-specific p-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :return: p-values
-        """
-        assert np.all([x < self._pval.shape[1] for x in idx0])
-        assert np.all([x < self._pval.shape[1] for x in idx1])
-        return self._pval[idx0, idx1]
-
-    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
-        """
-        Test-specific q-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: q-values
-        """
-        return self._correction_pairs(idx0=idx0, idx1=idx1, method=method)
-
-    def _log_fold_change_pairs(self, idx0, idx1, base):
-        """
-        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param base: Base of logarithm.
-        :return: log fold change values
-        """
-        assert np.all([x < self._pval.shape[1] for x in idx0])
-        assert np.all([x < self._pval.shape[1] for x in idx1])
-        if base == np.e:
-            return self._logfc[idx0, idx1]
-        else:
-            return self._logfc[idx0, idx1] / np.log(base)
-
-
-class DifferentialExpressionTestZTest(_DifferentialExpressionTestPairwiseBase):
-    """
-    Pairwise unit_test between more than 2 groups per gene.
-    """
-
-    model_estim: glm.typing.EstimatorBaseTyping
-    theta_mle: np.ndarray
-    theta_sd: np.ndarray
-
-    def __init__(
-            self,
-            model_estim: glm.typing.EstimatorBaseTyping,
-            grouping,
-            groups,
-            correction_type: str
-    ):
-        super().__init__(correction_type=correction_type)
-        self.model_estim = model_estim
-        self.grouping = grouping
-        self.groups = list(np.asarray(groups))
-
-        # Values of parameter estimates: coefficients x genes array with one coefficient per group
-        self._theta_mle = model_estim.a_var
-        # Standard deviation of estimates: coefficients x genes array with one coefficient per group
-        # Need .copy() here as nextafter needs mutabls copy.
-        theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
-        theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
-        self._theta_sd = np.sqrt(theta_sd)
-        self._logfc = None
-
-        # Call tests in constructor.
-        _ = self.pval
-        _ = self.qval
-
-    def _test(self, **kwargs):
-        groups = self.groups
-        num_features = self.model_estim.x.shape[1]
-
-        pvals = np.tile(np.NaN, [len(groups), len(groups), num_features])
-        pvals[np.eye(pvals.shape[0]).astype(bool)] = 1
-
-        theta_mle = self._theta_mle
-        theta_sd = self._theta_sd
-
-        for i, g1 in enumerate(groups):
-            for j, g2 in enumerate(groups[(i + 1):]):
-                j = j + i + 1
-
-                pvals[i, j] = stats.two_coef_z_test(theta_mle0=theta_mle[i], theta_mle1=theta_mle[j],
-                                                    theta_sd0=theta_sd[i], theta_sd1=theta_sd[j])
-                pvals[j, i] = pvals[i, j]
-
-        return pvals
-
-    @property
-    def gene_ids(self) -> np.ndarray:
-        return np.asarray(self.model_estim.input_data.features)
-
-    @property
-    def x(self):
-        return self.model_estim.x
-
-    @property
-    def log_likelihood(self):
-        return self.model_estim.log_likelihood
-
-    @property
-    def model_gradient(self):
-        return self.model_estim.jacobian
-
-    def _ave(self):
-        """
-        Returns the mean expression by gene
-
-        :return: np.ndarray
-        """
-
-        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
-
-    def _pval_pairs(self, idx0, idx1):
-        """
-        Test-specific p-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :return: p-values
-        """
-        assert np.all([x < self.pval.shape[1] for x in idx0])
-        assert np.all([x < self.pval.shape[1] for x in idx1])
-        return self.pval[idx0, idx1]
-
-    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
-        """
-        Test-specific q-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param method: Multiple testing correction method.
-            TODO not used yet.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: q-values
-        """
-        assert np.all([x < self.qval.shape[1] for x in idx0])
-        assert np.all([x < self.qval.shape[1] for x in idx1])
-        return self.qval[idx0, idx1]
-
-    def _log_fold_change_pairs(self, idx0, idx1, base):
-        """
-        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param base: Base of logarithm.
-        :return: log fold change values
-        """
-        if self._logfc is None:
-            groups = self.groups
-            num_features = self.model_estim.x.shape[1]
-
-            logfc = np.tile(np.NaN, [len(groups), len(groups), num_features])
-            logfc[np.eye(logfc.shape[0]).astype(bool)] = 0
-
-            theta_mle = self._theta_mle
-
-            for i, g1 in enumerate(groups):
-                for j, g2 in enumerate(groups[(i + 1):]):
-                    j = j + i + 1
-
-                    logfc[i, j] = theta_mle[j] - theta_mle[i]
-                    logfc[j, i] = -logfc[i, j]
-
-            self._logfc = logfc
-
-        if base == np.e:
-            return self._logfc[idx0, idx1]
-        else:
-            return self._logfc[idx0, idx1] / np.log(base)
-
-
-class DifferentialExpressionTestZTestLazy(_DifferentialExpressionTestPairwiseBase):
-    """
-    Pairwise unit_test between more than 2 groups per gene with lazy evaluation.
-
-    This class performs pairwise tests upon enquiry only and does not store them
-    and is therefore suited so very large group sets for which the lfc and
-    p-value matrices of the size [genes, groups, groups] are too big to fit into
-    memory.
-    """
-
-    model_estim: glm.typing.EstimatorBaseTyping
-    _theta_mle: np.ndarray
-    _theta_sd: np.ndarray
-
-    def __init__(
-            self,
-            model_estim: glm.typing.EstimatorBaseTyping,
-            grouping, groups,
-            correction_type="global"
-    ):
-        _DifferentialExpressionTestMulti.__init__(
-            self=self,
-            correction_type=correction_type
-        )
-        self.model_estim = model_estim
-        self.grouping = grouping
-        if isinstance(groups, list):
-            self.groups = groups
-        else:
-            self.groups = groups.tolist()
-
-        # Values of parameter estimates: coefficients x genes array with one coefficient per group
-        self._theta_mle = model_estim.a_var
-        # Standard deviation of estimates: coefficients x genes array with one coefficient per group
-        # Need .copy() here as nextafter needs mutabls copy.
-        theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
-        theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
-        self._theta_sd = np.sqrt(theta_sd)
-
-    def _correction_pairs(self, pvals, method="fdr_bh") -> np.ndarray:
-        """
-        Performs multiple testing corrections available in statsmodels.stats.multitest.multipletests().
-
-        This overwrites the parent function which uses self.pval which is not used in this
-        lazy implementation.
-
-        :param pvals: P-value array to correct.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        """
-        if self._correction_type.lower() == "global":
-            pval_shape = pvals.shape
-            pvals = np.reshape(pvals, -1)
-            qvals = correction.correct(pvals=pvals, method=method)
-            qvals = np.reshape(qvals, pval_shape)
-        elif self._correction_type.lower() == "by_test":
-            qvals = np.apply_along_axis(
-                func1d=lambda p: correction.correct(pvals=p, method=method),
-                axis=-1,
-                arr=pvals,
-            )
-        else:
-            raise ValueError("method " + method + " not recognized in _correction()")
-
-        return qvals
-
-    def _test(self, **kwargs):
-        """
-        This function is not available in lazy results evaluation as it would
-        require all pairwise tests to be performed.
-        """
-        pass
-
-    def _test_pairs(self, idx0, idx1):
-        """
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :return: p-values
-        """
-        pvals = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
-        for i in idx0:
-            for j in idx1:
-                if i != j:
-                    pvals[i, j] = stats.two_coef_z_test(
-                        theta_mle0=self._theta_mle[i],
-                        theta_mle1=self._theta_mle[j],
-                        theta_sd0=self._theta_sd[i],
-                        theta_sd1=self._theta_sd[j]
-                    )
-                else:
-                    pvals[i, j] = 1
-
-        return pvals
-
-    @property
-    def gene_ids(self) -> np.ndarray:
-        return np.asarray(self.model_estim.input_data.features)
-
-    @property
-    def x(self):
-        return self.model_estim.x
-
-    @property
-    def log_likelihood(self):
-        return self.model_estim.log_likelihood
-
-    @property
-    def model_gradient(self):
-        return self.model_estim.jacobian
-
-    def _ave(self):
-        """
-        Returns the mean expression by gene
-
-        :return: np.ndarray
-        """
-
-        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
-
-    @property
-    def pval(self, **kwargs):
-        """
-        This function is not available in lazy results evaluation as it would
-        require all pairwise tests to be performed.
-        """
-        raise ValueError("This function is not available in lazy results evaluation as it would "
-                         "require all pairwise tests to be performed.")
-
-    @property
-    def qval(self, **kwargs):
-        """
-        This function is not available in lazy results evaluation as it would
-        require all pairwise tests to be performed.
-        """
-        raise ValueError("This function is not available in lazy results evaluation as it would "
-                         "require all pairwise tests to be performed.")
-
-    def log_fold_change(self, base=np.e, **kwargs):
-        """
-        This function is not available in lazy results evaluation as it would
-        require all pairwise tests to be performed.
-        """
-        raise ValueError("This function is not available in lazy results evaluation as it would "
-                         "require all pairwise tests to be performed.")
-
-    @property
-    def summary(self, **kwargs):
-        """
-        This function is not available in lazy results evaluation as it would
-        require all pairwise tests to be performed.
-        """
-        raise ValueError("This function is not available in lazy results evaluation as it would "
-                         "require all pairwise tests to be performed.")
-
-    def _pval_pairs(self, idx0, idx1):
-        """
-        Test-specific p-values accessor for the comparison of groups0 to groups1.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :return: p-values
-        """
-        return self._test_pairs(idx0=idx0, idx1=idx1)
-
-    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
-        """
-        Test-specific q-values accessor for the comparison of both sets of groups.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
-        :return: q-values
-        """
-        return self._correction_pairs(pvals=self._pval_pairs(idx0=idx0, idx1=idx1), method=method)
-
-    def _log_fold_change_pairs(self, idx0, idx1, base):
-        """
-        Test-specific log fold change-values accessor for the comparison of both sets of groups.
-
-        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
-        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
-        :param base: Base of logarithm.
-        :return: log fold change values
-        """
-        logfc = np.zeros(shape=(len(idx0), len(idx1), self._theta_mle.shape[1]))
-        for i in idx0:
-            for j in idx1:
-                logfc[i, j, :] = self._theta_mle[i, :] - self._theta_mle[j, :]
-
-        if base == np.e:
-            return logfc
-        else:
-            return logfc / np.log(base)
-
-
 class DifferentialExpressionTestVsRest(_DifferentialExpressionTestMulti):
     """
     Tests between between each group and the rest for more than 2 groups per gene.
diff --git a/diffxpy/testing/det_pair.py b/diffxpy/testing/det_pair.py
new file mode 100644
index 0000000..895c697
--- /dev/null
+++ b/diffxpy/testing/det_pair.py
@@ -0,0 +1,624 @@
+import abc
+try:
+    import anndata
+except ImportError:
+    anndata = None
+import batchglm.api as glm
+import logging
+import numpy as np
+import pandas as pd
+from typing import List, Union
+
+from ..stats import stats
+from . import correction
+
+from .det import _DifferentialExpressionTestMulti
+
+logger = logging.getLogger("diffxpy")
+
+
+class _DifferentialExpressionTestPairwiseBase(_DifferentialExpressionTestMulti):
+    """
+    Pairwise differential expression tests base class.
+
+    Defines API of accessing test results of pairs of groups. The underlying accessors depend
+    on the type of test and are defined in the subclasses.
+    """
+
+    groups: List[str]
+    _pval: Union[None, np.ndarray]
+    _qval: Union[None, np.ndarray]
+    _logfc: Union[None, np.ndarray]
+
+    def _get_group_idx(self, groups0, groups1):
+        if np.any([x not in self.groups for x in groups0]):
+            raise ValueError('element of groups1 not recognized; not in groups %s' % self.groups)
+        if np.any([x not in self.groups for x in groups1]):
+            raise ValueError('element of groups2 not recognized; not in groups %s' % self.groups)
+
+        return np.array([self.groups.index(x) for x in groups0]), \
+               np.array([self.groups.index(x) for x in groups1])
+
+    def _correction_pairs(self, idx0, idx1, method):
+        if self._correction_type.lower() == "global":
+            pval = self._pval_pairs(idx0=idx0, idx1=idx1)
+            pval_reshape = np.reshape(pval, -1)
+            qvals = correction.correct(pvals=pval_reshape, method=method)
+            qvals = np.reshape(qvals, pval.shape)
+        elif self._correction_type.lower() == "by_test":
+            qvals = np.apply_along_axis(
+                func1d=lambda pv: correction.correct(pvals=pv, method=method),
+                axis=-1,
+                arr=self._pval_pairs(idx0=idx0, idx1=idx1)
+            )
+        return qvals
+
+    def log_fold_change(self, base=np.e, **kwargs):
+        if base == np.e:
+            return self._logfc
+        else:
+            return self._logfc / np.log(base)
+
+    @abc.abstractmethod
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        pass
+
+    def _qval_pairs(self, idx0, idx1, method="fdr_bh"):
+        """
+        Test-specific q-values accessor for the comparison of both sets of groups.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: q-values
+        """
+        return self._correction_pairs(idx0=idx0, idx1=idx1, method=method)
+
+    @abc.abstractmethod
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        pass
+
+    def pval_pairs(self, groups0, groups1):
+        """
+        Get p-values of the comparison of groups1 to groups2.
+
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._pval_pairs(idx0=idx0, idx1=idx1)
+
+    def qval_pairs(self, groups0, groups1, method="fdr_bh"):
+        """
+        Get q-values of the comparison of groups1 to groups2.
+
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: q-values
+        """
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._qval_pairs(idx0=idx0, idx1=idx1, method=method)
+
+    def log10_pval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
+        """
+        Return log10 transformed and cleaned p-values.
+
+        NaN p-values are set to one and p-values below log10_threshold
+        in log10 space are set to log10_threshold.
+
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :param log10_threshold: minimal log10 p-value to return.
+        :return: Cleaned log10 transformed p-values.
+        """
+        pvals = np.reshape(self.pval_pairs(groups0=groups0, groups1=groups1), -1)
+        pvals = np.nextafter(0, 1, out=pvals, where=pvals == 0)
+        log10_pval_clean = np.log(pvals) / np.log(10)
+        log10_pval_clean[np.isnan(log10_pval_clean)] = 1
+        log10_pval_clean = np.clip(log10_pval_clean, log10_threshold, 0, log10_pval_clean)
+        return log10_pval_clean
+
+    def log10_qval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
+        """
+        Return log10 transformed and cleaned q-values.
+
+        NaN p-values are set to one and q-values below log10_threshold
+        in log10 space are set to log10_threshold.
+
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :param log10_threshold: minimal log10 q-value to return.
+        :return: Cleaned log10 transformed q-values.
+        """
+        qvals = np.reshape(self.qval_pairs(groups0=groups0, groups1=groups1), -1)
+        qvals = np.nextafter(0, 1, out=qvals, where=qvals == 0)
+        log10_qval_clean = np.log(qvals) / np.log(10)
+        log10_qval_clean[np.isnan(log10_qval_clean)] = 1
+        log10_qval_clean = np.clip(log10_qval_clean, log10_threshold, 0, log10_qval_clean)
+        return log10_qval_clean
+
+    def log_fold_change_pairs(
+            self,
+            groups0,
+            groups1,
+            base=np.e
+    ):
+        """
+        Get log fold changes of the comparison of group1 and group2.
+
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold changes
+        """
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._log_fold_change_pairs(idx0=idx0, idx1=idx1, base=base)
+
+    def summary(
+            self,
+            qval_thres=None,
+            fc_upper_thres=None,
+            fc_lower_thres=None,
+            mean_thres=None,
+            **kwargs
+    ) -> pd.DataFrame:
+        """
+        Summarize differential expression results into an output table.
+
+        :param qval_thres: Upper bound of corrected p-values for gene to be included.
+        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
+        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
+        :param mean_thres: Lower bound of average expression for gene to be included.
+        :return: Summary table of differential expression test.
+        """
+        res = super(_DifferentialExpressionTestMulti, self).summary(**kwargs)
+
+        return self._threshold_summary(
+            res=res,
+            qval_thres=qval_thres,
+            fc_upper_thres=fc_upper_thres,
+            fc_lower_thres=fc_lower_thres,
+            mean_thres=mean_thres
+        )
+
+    def summary_pairs(
+            self,
+            groups0,
+            groups1,
+            qval_thres=None,
+            fc_upper_thres=None,
+            fc_lower_thres=None,
+            mean_thres=None,
+            method="fdr_bh"
+    ) -> pd.DataFrame:
+        """
+        Summarize differential expression results into an output table.
+
+        :param groups0: List of identifier of first set of group of observations in pair-wise comparison.
+        :param groups1: List of identifier of second set of group of observations in pair-wise comparison.
+        :param qval_thres: Upper bound of corrected p-values for gene to be included.
+        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
+        :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included.
+        :param mean_thres: Lower bound of average expression for gene to be included.
+        :param method: Multiple testing correction method.
+            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
+        :return: pandas.DataFrame with the following columns:
+            - gene: the gene id's
+            - pval: the minimum per-gene p-value of all tests
+            - qval: the minimum per-gene q-value of all tests
+            - log2fc: the maximal/minimal (depending on which one is higher) log2 fold change of the genes
+            - mean: the mean expression of the gene across all groups
+        """
+        assert self.gene_ids is not None
+
+        pval = self.pval_pairs(groups0=groups0, groups1=groups1)
+        qval = self.qval_pairs(groups0=groups0, groups1=groups1, method=method)
+
+        # calculate maximum logFC of lower triangular fold change matrix
+        raw_logfc = self.log_fold_change_pairs(groups0=groups0, groups1=groups1, base=2)
+
+        # first flatten all dimensions up to the last 'gene' dimension
+        flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1])
+        # next, get argmax of flattened logfc and unravel the true indices from it
+        r, c = np.unravel_index(flat_logfc.argmax(0), raw_logfc.shape[:2])
+        # if logfc is maximal in the lower triangular matrix, multiply it with -1
+        logfc = raw_logfc[r, c, np.arange(raw_logfc.shape[-1])] * np.where(r <= c, 1, -1)
+
+        res = pd.DataFrame({
+            "gene": self.gene_ids,
+            "pval": np.min(pval, axis=(0, 1)),
+            "qval": np.min(qval, axis=(0, 1)),
+            "log2fc": np.asarray(logfc),
+            "mean": np.asarray(self.mean)
+        })
+
+        res = self._threshold_summary(
+            res=res,
+            qval_thres=qval_thres,
+            fc_upper_thres=fc_upper_thres,
+            fc_lower_thres=fc_lower_thres,
+            mean_thres=mean_thres
+        )
+
+        return res
+
+
+class DifferentialExpressionTestPairwiseStandard(_DifferentialExpressionTestPairwiseBase):
+    """
+    Pairwise differential expression tests class for tests other than z-tests.
+    """
+
+    def __init__(
+            self,
+            gene_ids,
+            pval,
+            logfc,
+            ave,
+            groups,
+            tests,
+            correction_type: str
+    ):
+        super().__init__(correction_type=correction_type)
+        self._gene_ids = np.asarray(gene_ids)
+        self._logfc = logfc
+        self._pval = pval
+        self._mean = np.asarray(ave).flatten()
+        self.groups = list(np.asarray(groups))
+        self._tests = tests
+
+        _ = self.qval
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return self._gene_ids
+
+    @property
+    def x(self):
+        return None
+
+    @property
+    def tests(self):
+        """
+        If `keep_full_test_objs` was set to `True`, this will return a matrix of differential expression tests.
+        """
+        if self._tests is None:
+            raise ValueError("Individual tests were not kept!")
+
+        return self._tests
+
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        assert np.all([x < self._pval.shape[1] for x in idx0])
+        assert np.all([x < self._pval.shape[1] for x in idx1])
+        return self._pval[idx0, :, :][:, idx1, :]
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        assert np.all([x < self._pval.shape[1] for x in idx0])
+        assert np.all([x < self._pval.shape[1] for x in idx1])
+        if base == np.e:
+            return self._logfc[idx0, :, :][:, idx1, :]
+        else:
+            return self._logfc[idx0, :, :][:, idx1, :] / np.log(base)
+
+
+class DifferentialExpressionTestZTest(_DifferentialExpressionTestPairwiseBase):
+    """
+    Pairwise unit_test between more than 2 groups per gene. This close harbors tests that are precomputed
+    across all pairs of groups. DifferentialExpressionTestZTestLazy is the alternative class that only allows
+    lazy test evaluation.
+    """
+
+    model_estim: glm.typing.EstimatorBaseTyping
+    theta_mle: np.ndarray
+    theta_sd: np.ndarray
+
+    def __init__(
+            self,
+            model_estim: glm.typing.EstimatorBaseTyping,
+            grouping,
+            groups,
+            correction_type: str
+    ):
+        super().__init__(correction_type=correction_type)
+        self.model_estim = model_estim
+        self.grouping = grouping
+        self.groups = list(np.asarray(groups))
+
+        # Values of parameter estimates: coefficients x genes array with one coefficient per group
+        self._theta_mle = model_estim.a_var
+        # Standard deviation of estimates: coefficients x genes array with one coefficient per group
+        # Need .copy() here as nextafter needs mutabls copy.
+        theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
+        theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
+        self._theta_sd = np.sqrt(theta_sd)
+        self._logfc = None
+
+        # Call tests in constructor.
+        _ = self.pval
+        _ = self.qval
+
+    def _test(self, **kwargs):
+        pvals = np.tile(np.NaN, [len(self.groups), len(self.groups), self.model_estim.x.shape[1]])
+        for i, g1 in enumerate(self.groups):
+            for j, g2 in enumerate(self.groups[(i + 1):]):
+                j = j + i + 1
+                pvals[i, j, :] = stats.two_coef_z_test(
+                    theta_mle0=self._theta_mle[i, :],
+                    theta_mle1=self._theta_mle[j, :],
+                    theta_sd0=self._theta_sd[i, :],
+                    theta_sd1=self._theta_sd[j, :]
+                )
+                pvals[j, i, :] = pvals[i, j, :]
+
+        return pvals
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return np.asarray(self.model_estim.input_data.features)
+
+    @property
+    def x(self):
+        return self.model_estim.x
+
+    @property
+    def log_likelihood(self):
+        return self.model_estim.log_likelihood
+
+    @property
+    def model_gradient(self):
+        return self.model_estim.jacobian
+
+    def _ave(self):
+        """
+        Returns the mean expression by gene
+
+        :return: np.ndarray
+        """
+
+        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
+
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        assert np.all([x < self.pval.shape[1] for x in idx0])
+        assert np.all([x < self.pval.shape[1] for x in idx1])
+        return self.pval[idx0, idx1]
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of groups1 to groups2.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        if self._logfc is None:
+            logfc = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
+            for i, xi in enumerate(idx0):
+                for j, xj in enumerate(idx1):
+                    logfc[i, j, :] = self._theta_mle[xj, :] - self._theta_mle[xi, :]
+                    logfc[j, i, :] = -logfc[i, j, :]
+            self._logfc = logfc
+
+        if base == np.e:
+            return self._logfc[idx0, :, :][:, idx1, :]
+        else:
+            return self._logfc[idx0, :, :][:, idx1, :] / np.log(base)
+
+
+class _DifferentialExpressionTestPairwiseLazyBase(_DifferentialExpressionTestPairwiseBase):
+    """
+    Lazy pairwise differential expression tests base class.
+
+    In addition to the method of a standard pairwise test, this base class throws errors for attributes
+    that are not accessible in a lazy pairwise test.
+    """
+
+    def _test(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    @property
+    def pval(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    @property
+    def qval(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    def log_fold_change(self, base=np.e, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    @property
+    def summary(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    @abc.abstractmethod
+    def _test_pairs(self, idx0, idx1):
+        """
+        Run differential expression tests on selected pairs of groups.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        pass
+
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups0 to groups1.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        return self._test_pairs(idx0=idx0, idx1=idx1)
+
+
+class DifferentialExpressionTestZTestLazy(_DifferentialExpressionTestPairwiseLazyBase):
+    """
+    Pairwise unit_test between more than 2 groups per gene with lazy evaluation.
+
+    This class performs pairwise tests upon enquiry only and does not store them
+    and is therefore suited so very large group sets for which the lfc and
+    p-value matrices of the size [genes, groups, groups] are too big to fit into
+    memory.
+    """
+
+    model_estim: glm.typing.EstimatorBaseTyping
+    _theta_mle: np.ndarray
+    _theta_sd: np.ndarray
+
+    def __init__(
+            self,
+            model_estim: glm.typing.EstimatorBaseTyping,
+            grouping, groups,
+            correction_type="global"
+    ):
+        _DifferentialExpressionTestMulti.__init__(
+            self=self,
+            correction_type=correction_type
+        )
+        self.model_estim = model_estim
+        self.grouping = grouping
+        if isinstance(groups, list):
+            self.groups = groups
+        else:
+            self.groups = groups.tolist()
+
+        # Values of parameter estimates: coefficients x genes array with one coefficient per group
+        self._theta_mle = model_estim.a_var
+        # Standard deviation of estimates: coefficients x genes array with one coefficient per group
+        # Need .copy() here as nextafter needs mutabls copy.
+        theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
+        theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
+        self._theta_sd = np.sqrt(theta_sd)
+
+    def _test_pairs(self, idx0, idx1):
+        """
+        Run differential expression tests on selected pairs of groups.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :return: p-values
+        """
+        pvals = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
+        for i, xi in enumerate(idx0):
+            for j, xj in enumerate(idx1):
+                if i != j:
+                    pvals[i, j, :] = stats.two_coef_z_test(
+                        theta_mle0=self._theta_mle[xi, :],
+                        theta_mle1=self._theta_mle[xj, :],
+                        theta_sd0=self._theta_sd[xi, :],
+                        theta_sd1=self._theta_sd[xj, :]
+                    )
+                else:
+                    pvals[i, j, :] = np.array([1.])
+
+        return pvals
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return np.asarray(self.model_estim.input_data.features)
+
+    @property
+    def x(self):
+        return self.model_estim.x
+
+    @property
+    def log_likelihood(self):
+        return self.model_estim.log_likelihood
+
+    @property
+    def model_gradient(self):
+        return self.model_estim.jacobian
+
+    def _ave(self):
+        """
+        Returns the mean expression by gene
+
+        :return: np.ndarray
+        """
+        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of both sets of groups.
+
+        :param idx0: List of indices of first set of group of observations in pair-wise comparison.
+        :param idx1: List of indices of second set of group of observations in pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        logfc = np.zeros(shape=(len(idx0), len(idx1), self._theta_mle.shape[1]))
+        for i, xi in enumerate(idx0):
+            for j, xj in enumerate(idx1):
+                logfc[i, j, :] = self._theta_mle[xi, :] - self._theta_mle[xj, :]
+
+        if base == np.e:
+            return logfc
+        else:
+            return logfc / np.log(base)
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index decfb96..fa97d38 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -15,9 +15,10 @@
 from diffxpy.models.batch_bfgs.optim import Estim_BFGS
 from .det import DifferentialExpressionTestLRT, DifferentialExpressionTestWald, \
     DifferentialExpressionTestTT, DifferentialExpressionTestRank, _DifferentialExpressionTestSingle, \
-    DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, DifferentialExpressionTestPairwiseStandard, \
     DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition, \
     DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
+from .det_pair import DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, \
+    DifferentialExpressionTestPairwiseStandard
 from .utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
     constraint_system_from_star
 
diff --git a/diffxpy/unit_test/test_pairwise.py b/diffxpy/unit_test/test_pairwise.py
index 055873c..dd88008 100644
--- a/diffxpy/unit_test/test_pairwise.py
+++ b/diffxpy/unit_test/test_pairwise.py
@@ -75,11 +75,14 @@ def _test_null_distribution_basic(
         )
         if not lazy:
             _ = det.summary()
+            _ = det.pval
+            _ = det.qval
+            _ = det.log_fold_change()
         # Single pair accessors:
         _ = det.pval_pairs(groups0="0", groups1="1")
         _ = det.qval_pairs(groups0="0", groups1="1")
-        _ = det.log10_pval_pair_cleans(groups0="0", groups1="1")
-        _ = det.log10_qval_pair_cleans(groups0="0", groups1="1")
+        _ = det.log10_pval_pairs_clean(groups0="0", groups1="1")
+        _ = det.log10_qval_pairs_clean(groups0="0", groups1="1")
         _ = det.log_fold_change_pairs(groups0="0", groups1="1")
         _ = det.summary_pairs(groups0="0", groups1="1")
 

From 0a5f25f92e24fb382bc81c9478fc4fe2c8517fdf Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 28 Aug 2019 11:25:41 -0700
Subject: [PATCH 03/25] improved conintuous model interface

---
 diffxpy/testing/det.py               | 472 ------------------------
 diffxpy/testing/det_cont.py          | 512 +++++++++++++++++++++++++++
 diffxpy/testing/tests.py             |   7 +-
 diffxpy/unit_test/test_continuous.py | 189 +++++-----
 4 files changed, 618 insertions(+), 562 deletions(-)
 create mode 100644 diffxpy/testing/det_cont.py

diff --git a/diffxpy/testing/det.py b/diffxpy/testing/det.py
index 8c01ce6..f13310c 100644
--- a/diffxpy/testing/det.py
+++ b/diffxpy/testing/det.py
@@ -2854,475 +2854,3 @@ def summary(self, qval_thres=None, fc_upper_thres=None,
 
         return res
 
-
-class _DifferentialExpressionTestCont(_DifferentialExpressionTestSingle):
-    _de_test: _DifferentialExpressionTestSingle
-    _model_estim: glm.typing.EstimatorBaseTyping
-    _size_factors: np.ndarray
-    _continuous_coords: np.ndarray
-    _spline_coefs: list
-
-    def __init__(
-            self,
-            de_test: _DifferentialExpressionTestSingle,
-            model_estim: glm.typing.EstimatorBaseTyping,
-            size_factors: np.ndarray,
-            continuous_coords: np.ndarray,
-            spline_coefs: list,
-            noise_model: str
-    ):
-        self._de_test = de_test
-        self._model_estim = model_estim
-        self._size_factors = size_factors
-        self._continuous_coords = continuous_coords
-        self._spline_coefs = spline_coefs
-        self.noise_model = noise_model
-
-    @property
-    def gene_ids(self) -> np.ndarray:
-        return self._de_test.gene_ids
-
-    @property
-    def x(self):
-        return self._de_test.x
-
-    @property
-    def pval(self) -> np.ndarray:
-        return self._de_test.pval
-
-    @property
-    def qval(self) -> np.ndarray:
-        return self._de_test.qval
-
-    @property
-    def mean(self) -> np.ndarray:
-        return self._de_test.mean
-
-    @property
-    def log_likelihood(self) -> np.ndarray:
-        return self._de_test.log_likelihood
-
-    def summary(
-            self,
-            nonnumeric=False,
-            qval_thres=None,
-            fc_upper_thres=None,
-            fc_lower_thres=None,
-            mean_thres=None
-    ) -> pd.DataFrame:
-        """
-        Summarize differential expression results into an output table.
-
-        :param nonnumeric: Whether to include non-numeric covariates in fit.
-        """
-        # Collect summary from differential test object.
-        res = self._de_test.summary()
-        # Overwrite fold change with fold change from temporal model.
-        # Note that log2_fold_change calls log_fold_change from this class
-        # and not from the self._de_test object,
-        # which is called by self._de_test.summary().
-        res['log2fc'] = self.log2_fold_change()
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
-
-    def log_fold_change(self, base=np.e, genes=None, nonnumeric=False):
-        """
-        Return log_fold_change based on fitted expression values by gene.
-
-        The log_fold_change is defined as the log of the fold change
-        from the minimal to the maximal fitted value by gene.
-
-        :param base: Basis of logarithm.
-        :param genes: Genes for which to return maximum fitted value. Defaults
-            to all genes if None.
-        :param nonnumeric: Whether to include non-numeric covariates in fit.
-        :return: Log-fold change of fitted expression value by gene.
-        """
-        if genes is None:
-            genes = np.asarray(range(self.x.shape[1]))
-        else:
-            genes = self._idx_genes(genes)
-
-        fc = self.max(genes=genes, non_numeric=nonnumeric) - self.min(genes=genes, non_numeric=nonnumeric)
-        fc = np.nextafter(0, 1, out=fc, where=fc == 0)
-
-        return np.log(fc) / np.log(base)
-
-    def _filter_genes_str(self, genes: list):
-        """
-        Filter genes indexed by ID strings by list of genes given in data set.
-
-        :param genes: List of genes to filter.
-        :return: Filtered list of genes
-        """
-        genes_found = np.array([x in self.gene_ids for x in genes])
-        if any(np.logical_not(genes_found)):
-            logger.info("did not find some genes, omitting")
-            genes = genes[genes_found]
-        return genes
-
-    def _filter_genes_int(self, genes: list):
-        """
-        Filter genes indexed by integers by gene list length.
-
-        :param genes: List of genes to filter.
-        :return: Filtered list of genes
-        """
-        genes_found = np.array([x < self.x.shape[1] for x in genes])
-        if any(genes_found == False):
-            logger.info("did not find some genes, omitting")
-            genes = genes[genes_found]
-        return genes
-
-    def _idx_genes(self, genes):
-        if not isinstance(genes, list):
-            if isinstance(genes, np.ndarray):
-                genes = genes.tolist()
-            else:
-                genes = [genes]
-
-        if isinstance(genes[0], str):
-            genes = self._filter_genes_str(genes)
-            genes = np.array([self.gene_ids.tolist().index(x) for x in genes])
-        elif isinstance(genes[0], int) or isinstance(genes[0], np.int64):
-            genes = self._filter_genes_int(genes)
-        else:
-            raise ValueError("only string and integer elements allowed in genes")
-        return genes
-
-    def _spline_par_loc_idx(self, intercept=True):
-        """
-        Get indices of spline basis model parameters in
-        entire location parameter model parameter set.
-
-        :param intercept: Whether to include intercept.
-        :return: Indices of spline basis parameters of location model.
-        """
-        par_loc_names = self._model_estim.design_loc.coords['design_loc_params'].values.tolist()
-        idx = [par_loc_names.index(x) for x in self._spline_coefs]
-        if 'Intercept' in par_loc_names and intercept == True:
-            idx = np.concatenate([np.where([[x == 'Intercept' for x in par_loc_names]])[0], idx])
-        return idx
-
-    def _continuous_model(self, idx, non_numeric=False):
-        """
-        Recover continuous fit for a gene.
-
-        :param idx: Index of genes to recover fit for.
-        :param non_numeric: Whether to include non-numeric covariates in fit.
-        :return: Continuuos fit for each cell for given gene.
-        """
-        idx = np.asarray(idx)
-        if non_numeric:
-            mu = np.matmul(self._model_estim.input_data.design_loc,
-                           self._model_estim.par_link_loc[:, idx])
-            if self._size_factors is not None:
-                mu = mu + self._size_factors
-        else:
-            idx_basis = self._spline_par_loc_idx(intercept=True)
-            mu = np.matmul(self._model_estim.input_data.design_loc[:, idx_basis],
-                           self._model_estim.par_link_loc[idx_basis, idx])
-
-        mu = np.exp(mu)
-        return mu
-
-    def max(self, genes, non_numeric=False):
-        """
-        Return maximum fitted expression value by gene.
-
-        :param genes: Genes for which to return maximum fitted value.
-        :param non_numeric: Whether to include non-numeric covariates in fit.
-        :return: Maximum fitted expression value by gene.
-        """
-        genes = self._idx_genes(genes)
-        return np.array([np.max(self._continuous_model(idx=i, non_numeric=non_numeric))
-                         for i in genes])
-
-    def min(self, genes, non_numeric=False):
-        """
-        Return minimum fitted expression value by gene.
-
-        :param genes: Genes for which to return maximum fitted value.
-        :param non_numeric: Whether to include non-numeric covariates in fit.
-        :return: Maximum fitted expression value by gene.
-        """
-        genes = self._idx_genes(genes)
-        return np.array([np.min(self._continuous_model(idx=i, non_numeric=non_numeric))
-                         for i in genes])
-
-    def argmax(self, genes, non_numeric=False):
-        """
-        Return maximum fitted expression value by gene.
-
-        :param genes: Genes for which to return maximum fitted value.
-        :param non_numeric: Whether to include non-numeric covariates in fit.
-        :return: Maximum fitted expression value by gene.
-        """
-        genes = self._idx_genes(genes)
-        idx = np.array([np.argmax(self._continuous_model(idx=i, non_numeric=non_numeric))
-                        for i in genes])
-        return self._continuous_coords[idx]
-
-    def argmin(self, genes, non_numeric=False):
-        """
-        Return minimum fitted expression value by gene.
-
-        :param genes: Genes for which to return maximum fitted value.
-        :param non_numeric: Whether to include non-numeric covariates in fit.
-        :return: Maximum fitted expression value by gene.
-        """
-        genes = self._idx_genes(genes)
-        idx = np.array([np.argmin(self._continuous_model(idx=i, non_numeric=non_numeric))
-                        for i in genes])
-        return self._continuous_coords[idx]
-
-    def plot_genes(
-            self,
-            genes,
-            hue=None,
-            size=1,
-            log=True,
-            non_numeric=False,
-            save=None,
-            show=True,
-            ncols=2,
-            row_gap=0.3,
-            col_gap=0.25,
-            return_axs=False
-    ):
-        """
-        Plot observed data and spline fits of selected genes.
-
-        :param genes: Gene IDs to plot.
-        :param hue: Confounder to include in plot.
-        :param size: Point size.
-        :param log: Whether to log values.
-        :param non_numeric:
-        :param save: Path+file name stem to save plots to.
-            File will be save+"_genes.png". Does not save if save is None.
-        :param show: Whether to display plot.
-        :param ncols: Number of columns in plot grid if multiple genes are plotted.
-        :param row_gap: Vertical gap between panel rows relative to panel height.
-        :param col_gap: Horizontal gap between panel columns relative to panel width.
-        :param return_axs: Whether to return axis objects of plots.
-        :return: Matplotlib axis objects.
-        """
-
-        import seaborn as sns
-        import matplotlib.pyplot as plt
-        from matplotlib import gridspec
-        from matplotlib import rcParams
-
-        plt.ioff()
-
-        gene_idx = self._idx_genes(genes)
-
-        # Set up gridspec.
-        ncols = ncols if len(gene_idx) > ncols else len(gene_idx)
-        nrows = len(gene_idx) // ncols + (len(gene_idx) - (len(gene_idx) // ncols) * ncols)
-        gs = gridspec.GridSpec(
-            nrows=nrows,
-            ncols=ncols,
-            hspace=row_gap,
-            wspace=col_gap
-        )
-
-        # Define figure size based on panel number and grid.
-        fig = plt.figure(
-            figsize=(
-                ncols * rcParams['figure.figsize'][0],  # width in inches
-                nrows * rcParams['figure.figsize'][1] * (1 + row_gap)  # height in inches
-            )
-        )
-
-        # Build axis objects in loop.
-        axs = []
-        for i, g in enumerate(gene_idx):
-            ax = plt.subplot(gs[i])
-            axs.append(ax)
-
-            y = self.x[:, g]
-            yhat = self._continuous_model(idx=g, non_numeric=non_numeric)
-            if log:
-                y = np.log(y + 1)
-                yhat = np.log(yhat + 1)
-
-            sns.scatterplot(
-                x=self._continuous_coords,
-                y=y,
-                hue=hue,
-                size=size,
-                ax=ax,
-                legend=False
-            )
-            sns.lineplot(
-                x=self._continuous_coords,
-                y=yhat,
-                hue=hue,
-                ax=ax
-            )
-
-            ax.set_title(genes[i])
-            ax.set_xlabel("continuous")
-            if log:
-                ax.set_ylabel("log expression")
-            else:
-                ax.set_ylabel("expression")
-
-        # Save, show and return figure.
-        if save is not None:
-            plt.savefig(save+'_genes.png')
-
-        if show:
-            plt.show()
-
-        plt.close(fig)
-
-        if return_axs:
-            return axs
-        else:
-            return
-
-    def plot_heatmap(
-            self,
-            genes: Union[list, np.ndarray],
-            save=None,
-            show=True,
-            transform: str = "zscore",
-            nticks=10,
-            cmap: str = "YlGnBu",
-            width=10,
-            height_per_gene=0.5,
-            return_axs=False
-    ):
-        """
-        Plot observed data and spline fits of selected genes.
-
-        :param genes: Gene IDs to plot.
-        :param save: Path+file name stem to save plots to.
-            File will be save+"_genes.png". Does not save if save is None.
-        :param show: Whether to display plot.
-        :param transform: Gene-wise transform to use.
-        :param nticks: Number of x ticks.
-        :param cmap: matplotlib cmap.
-        :param width: Width of heatmap figure.
-        :param height_per_gene: Height of each row (gene) in heatmap figure.
-        :param return_axs: Whether to return axis objects of plots.
-        :return: Matplotlib axis objects.
-        """
-        import seaborn as sns
-        import matplotlib.pyplot as plt
-
-        plt.ioff()
-
-        gene_idx = self._idx_genes(genes)
-
-        # Define figure.
-        fig = plt.figure(figsize=(width, height_per_gene * len(gene_idx)))
-        ax = fig.add_subplot(111)
-
-        # Build heatmap matrix.
-        # Add in data.
-        data = np.array([
-            self._continuous_model(idx=g, non_numeric=False)
-            for i, g in enumerate(gene_idx)
-        ])
-        # Order columns by continuous covariate.
-        idx_x_sorted = np.argsort(self._continuous_coords)
-        data = data[:, idx_x_sorted]
-        xcoord = self._continuous_coords[idx_x_sorted]
-
-        if transform.lower() == "log10":
-            data = np.nextafter(0, 1, out=data, where=data == 0)
-            data = np.log(data) / np.log(10)
-        elif transform.lower() == "zscore":
-            mu = np.mean(data, axis=0)
-            sd = np.std(data, axis=0)
-            sd = np.nextafter(0, 1, out=sd, where=sd == 0)
-            data = np.array([(x - mu[i]) / sd[i] for i, x in enumerate(data)])
-        elif transform.lower() == "none":
-            pass
-        else:
-            raise ValueError("transform not recognized in plot_heatmap()")
-
-        # Create heatmap.
-        sns.heatmap(data=data, cmap=cmap, ax=ax)
-
-        # Set up axis labels.
-        xtick_pos = np.asarray(np.round(np.linspace(
-            start=0,
-            stop=data.shape[1] - 1,
-            num=nticks,
-            endpoint=True
-        )), dtype=int)
-        xtick_lab = [str(np.round(xcoord[np.argmin(np.abs(xcoord - xcoord[i]))], 2))
-                     for i in xtick_pos]
-        ax.set_xticks(xtick_pos)
-        ax.set_xticklabels(xtick_lab)
-        ax.set_xlabel("continuous")
-        plt.yticks(np.arange(len(genes)), genes, rotation='horizontal')
-        ax.set_ylabel("genes")
-
-        # Save, show and return figure.
-        if save is not None:
-            plt.savefig(save + '_genes.png')
-
-        if show:
-            plt.show()
-
-        plt.close(fig)
-
-        if return_axs:
-            return ax
-        else:
-            return
-
-
-class DifferentialExpressionTestWaldCont(_DifferentialExpressionTestCont):
-    de_test: DifferentialExpressionTestWald
-
-    def __init__(
-            self,
-            de_test: DifferentialExpressionTestWald,
-            size_factors: np.ndarray,
-            continuous_coords: np.ndarray,
-            spline_coefs: list,
-            noise_model: str,
-    ):
-        super().__init__(
-            de_test=de_test,
-            model_estim=de_test.model_estim,
-            size_factors=size_factors,
-            continuous_coords=continuous_coords,
-            spline_coefs=spline_coefs,
-            noise_model=noise_model
-        )
-
-
-class DifferentialExpressionTestLRTCont(_DifferentialExpressionTestCont):
-    de_test: DifferentialExpressionTestLRT
-
-    def __init__(
-            self,
-            de_test: DifferentialExpressionTestLRT,
-            size_factors: np.ndarray,
-            continuous_coords: np.ndarray,
-            spline_coefs: list,
-            noise_model: str
-    ):
-        super().__init__(
-            de_test=de_test,
-            model_estim=de_test.full_estim,
-            size_factors=size_factors,
-            continuous_coords=continuous_coords,
-            spline_coefs=spline_coefs,
-            noise_model=noise_model
-        )
diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
new file mode 100644
index 0000000..f3b5d59
--- /dev/null
+++ b/diffxpy/testing/det_cont.py
@@ -0,0 +1,512 @@
+import abc
+try:
+    import anndata
+except ImportError:
+    anndata = None
+import batchglm.api as glm
+import logging
+import numpy as np
+import pandas as pd
+from typing import Union
+
+from .det import _DifferentialExpressionTestSingle, DifferentialExpressionTestWald, DifferentialExpressionTestLRT
+
+logger = logging.getLogger("diffxpy")
+
+
+class _DifferentialExpressionTestCont(_DifferentialExpressionTestSingle):
+    _de_test: _DifferentialExpressionTestSingle
+    _model_estim: glm.typing.EstimatorBaseTyping
+    _size_factors: np.ndarray
+    _continuous_coords: np.ndarray
+    _spline_coefs: list
+
+    def __init__(
+            self,
+            de_test: _DifferentialExpressionTestSingle,
+            model_estim: glm.typing.EstimatorBaseTyping,
+            size_factors: np.ndarray,
+            continuous_coords: np.ndarray,
+            spline_coefs: list,
+            noise_model: str
+    ):
+        self._de_test = de_test
+        self._model_estim = model_estim
+        self._size_factors = size_factors
+        self._continuous_coords = continuous_coords
+        self._spline_coefs = spline_coefs
+        self.noise_model = noise_model
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return self._de_test.gene_ids
+
+    @property
+    def x(self):
+        return self._de_test.x
+
+    @property
+    def pval(self) -> np.ndarray:
+        return self._de_test.pval
+
+    @property
+    def qval(self) -> np.ndarray:
+        return self._de_test.qval
+
+    @property
+    def mean(self) -> np.ndarray:
+        return self._de_test.mean
+
+    @property
+    def log_likelihood(self) -> np.ndarray:
+        return self._de_test.log_likelihood
+
+    def summary(
+            self,
+            non_numeric=False,
+            qval_thres=None,
+            fc_upper_thres=None,
+            fc_lower_thres=None,
+            mean_thres=None
+    ) -> pd.DataFrame:
+        """
+        Summarize differential expression results into an output table.
+
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        """
+        # Collect summary from differential test object.
+        res = self._de_test.summary()
+        # Overwrite fold change with fold change from temporal model.
+        # Note that log2_fold_change calls log_fold_change from this class
+        # and not from the self._de_test object,
+        # which is called by self._de_test.summary().
+        res['log2fc'] = self.log2_fold_change()
+
+        res = self._threshold_summary(
+            res=res,
+            qval_thres=qval_thres,
+            fc_upper_thres=fc_upper_thres,
+            fc_lower_thres=fc_lower_thres,
+            mean_thres=mean_thres
+        )
+
+        return res
+
+    def log_fold_change(self, base=np.e, genes=None, non_numeric=False):
+        """
+        Return log_fold_change based on fitted expression values by gene.
+
+        The log_fold_change is defined as the log of the fold change
+        from the minimal to the maximal fitted value by gene.
+
+        :param base: Basis of logarithm.
+        :param genes: Genes for which to return maximum fitted value. Defaults
+            to all genes if None.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Log-fold change of fitted expression value by gene.
+        """
+        if genes is None:
+            genes = np.asarray(range(self.x.shape[1]))
+        else:
+            genes = self._idx_genes(genes)
+
+        max_val = self.max(genes=genes, non_numeric=non_numeric)
+        min_val = self.min(genes=genes, non_numeric=non_numeric)
+        max_val = np.nextafter(0, 1, out=max_val, where=max_val == 0)
+        min_val = np.nextafter(0, 1, out=min_val, where=min_val == 0)
+        return (np.log(max_val) - np.log(min_val)) / np.log(base)
+
+    def log2_fold_change(self, genes=None, non_numeric=False):
+        """
+        Calculates the pairwise log_2 fold change(s) for this DifferentialExpressionTest.
+
+        :param genes: Genes for which to return maximum fitted value. Defaults
+            to all genes if None.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Log-fold change of fitted expression value by gene.
+        """
+        return self.log_fold_change(base=2, genes=genes, non_numeric=non_numeric)
+
+    def log10_fold_change(self, genes=None, non_numeric=False):
+        """
+        Calculates the log_10 fold change(s) for this DifferentialExpressionTest.
+
+        :param genes: Genes for which to return maximum fitted value. Defaults
+            to all genes if None.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Log-fold change of fitted expression value by gene.
+        """
+        return self.log_fold_change(base=10, genes=genes, non_numeric=non_numeric)
+
+    def _filter_genes_str(self, genes: list):
+        """
+        Filter genes indexed by ID strings by list of genes given in data set.
+
+        :param genes: List of genes to filter.
+        :return: Filtered list of genes
+        """
+        genes_found = np.array([x in self.gene_ids for x in genes])
+        if any(np.logical_not(genes_found)):
+            logger.info("did not find some genes, omitting")
+            genes = genes[genes_found]
+        return genes
+
+    def _filter_genes_int(self, genes: list):
+        """
+        Filter genes indexed by integers by gene list length.
+
+        :param genes: List of genes to filter.
+        :return: Filtered list of genes
+        """
+        genes_found = np.array([x < self.x.shape[1] for x in genes])
+        if any(np.logical_not(genes_found)):
+            logger.info("did not find some genes, omitting")
+            genes = genes[genes_found]
+        return genes
+
+    def _idx_genes(self, genes):
+        if not isinstance(genes, list):
+            if isinstance(genes, np.ndarray):
+                genes = genes.tolist()
+            elif isinstance(genes, pd.Index):
+                genes = genes.tolist()
+            else:
+                genes = [genes]
+
+        if isinstance(genes[0], str):
+            genes = self._filter_genes_str(genes)
+            genes = np.array([self.gene_ids.tolist().index(x) for x in genes])
+        elif isinstance(genes[0], int) or isinstance(genes[0], np.int64):
+            genes = self._filter_genes_int(genes)
+        else:
+            raise ValueError("only string and integer elements allowed in genes")
+        return genes
+
+    def _spline_par_loc_idx(self, intercept=True):
+        """
+        Get indices of spline basis model parameters in
+        entire location parameter model parameter set.
+
+        :param intercept: Whether to include intercept.
+        :return: Indices of spline basis parameters of location model.
+        """
+        par_loc_names = self._model_estim.input_data.loc_names
+        idx = [par_loc_names.index(x) for x in self._spline_coefs]
+        if 'Intercept' in par_loc_names and intercept:
+            idx = np.concatenate([np.where([[x == 'Intercept' for x in par_loc_names]])[0], idx])
+        return idx
+
+    def _continuous_model(self, idx, non_numeric=False):
+        """
+        Recover continuous fit for a gene.
+
+        :param idx: Index of genes to recover fit for.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Continuuos fit for each cell for given gene.
+        """
+        idx = np.asarray(idx)
+        if non_numeric:
+            mu = np.matmul(self._model_estim.input_data.design_loc,
+                           self._model_estim.model.a[:, idx])
+            if self._size_factors is not None:
+                mu = mu + self._size_factors
+        else:
+            idx_basis = self._spline_par_loc_idx(intercept=True)
+            mu = np.matmul(self._model_estim.input_data.design_loc[:, idx_basis],
+                           self._model_estim.model.a[idx_basis, idx])
+
+        mu = np.exp(mu)
+        return mu
+
+    def max(self, genes, non_numeric=False):
+        """
+        Return maximum fitted expression value by gene.
+
+        :param genes: Genes for which to return maximum fitted value.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Maximum fitted expression value by gene.
+        """
+        genes = self._idx_genes(genes)
+        return np.array([np.max(self._continuous_model(idx=i, non_numeric=non_numeric))
+                         for i in genes])
+
+    def min(self, genes, non_numeric=False):
+        """
+        Return minimum fitted expression value by gene.
+
+        :param genes: Genes for which to return maximum fitted value.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Maximum fitted expression value by gene.
+        """
+        genes = self._idx_genes(genes)
+        return np.array([np.min(self._continuous_model(idx=i, non_numeric=non_numeric))
+                         for i in genes])
+
+    def argmax(self, genes, non_numeric=False):
+        """
+        Return maximum fitted expression value by gene.
+
+        :param genes: Genes for which to return maximum fitted value.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Maximum fitted expression value by gene.
+        """
+        genes = self._idx_genes(genes)
+        idx = np.array([np.argmax(self._continuous_model(idx=i, non_numeric=non_numeric))
+                        for i in genes])
+        return self._continuous_coords[idx]
+
+    def argmin(self, genes, non_numeric=False):
+        """
+        Return minimum fitted expression value by gene.
+
+        :param genes: Genes for which to return maximum fitted value.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Maximum fitted expression value by gene.
+        """
+        genes = self._idx_genes(genes)
+        idx = np.array([np.argmin(self._continuous_model(idx=i, non_numeric=non_numeric))
+                        for i in genes])
+        return self._continuous_coords[idx]
+
+    def plot_genes(
+            self,
+            genes,
+            hue=None,
+            size=1,
+            log=True,
+            non_numeric=False,
+            save=None,
+            show=True,
+            ncols=2,
+            row_gap=0.3,
+            col_gap=0.25,
+            return_axs=False
+    ):
+        """
+        Plot observed data and spline fits of selected genes.
+
+        :param genes: Gene IDs to plot.
+        :param hue: Confounder to include in plot.
+        :param size: Point size.
+        :param log: Whether to log values.
+        :param non_numeric:
+        :param save: Path+file name stem to save plots to.
+            File will be save+"_genes.png". Does not save if save is None.
+        :param show: Whether to display plot.
+        :param ncols: Number of columns in plot grid if multiple genes are plotted.
+        :param row_gap: Vertical gap between panel rows relative to panel height.
+        :param col_gap: Horizontal gap between panel columns relative to panel width.
+        :param return_axs: Whether to return axis objects of plots.
+        :return: Matplotlib axis objects.
+        """
+
+        import seaborn as sns
+        import matplotlib.pyplot as plt
+        from matplotlib import gridspec
+        from matplotlib import rcParams
+
+        plt.ioff()
+
+        gene_idx = self._idx_genes(genes)
+
+        # Set up gridspec.
+        ncols = ncols if len(gene_idx) > ncols else len(gene_idx)
+        nrows = len(gene_idx) // ncols + (len(gene_idx) - (len(gene_idx) // ncols) * ncols)
+        gs = gridspec.GridSpec(
+            nrows=nrows,
+            ncols=ncols,
+            hspace=row_gap,
+            wspace=col_gap
+        )
+
+        # Define figure size based on panel number and grid.
+        fig = plt.figure(
+            figsize=(
+                ncols * rcParams['figure.figsize'][0],  # width in inches
+                nrows * rcParams['figure.figsize'][1] * (1 + row_gap)  # height in inches
+            )
+        )
+
+        # Build axis objects in loop.
+        axs = []
+        for i, g in enumerate(gene_idx):
+            ax = plt.subplot(gs[i])
+            axs.append(ax)
+
+            y = self.x[:, g]
+            yhat = self._continuous_model(idx=g, non_numeric=non_numeric)
+            if log:
+                y = np.log(y + 1)
+                yhat = np.log(yhat + 1)
+
+            sns.scatterplot(
+                x=self._continuous_coords,
+                y=y,
+                hue=hue,
+                size=size,
+                ax=ax,
+                legend=False
+            )
+            sns.lineplot(
+                x=self._continuous_coords,
+                y=yhat,
+                hue=hue,
+                ax=ax
+            )
+
+            ax.set_title(genes[i])
+            ax.set_xlabel("continuous")
+            if log:
+                ax.set_ylabel("log expression")
+            else:
+                ax.set_ylabel("expression")
+
+        # Save, show and return figure.
+        if save is not None:
+            plt.savefig(save+'_genes.png')
+
+        if show:
+            plt.show()
+
+        plt.close(fig)
+
+        if return_axs:
+            return axs
+        else:
+            return
+
+    def plot_heatmap(
+            self,
+            genes: Union[list, np.ndarray],
+            save=None,
+            show=True,
+            transform: str = "zscore",
+            nticks=10,
+            cmap: str = "YlGnBu",
+            width=10,
+            height_per_gene=0.5,
+            return_axs=False
+    ):
+        """
+        Plot observed data and spline fits of selected genes.
+
+        :param genes: Gene IDs to plot.
+        :param save: Path+file name stem to save plots to.
+            File will be save+"_genes.png". Does not save if save is None.
+        :param show: Whether to display plot.
+        :param transform: Gene-wise transform to use.
+        :param nticks: Number of x ticks.
+        :param cmap: matplotlib cmap.
+        :param width: Width of heatmap figure.
+        :param height_per_gene: Height of each row (gene) in heatmap figure.
+        :param return_axs: Whether to return axis objects of plots.
+        :return: Matplotlib axis objects.
+        """
+        import seaborn as sns
+        import matplotlib.pyplot as plt
+
+        plt.ioff()
+
+        gene_idx = self._idx_genes(genes)
+
+        # Define figure.
+        fig = plt.figure(figsize=(width, height_per_gene * len(gene_idx)))
+        ax = fig.add_subplot(111)
+
+        # Build heatmap matrix.
+        # Add in data.
+        data = np.array([
+            self._continuous_model(idx=g, non_numeric=False)
+            for i, g in enumerate(gene_idx)
+        ])
+        # Order columns by continuous covariate.
+        idx_x_sorted = np.argsort(self._continuous_coords)
+        data = data[:, idx_x_sorted]
+        xcoord = self._continuous_coords[idx_x_sorted]
+
+        if transform.lower() == "log10":
+            data = np.nextafter(0, 1, out=data, where=data == 0)
+            data = np.log(data) / np.log(10)
+        elif transform.lower() == "zscore":
+            mu = np.mean(data, axis=0)
+            sd = np.std(data, axis=0)
+            sd = np.nextafter(0, 1, out=sd, where=sd == 0)
+            data = np.array([(x - mu[i]) / sd[i] for i, x in enumerate(data)])
+        elif transform.lower() == "none":
+            pass
+        else:
+            raise ValueError("transform not recognized in plot_heatmap()")
+
+        # Create heatmap.
+        sns.heatmap(data=data, cmap=cmap, ax=ax)
+
+        # Set up axis labels.
+        xtick_pos = np.asarray(np.round(np.linspace(
+            start=0,
+            stop=data.shape[1] - 1,
+            num=nticks,
+            endpoint=True
+        )), dtype=int)
+        xtick_lab = [str(np.round(xcoord[np.argmin(np.abs(xcoord - xcoord[i]))], 2))
+                     for i in xtick_pos]
+        ax.set_xticks(xtick_pos)
+        ax.set_xticklabels(xtick_lab)
+        ax.set_xlabel("continuous")
+        plt.yticks(np.arange(len(genes)), genes, rotation='horizontal')
+        ax.set_ylabel("genes")
+
+        # Save, show and return figure.
+        if save is not None:
+            plt.savefig(save + '_genes.png')
+
+        if show:
+            plt.show()
+
+        plt.close(fig)
+
+        if return_axs:
+            return ax
+        else:
+            return
+
+
+class DifferentialExpressionTestWaldCont(_DifferentialExpressionTestCont):
+    de_test: DifferentialExpressionTestWald
+
+    def __init__(
+            self,
+            de_test: DifferentialExpressionTestWald,
+            size_factors: np.ndarray,
+            continuous_coords: np.ndarray,
+            spline_coefs: list,
+            noise_model: str,
+    ):
+        super(DifferentialExpressionTestWaldCont, self).__init__(
+            de_test=de_test,
+            model_estim=de_test.model_estim,
+            size_factors=size_factors,
+            continuous_coords=continuous_coords,
+            spline_coefs=spline_coefs,
+            noise_model=noise_model
+        )
+
+
+class DifferentialExpressionTestLRTCont(_DifferentialExpressionTestCont):
+    de_test: DifferentialExpressionTestLRT
+
+    def __init__(
+            self,
+            de_test: DifferentialExpressionTestLRT,
+            size_factors: np.ndarray,
+            continuous_coords: np.ndarray,
+            spline_coefs: list,
+            noise_model: str
+    ):
+        super().__init__(
+            de_test=de_test,
+            model_estim=de_test.full_estim,
+            size_factors=size_factors,
+            continuous_coords=continuous_coords,
+            spline_coefs=spline_coefs,
+            noise_model=noise_model
+        )
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 3242e09..f3462ec 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -16,8 +16,8 @@
 from .det import DifferentialExpressionTestLRT, DifferentialExpressionTestWald, \
     DifferentialExpressionTestTT, DifferentialExpressionTestRank, _DifferentialExpressionTestSingle, \
     DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, DifferentialExpressionTestPairwise, \
-    DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition, \
-    DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
+    DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition
+from .det_cont import DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
 from .utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
     constraint_system_from_star
 
@@ -2028,7 +2028,8 @@ def continuous_1d(
             de_test=de_test,
             size_factors=size_factors,
             continuous_coords=sample_description[continuous].values,
-            spline_coefs=new_coefs
+            spline_coefs=new_coefs,
+            noise_model=noise_model
         )
     else:
         raise ValueError('continuous(): Parameter `test` not recognized.')
diff --git a/diffxpy/unit_test/test_continuous.py b/diffxpy/unit_test/test_continuous.py
index 57ad345..20552e1 100644
--- a/diffxpy/unit_test/test_continuous.py
+++ b/diffxpy/unit_test/test_continuous.py
@@ -9,23 +9,38 @@
 import diffxpy.api as de
 
 
-class TestContinuous(unittest.TestCase):
+class _TestContinuous:
 
-    def test_forfatal_functions(self):
-        """
-        Test if de.test.continuous() DifferentialExpressionTestSingle object functions work fine.
-
-        :param n_cells: Number of cells to simulate (number of observations per test).
-        :param n_genes: Number of genes to simulate (number of tests).
-        """
-        logging.getLogger("tensorflow").setLevel(logging.ERROR)
-        logging.getLogger("batchglm").setLevel(logging.WARNING)
-        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+    noise_model: str
 
-        num_observations = 10
-        num_features = 2
-
-        sim = Simulator(num_observations=num_observations, num_features=num_features)
+    def _fit_continuous(
+        self,
+        sim,
+        sample_description,
+        test
+    ):
+        test = de.test.continuous_1d(
+            data=sim.input_data,
+            sample_description=sample_description,
+            gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
+            continuous="pseudotime",
+            df=3,
+            formula_loc="~ 1 + pseudotime + batch",
+            formula_scale="~ 1",
+            factor_loc_totest="pseudotime",
+            test=test,
+            quick_scale=True,
+            noise_model=self.noise_model
+        )
+        return test
+
+    def _test_null_model(
+            self,
+            nobs: int,
+            ngenes: int,
+            test: str
+    ):
+        sim = Simulator(num_observations=nobs, num_features=ngenes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
         sim.generate()
 
@@ -33,48 +48,49 @@ def test_forfatal_functions(self):
             "pseudotime": np.random.random(size=sim.nobs),
             "batch": np.random.randint(2, size=sim.nobs)
         })
-
-        test = de.test.continuous_1d(
-            data=sim.X,
-            continuous="pseudotime",
-            df=3,
-            formula_loc="~ 1 + pseudotime + batch",
-            formula_scale="~ 1",
-            factor_loc_totest="pseudotime",
-            test="wald",
+        return self._fit_continuous(
+            sim=sim,
             sample_description=random_sample_description,
-            quick_scale=True,
-            batch_size=None,
-            training_strategy="DEFAULT",
-            dtype="float64"
+            test=test
         )
 
-        summary = test.summary()
+    def _test_forfatal(self, test: str):
+        """
+        Test if de.test.continuous() DifferentialExpressionTestSingle object functions work fine.
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        test = self._test_null_model(nobs=10, ngenes=2, test=test)
         ids = test.gene_ids
 
         # 1. Test all additional functions which depend on model computation:
         # 1.1. Only continuous model:
-        temp = test.log_fold_change(genes=ids, nonnumeric=False)
-        temp = test.max(genes=ids, nonnumeric=False)
-        temp = test.min(genes=ids, nonnumeric=False)
-        temp = test.argmax(genes=ids, nonnumeric=False)
-        temp = test.argmin(genes=ids, nonnumeric=False)
-        temp = test.summary(nonnumeric=False)
+        _ = test.log_fold_change(genes=ids, non_numeric=False)
+        _ = test.max(genes=ids, non_numeric=False)
+        _ = test.min(genes=ids, non_numeric=False)
+        _ = test.argmax(genes=ids, non_numeric=False)
+        _ = test.argmin(genes=ids, non_numeric=False)
+        _ = test.summary(non_numeric=False)
         # 1.2. Full model:
-        temp = test.log_fold_change(genes=ids, nonnumeric=True)
-        temp = test.max(genes=ids, nonnumeric=True)
-        temp = test.min(genes=ids, nonnumeric=True)
-        temp = test.argmax(genes=ids, nonnumeric=True)
-        temp = test.argmin(genes=ids, nonnumeric=True)
-        temp = test.summary(nonnumeric=True)
+        _ = test.log_fold_change(genes=ids, non_numeric=True)
+        _ = test.max(genes=ids, non_numeric=True)
+        _ = test.min(genes=ids, non_numeric=True)
+        _ = test.argmax(genes=ids, non_numeric=True)
+        _ = test.argmin(genes=ids, non_numeric=True)
+        _ = test.summary(non_numeric=True)
 
         return True
 
-    def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100):
+
+class TestContinuousNb(_TestContinuous, unittest.TestCase):
+
+    def test_forfatal_wald(self):
         """
         Test if de.test.continuous() generates a uniform p-value distribution in the wald test
         if it is given data simulated based on the null model. Returns the p-value
-        of the two-side Kolmgorov-Smirnov test for equality of the observed 
+        of the two-side Kolmgorov-Smirnov test for equality of the observed
         p-value distriubution and a uniform distribution.
 
         :param n_cells: Number of cells to simulate (number of observations per test).
@@ -84,29 +100,47 @@ def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
-        sim = Simulator(num_observations=n_cells, num_features=n_genes)
-        sim.generate_sample_description(num_batches=0, num_conditions=0)
-        sim.generate()
+        self.noise_model = "nb"
+        np.random.seed(1)
+        _ = self._test_forfatal(test="wald")
+        return True
 
-        random_sample_description = pd.DataFrame({
-            "pseudotime": np.random.random(size=sim.nobs)
-        })
+    def test_forfatal_lrt(self):
+        """
+        Test if de.test.continuous() generates a uniform p-value distribution in the wald test
+        if it is given data simulated based on the null model. Returns the p-value
+        of the two-side Kolmgorov-Smirnov test for equality of the observed
+        p-value distriubution and a uniform distribution.
 
-        test = de.test.continuous_1d(
-            data=sim.X,
-            continuous="pseudotime",
-            df=3,
-            formula_loc="~ 1 + pseudotime",
-            formula_scale="~ 1",
-            factor_loc_totest="pseudotime",
-            test="wald",
-            sample_description=random_sample_description,
-            quick_scale=True,
-            batch_size=None,
-            training_strategy="DEFAULT",
-            dtype="float64"
-        )
-        summary = test.summary()
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        self.noise_model = "nb"
+        np.random.seed(1)
+        _ = self._test_forfatal(test="lrt")
+        return True
+
+    def test_null_distribution_wald(self):
+        """
+        Test if de.test.continuous() generates a uniform p-value distribution in the wald test
+        if it is given data simulated based on the null model. Returns the p-value
+        of the two-side Kolmgorov-Smirnov test for equality of the observed 
+        p-value distriubution and a uniform distribution.
+
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        self.noise_model = "nb"
+        np.random.seed(1)
+        test = self._test_null_model(nobs=2000, ngenes=100, test="wald")
 
         # Compare p-value distribution under null model against uniform distribution.
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
@@ -130,29 +164,9 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100):
         logging.getLogger("batchglm").setLevel(logging.INFO)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
-        sim = Simulator(num_observations=n_cells, num_features=n_genes)
-        sim.generate_sample_description(num_batches=0, num_conditions=0)
-        sim.generate()
-
-        random_sample_description = pd.DataFrame({
-            "pseudotime": np.random.random(size=sim.nobs)
-        })
-
-        test = de.test.continuous_1d(
-            data=sim.X,
-            continuous="pseudotime",
-            df=3,
-            formula_loc="~ 1 + pseudotime",
-            formula_scale="~ 1",
-            factor_loc_totest="pseudotime",
-            test="lrt",
-            sample_description=random_sample_description,
-            quick_scale=False,
-            batch_size=None,
-            training_strategy="DEFAULT",
-            dtype="float64"
-        )
-        summary = test.summary()
+        self.noise_model = "nb"
+        np.random.seed(1)
+        test = self._test_null_model(nobs=2000, ngenes=100, test="lrt")
 
         # Compare p-value distribution under null model against uniform distribution.
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
@@ -162,5 +176,6 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100):
 
         return True
 
+
 if __name__ == '__main__':
     unittest.main()

From e97389e066e4068bf6084f48fe6d538f8a157237 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 28 Aug 2019 12:01:35 -0700
Subject: [PATCH 04/25] fixed error in plotting gene fits if data is sparse

---
 diffxpy/testing/det_cont.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
index f3b5d59..b4b4436 100644
--- a/diffxpy/testing/det_cont.py
+++ b/diffxpy/testing/det_cont.py
@@ -7,6 +7,8 @@
 import logging
 import numpy as np
 import pandas as pd
+import scipy
+import scipy.sparse
 from typing import Union
 
 from .det import _DifferentialExpressionTestSingle, DifferentialExpressionTestWald, DifferentialExpressionTestLRT
@@ -334,6 +336,8 @@ def plot_genes(
             axs.append(ax)
 
             y = self.x[:, g]
+            if isinstance(y, scipy.sparse.csr_matrix):
+                y = np.asarray(y.todense())
             yhat = self._continuous_model(idx=g, non_numeric=non_numeric)
             if log:
                 y = np.log(y + 1)

From 9c0090525afe54dc2f1d290b503accc6a8493be9 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 28 Aug 2019 12:09:22 -0700
Subject: [PATCH 05/25] added missing flatten to array in plot by gene

---
 diffxpy/testing/det_cont.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
index b4b4436..8c0393b 100644
--- a/diffxpy/testing/det_cont.py
+++ b/diffxpy/testing/det_cont.py
@@ -337,7 +337,7 @@ def plot_genes(
 
             y = self.x[:, g]
             if isinstance(y, scipy.sparse.csr_matrix):
-                y = np.asarray(y.todense())
+                y = np.asarray(y.todense()).flatten()
             yhat = self._continuous_model(idx=g, non_numeric=non_numeric)
             if log:
                 y = np.log(y + 1)

From 1d445dccad0ef9c14cc5f42f05b18e917f7e5435 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 28 Aug 2019 17:05:16 -0700
Subject: [PATCH 06/25] supported interpolation x axis in plots

---
 diffxpy/testing/det_cont.py | 45 ++++++++++++++++++++++++-------------
 diffxpy/testing/tests.py    | 20 +++++++++++++++--
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
index 8c0393b..3476c04 100644
--- a/diffxpy/testing/det_cont.py
+++ b/diffxpy/testing/det_cont.py
@@ -30,6 +30,7 @@ def __init__(
             size_factors: np.ndarray,
             continuous_coords: np.ndarray,
             spline_coefs: list,
+            interpolated_spline_basis: np.ndarray,
             noise_model: str
     ):
         self._de_test = de_test
@@ -37,6 +38,7 @@ def __init__(
         self._size_factors = size_factors
         self._continuous_coords = continuous_coords
         self._spline_coefs = spline_coefs
+        self._interpolated_spline_basis = interpolated_spline_basis
         self.noise_model = noise_model
 
     @property
@@ -207,6 +209,7 @@ def _continuous_model(self, idx, non_numeric=False):
         :return: Continuuos fit for each cell for given gene.
         """
         idx = np.asarray(idx)
+
         if non_numeric:
             mu = np.matmul(self._model_estim.input_data.design_loc,
                            self._model_estim.model.a[:, idx])
@@ -220,6 +223,20 @@ def _continuous_model(self, idx, non_numeric=False):
         mu = np.exp(mu)
         return mu
 
+    def _continuous_interpolation(self, idx):
+        """
+        Recover continuous fit for a gene.
+
+        :param idx: Index of genes to recover fit for.
+        :return: Continuuos fit for each cell for given gene.
+        """
+        idx = np.asarray(idx)
+        idx_basis = self._spline_par_loc_idx(intercept=True)
+        eta_loc = np.matmul(self._interpolated_spline_basis[:, :-1], self._model_estim.model.a[idx_basis, :][:, idx])
+        mu = np.exp(eta_loc)
+        t_eval = self._interpolated_spline_basis[:, -1]
+        return t_eval, mu
+
     def max(self, genes, non_numeric=False):
         """
         Return maximum fitted expression value by gene.
@@ -276,7 +293,6 @@ def plot_genes(
             hue=None,
             size=1,
             log=True,
-            non_numeric=False,
             save=None,
             show=True,
             ncols=2,
@@ -291,7 +307,6 @@ def plot_genes(
         :param hue: Confounder to include in plot.
         :param size: Point size.
         :param log: Whether to log values.
-        :param non_numeric:
         :param save: Path+file name stem to save plots to.
             File will be save+"_genes.png". Does not save if save is None.
         :param show: Whether to display plot.
@@ -338,7 +353,7 @@ def plot_genes(
             y = self.x[:, g]
             if isinstance(y, scipy.sparse.csr_matrix):
                 y = np.asarray(y.todense()).flatten()
-            yhat = self._continuous_model(idx=g, non_numeric=non_numeric)
+            t_continuous, yhat = self._continuous_interpolation(idx=g)
             if log:
                 y = np.log(y + 1)
                 yhat = np.log(yhat + 1)
@@ -352,7 +367,7 @@ def plot_genes(
                 legend=False
             )
             sns.lineplot(
-                x=self._continuous_coords,
+                x=t_continuous,
                 y=yhat,
                 hue=hue,
                 ax=ax
@@ -419,23 +434,17 @@ def plot_heatmap(
 
         # Build heatmap matrix.
         # Add in data.
-        data = np.array([
-            self._continuous_model(idx=g, non_numeric=False)
-            for i, g in enumerate(gene_idx)
-        ])
-        # Order columns by continuous covariate.
-        idx_x_sorted = np.argsort(self._continuous_coords)
-        data = data[:, idx_x_sorted]
-        xcoord = self._continuous_coords[idx_x_sorted]
+        xcoord, data = self._continuous_interpolation(idx=gene_idx)
+        data = data.T
 
         if transform.lower() == "log10":
             data = np.nextafter(0, 1, out=data, where=data == 0)
             data = np.log(data) / np.log(10)
         elif transform.lower() == "zscore":
-            mu = np.mean(data, axis=0)
-            sd = np.std(data, axis=0)
+            mu = np.mean(data, axis=0, keepdims=True)
+            sd = np.std(data, axis=0, keepdims=True)
             sd = np.nextafter(0, 1, out=sd, where=sd == 0)
-            data = np.array([(x - mu[i]) / sd[i] for i, x in enumerate(data)])
+            data = (data - mu) / sd
         elif transform.lower() == "none":
             pass
         else:
@@ -483,7 +492,8 @@ def __init__(
             size_factors: np.ndarray,
             continuous_coords: np.ndarray,
             spline_coefs: list,
-            noise_model: str,
+            interpolated_spline_basis: np.ndarray,
+            noise_model: str
     ):
         super(DifferentialExpressionTestWaldCont, self).__init__(
             de_test=de_test,
@@ -491,6 +501,7 @@ def __init__(
             size_factors=size_factors,
             continuous_coords=continuous_coords,
             spline_coefs=spline_coefs,
+            interpolated_spline_basis=interpolated_spline_basis,
             noise_model=noise_model
         )
 
@@ -504,6 +515,7 @@ def __init__(
             size_factors: np.ndarray,
             continuous_coords: np.ndarray,
             spline_coefs: list,
+            interpolated_spline_basis: np.ndarray,
             noise_model: str
     ):
         super().__init__(
@@ -512,5 +524,6 @@ def __init__(
             size_factors=size_factors,
             continuous_coords=continuous_coords,
             spline_coefs=spline_coefs,
+            interpolated_spline_basis=interpolated_spline_basis,
             noise_model=noise_model
         )
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index f3462ec..517d69a 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -1722,7 +1722,7 @@ def continuous_1d(
         noise_model: str = 'nb',
         size_factors: np.ndarray = None,
         batch_size: int = None,
-        training_strategy: Union[str, List[Dict[str, object]], Callable] = "DEFAULT",
+        training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         quick_scale: bool = None,
         dtype="float64",
         **kwargs
@@ -1906,6 +1906,21 @@ def continuous_1d(
     new_coefs = [continuous + str(i) for i in range(spline_basis.shape[1])]
     spline_basis.columns = new_coefs
     formula_extension = '+'.join(new_coefs)
+    # Generated interpolated spline bases.
+    # Safe interpolated interval in last column, need to extract later.
+    interpolated_interval = np.linspace(
+        np.min(sample_description[continuous].values),
+        np.max(sample_description[continuous].values),
+        100
+    )
+    interpolated_spline_basis = np.hstack([
+        np.ones([100, 1]),
+        patsy.highlevel.dmatrix(
+            "0+bs(" + continuous + ", df=" + str(df) + ")",
+            pd.DataFrame({continuous: interpolated_interval})
+        ).base,
+        np.expand_dims(interpolated_interval, axis=1)
+    ])
 
     # Replace continuous factor in formulas by spline basis coefficients.
     # Note that the brackets around formula_term_continuous propagate the sum
@@ -1978,7 +1993,8 @@ def continuous_1d(
             noise_model=noise_model,
             size_factors=size_factors,
             continuous_coords=sample_description[continuous].values,
-            spline_coefs=new_coefs
+            spline_coefs=new_coefs,
+            interpolated_spline_basis=interpolated_spline_basis
         )
     elif test.lower() == 'lrt':
         if noise_model is None:

From 01d446cb7c7a5b5076d2783a99e07c98d8618e47 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 28 Aug 2019 20:39:43 -0700
Subject: [PATCH 07/25] fixed constraints interface

---
 diffxpy/unit_test/test_constrained.py | 151 +++++---------------------
 1 file changed, 29 insertions(+), 122 deletions(-)

diff --git a/diffxpy/unit_test/test_constrained.py b/diffxpy/unit_test/test_constrained.py
index 78b4851..7b4143a 100644
--- a/diffxpy/unit_test/test_constrained.py
+++ b/diffxpy/unit_test/test_constrained.py
@@ -21,6 +21,7 @@ def test_forfatal_from_string(self):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
+        np.random.seed(1)
         n_cells = 2000
         n_genes = 2
 
@@ -39,21 +40,23 @@ def test_forfatal_from_string(self):
         coefficient_names = ['intercept', 'bio1', 'bio2', 'bio3', 'bio4', 'treatment1']
         dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names)
 
-        dmat_est_loc = de.utils.design_matrix(dmat=dmat_est)
-        dmat_est_scale = de.utils.design_matrix(dmat=dmat_est)
+        dmat_est_loc = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
+        dmat_est_scale = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
 
         # Build constraints:
         constraints_loc = de.utils.constraint_matrix_from_string(
-            dmat=dmat_est_loc,
+            dmat=dmat_est_loc.values,
+            coef_names=dmat_est_loc.columns,
             constraints=["bio1+bio2=0", "bio3+bio4=0"]
         )
         constraints_scale = de.utils.constraint_matrix_from_string(
-            dmat=dmat_est_scale,
+            dmat=dmat_est_scale.values,
+            coef_names=dmat_est_scale.columns,
             constraints=["bio1+bio2=0", "bio3+bio4=0"]
         )
 
         test = de.test.wald(
-            data=sim.x,
+            data=sim.input_data,
             dmat_loc=dmat_est_loc,
             dmat_scale=dmat_est_scale,
             constraints_loc=constraints_loc,
@@ -70,6 +73,7 @@ def test_forfatal_from_dict(self):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
+        np.random.seed(1)
         n_cells = 2000
         n_genes = 2
 
@@ -83,26 +87,13 @@ def test_forfatal_from_dict(self):
             "batch": ["batch"+str(i // 500) for i in range(n_cells)]
         })
 
-        # Build constraints:
-        dmat_loc, constraints_loc = de.utils.constraint_matrix_from_dict(
-            sample_description=sample_description,
-            formula="~1+cond+batch",
-            constraints={"batch": "cond"},
-            dims=["design_loc_params", "loc_params"]
-        )
-        dmat_scale, constraints_scale = de.utils.constraint_matrix_from_dict(
-            sample_description=sample_description,
-            formula="~1+cond+batch",
-            constraints={"batch": "cond"},
-            dims=["design_scale_params", "scale_params"]
-        )
-
         test = de.test.wald(
-            data=sim.x,
-            dmat_loc=dmat_loc,
-            dmat_scale=dmat_scale,
-            constraints_loc=constraints_loc,
-            constraints_scale=constraints_scale,
+            data=sim.input_data,
+            sample_description=sample_description,
+            formula_loc="~1+cond+batch",
+            formula_scale="~1+cond+batch",
+            constraints_loc={"batch": "cond"},
+            constraints_scale={"batch": "cond"},
             coef_to_test=["cond[T.cond1]"]
         )
         _ = test.summary()
@@ -122,8 +113,8 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
+        np.random.seed(1)
         n_cells = 2000
-
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
         sim.generate()
@@ -134,26 +125,13 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100):
             "batch": ["batch" + str(i // 500) for i in range(n_cells)]
         })
 
-        # Build constraints:
-        dmat_loc, constraints_loc = de.utils.constraint_matrix_from_dict(
-            sample_description=sample_description,
-            formula="~1+cond+batch",
-            constraints={"batch": "cond"},
-            dims=["design_loc_params", "loc_params"]
-        )
-        dmat_scale, constraints_scale = de.utils.constraint_matrix_from_dict(
-            sample_description=sample_description,
-            formula="~1+cond+batch",
-            constraints={"batch": "cond"},
-            dims=["design_scale_params", "scale_params"]
-        )
-
         test = de.test.wald(
-            data=sim.x,
-            dmat_loc=dmat_loc,
-            dmat_scale=dmat_scale,
-            constraints_loc=constraints_loc,
-            constraints_scale=constraints_scale,
+            data=sim.input_data,
+            sample_description=sample_description,
+            formula_loc="~1+cond+batch",
+            formula_scale="~1+cond+batch",
+            constraints_loc={"batch": "cond"},
+            constraints_scale={"batch": "cond"},
             coef_to_test=["cond[T.cond1]"]
         )
         _ = test.summary()
@@ -181,8 +159,8 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
+        np.random.seed(1)
         n_cells = 12000
-
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
         sim.generate()
@@ -213,12 +191,13 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
                              'tech1', 'tech2', 'tech3', 'tech4']
         dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names)
 
-        dmat_est_loc = de.utils.design_matrix(dmat=dmat_est)
-        dmat_est_scale = de.utils.design_matrix(dmat=dmat_est.iloc[:, [0]])
+        dmat_est_loc = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
+        dmat_est_scale = de.utils.design_matrix(dmat=dmat_est.iloc[:, [0]], return_type="dataframe")
 
         # Build constraints:
         constraints_loc = de.utils.constraint_matrix_from_string(
-            dmat=dmat_est_loc,
+            dmat=dmat_est_loc.values,
+            coef_names=dmat_est_loc.columns,
             constraints=["bio1+bio2=0",
                          "bio3+bio4=0",
                          "bio5+bio6=0",
@@ -229,92 +208,20 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
         constraints_scale = None
 
         test = de.test.wald(
-            data=sim.x,
+            data=sim.input_data,
             dmat_loc=dmat_est_loc,
             dmat_scale=dmat_est_scale,
             constraints_loc=constraints_loc,
             constraints_scale=constraints_scale,
             coef_to_test=["treatment1"]
         )
-        summary = test.summary()
-
-        # Compare p-value distribution under null model against uniform distribution.
-        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
-
-        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
-        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
-
-        return True
-
-    def test_null_distribution_wald_multi_constrained_2layer(self, n_genes: int = 50):
-        """
-        Test if de.wald() for multiple coefficients with constraints
-        generates a uniform p-value distribution
-        if it is given data simulated based on the null model. Returns the p-value
-        of the two-side Kolmgorov-Smirnov test for equality of the observed
-        p-value distribution and a uniform distribution.
-
-        n_cells is constant as the design matrix and constraints depend on it.
-
-        :param n_genes: Number of genes to simulate (number of tests).
-        """
-        logging.getLogger("tensorflow").setLevel(logging.ERROR)
-        logging.getLogger("batchglm").setLevel(logging.WARNING)
-        logging.getLogger("diffxpy").setLevel(logging.WARNING)
-
-        n_cells = 3000
-
-        sim = Simulator(num_observations=n_cells, num_features=n_genes)
-        sim.generate_sample_description(num_batches=0, num_conditions=0)
-        sim.generate()
-
-        # Build design matrix:
-        dmat = np.zeros([n_cells, 9])
-        dmat[:, 0] = 1
-        dmat[:500, 1] = 1  # bio rep 1
-        dmat[500:1000, 2] = 1  # bio rep 2
-        dmat[1000:1500, 3] = 1  # bio rep 3
-        dmat[1500:2000, 4] = 1  # bio rep 4
-        dmat[2000:2500, 5] = 1  # bio rep 5
-        dmat[2500:3000, 6] = 1  # bio rep 6
-        dmat[1000:2000, 7] = 1  # condition effect 1
-        dmat[2000:3000, 8] = 1  # condition effect 2
-        coefficient_names = ['intercept', 'bio1', 'bio2', 'bio3', 'bio4',
-                             'bio5', 'bio6', 'treatment1', 'treatment2']
-        dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names)
-
-        dmat_est_loc = de.utils.design_matrix(dmat=dmat_est)
-        dmat_est_scale = de.utils.design_matrix(dmat=dmat_est)
-
-        # Build constraints:
-        constraints_loc = de.utils.constraint_matrix_from_string(
-            dmat=dmat_est_loc,
-            constraints=["bio1+bio2=0",
-                         "bio3+bio4=0",
-                         "bio5+bio6=0"]
-        )
-        constraints_scale = de.utils.constraint_matrix_from_string(
-            dmat=dmat_est_scale,
-            constraints=["bio1+bio2=0",
-                         "bio3+bio4=0",
-                         "bio5+bio6=0"]
-        )
-
-        test = de.test.wald(
-            data=sim.x,
-            dmat_loc=dmat_est_loc,
-            dmat_scale=dmat_est_scale,
-            constraints_loc=constraints_loc,
-            constraints_scale=constraints_scale,
-            coef_to_test=["treatment1", "treatment2"]
-        )
         _ = test.summary()
 
         # Compare p-value distribution under null model against uniform distribution.
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
 
         logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
-        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % pval_h0
+        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
 
         return True
 

From dbef0d3a6d8c725121d83733cc3fe9bcb2687efa Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Fri, 30 Aug 2019 09:42:39 -0700
Subject: [PATCH 08/25] improved handling of constraints in wald test with
 respect to continuous models

---
 diffxpy/testing/tests.py             | 23 +++++++++++---
 diffxpy/testing/utils.py             | 30 +++++++++----------
 diffxpy/unit_test/test_continuous.py | 45 ++++++++++++++++++----------
 3 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 517d69a..3ac5297 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -19,7 +19,7 @@
     DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition
 from .det_cont import DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
 from .utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
-    constraint_system_from_star
+    constraint_system_from_star, design_matrix, preview_coef_names
 
 
 def _fit(
@@ -580,8 +580,17 @@ def wald(
     constraints_loc_temp = constraints_loc if constraints_loc is not None else np.eye(design_loc.shape[-1])
     if factor_loc_totest is not None:
         # Select coefficients to test via formula model:
+        # Create temporary patsy design matrix to catch events in which design matrix is not patsy anymore here:
+        design_loc_temp = design_matrix(
+            data=data,
+            sample_description=sample_description,
+            formula=formula_loc,
+            as_numeric=as_numeric,
+            dmat=dmat_loc,
+            return_type="patsy"
+        )
         col_indices = np.concatenate([
-            np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
+            np.arange(design_loc_temp.shape[-1])[design_loc_temp.design_info.slice(x)]
             for x in factor_loc_totest
         ])
         assert col_indices.size > 0, "Could not find any matching columns!"
@@ -595,8 +604,14 @@ def wald(
                 design_loc[:, col_indices] = np.where(samples, 1, 0)
     elif coef_to_test is not None:
         # Directly select coefficients to test from design matrix:
-        # Check that coefficients to test are not dependent parameters if constraints are given:
-        coef_loc_names = glm.data.view_coef_names(design_loc).tolist()
+        if sample_description is not None:
+            coef_loc_names = preview_coef_names(
+                sample_description=sample_description,
+                formula=formula_loc,
+                as_numeric=as_numeric
+            ).tolist()
+        else:
+            coef_loc_names = dmat_loc.columns.tolist()
         if not np.all([x in coef_loc_names for x in coef_to_test]):
             raise ValueError(
                 "the requested test coefficients %s were found in model coefficients %s" %
diff --git a/diffxpy/testing/utils.py b/diffxpy/testing/utils.py
index fdefa7e..d73673a 100644
--- a/diffxpy/testing/utils.py
+++ b/diffxpy/testing/utils.py
@@ -52,19 +52,20 @@ def parse_sample_description(
                 "with corresponding sample annotations"
             )
 
-    if anndata is not None and isinstance(data, Raw):
-        # Raw does not have attribute shape.
-        assert data.X.shape[0] == sample_description.shape[0], \
-            "data matrix and sample description must contain same number of cells: %i, %i" % \
-            (data.X.shape[0], sample_description.shape[0])
-    elif isinstance(data, glm.typing.InputDataBase):
-        assert data.x.shape[0] == sample_description.shape[0], \
-            "data matrix and sample description must contain same number of cells: %i, %i" % \
-            (data.x.shape[0], sample_description.shape[0])
-    else:
-        assert data.shape[0] == sample_description.shape[0], \
-            "data matrix and sample description must contain same number of cells: %i, %i" % \
-            (data.shape[0], sample_description.shape[0])
+    if sample_description is not None:
+        if anndata is not None and isinstance(data, Raw):
+            # Raw does not have attribute shape.
+            assert data.X.shape[0] == sample_description.shape[0], \
+                "data matrix and sample description must contain same number of cells: %i, %i" % \
+                (data.X.shape[0], sample_description.shape[0])
+        elif isinstance(data, glm.typing.InputDataBase):
+            assert data.x.shape[0] == sample_description.shape[0], \
+                "data matrix and sample description must contain same number of cells: %i, %i" % \
+                (data.x.shape[0], sample_description.shape[0])
+        else:
+            assert data.shape[0] == sample_description.shape[0], \
+                "data matrix and sample description must contain same number of cells: %i, %i" % \
+                (data.shape[0], sample_description.shape[0])
     return sample_description
 
 
@@ -117,8 +118,7 @@ def dmat_unique(dmat, sample_description):
 
 
 def design_matrix(
-        data: Union[anndata.AnnData, Raw, np.ndarray,
-                    scipy.sparse.csr_matrix] = None,
+        data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix] = None,
         sample_description: Union[None, pd.DataFrame] = None,
         formula: Union[None, str] = None,
         as_numeric: Union[List[str], Tuple[str], str] = (),
diff --git a/diffxpy/unit_test/test_continuous.py b/diffxpy/unit_test/test_continuous.py
index 20552e1..f6ecab2 100644
--- a/diffxpy/unit_test/test_continuous.py
+++ b/diffxpy/unit_test/test_continuous.py
@@ -17,17 +17,19 @@ def _fit_continuous(
         self,
         sim,
         sample_description,
+        constrained,
         test
     ):
         test = de.test.continuous_1d(
             data=sim.input_data,
             sample_description=sample_description,
             gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
-            continuous="pseudotime",
-            df=3,
-            formula_loc="~ 1 + pseudotime + batch",
+            formula_loc="~ 1 + continuous_covariate + batch",
             formula_scale="~ 1",
-            factor_loc_totest="pseudotime",
+            factor_loc_totest="continuous_covariate",
+            continuous="continuous_covariate",
+            constraints_loc={"batch": "continuous_covariate"} if constrained else None,
+            df=3,
             test=test,
             quick_scale=True,
             noise_model=self.noise_model
@@ -38,23 +40,30 @@ def _test_null_model(
             self,
             nobs: int,
             ngenes: int,
-            test: str
+            test: str,
+            constrained: bool
     ):
         sim = Simulator(num_observations=nobs, num_features=ngenes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
         sim.generate()
 
         random_sample_description = pd.DataFrame({
-            "pseudotime": np.random.random(size=sim.nobs),
-            "batch": np.random.randint(2, size=sim.nobs)
+            "continuous_covariate": np.asarray(np.random.randint(0, 5, size=sim.nobs), dtype=float)
         })
+        random_sample_description["batch"] = [str(int(x)) + str(np.random.randint(0, 3))
+                                              for x in random_sample_description["continuous_covariate"]]
         return self._fit_continuous(
             sim=sim,
             sample_description=random_sample_description,
-            test=test
+            test=test,
+            constrained=constrained
         )
 
-    def _test_forfatal(self, test: str):
+    def _test_forfatal(
+            self,
+            test: str,
+            constrained: bool
+    ):
         """
         Test if de.test.continuous() DifferentialExpressionTestSingle object functions work fine.
         """
@@ -62,7 +71,12 @@ def _test_forfatal(self, test: str):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
-        test = self._test_null_model(nobs=10, ngenes=2, test=test)
+        test = self._test_null_model(
+            nobs=10,
+            ngenes=2,
+            test=test,
+            constrained=constrained
+        )
         ids = test.gene_ids
 
         # 1. Test all additional functions which depend on model computation:
@@ -102,7 +116,8 @@ def test_forfatal_wald(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        _ = self._test_forfatal(test="wald")
+        _ = self._test_forfatal(test="wald", constrained=False)
+        _ = self._test_forfatal(test="wald", constrained=True)
         return True
 
     def test_forfatal_lrt(self):
@@ -121,7 +136,7 @@ def test_forfatal_lrt(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        _ = self._test_forfatal(test="lrt")
+        _ = self._test_forfatal(test="lrt", constrained=False)
         return True
 
     def test_null_distribution_wald(self):
@@ -140,7 +155,7 @@ def test_null_distribution_wald(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        test = self._test_null_model(nobs=2000, ngenes=100, test="wald")
+        test = self._test_null_model(nobs=2000, ngenes=100, test="wald", constrained=False)
 
         # Compare p-value distribution under null model against uniform distribution.
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
@@ -150,7 +165,7 @@ def test_null_distribution_wald(self):
 
         return True
 
-    def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100):
+    def test_null_distribution_lrt(self):
         """
         Test if de.test.continuous() generates a uniform p-value distribution in lrt
         if it is given data simulated based on the null model. Returns the p-value
@@ -166,7 +181,7 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        test = self._test_null_model(nobs=2000, ngenes=100, test="lrt")
+        test = self._test_null_model(nobs=2000, ngenes=100, test="lrt", constrained=False)
 
         # Compare p-value distribution under null model against uniform distribution.
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue

From 17e1a33dc8f8d7c7365d60a2e4488fcda7bf5aff Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Fri, 30 Aug 2019 09:56:05 -0700
Subject: [PATCH 09/25] fixed bug in non lazy z test fold change computation

---
 diffxpy/testing/det_pair.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/diffxpy/testing/det_pair.py b/diffxpy/testing/det_pair.py
index 895c697..3ddd175 100644
--- a/diffxpy/testing/det_pair.py
+++ b/diffxpy/testing/det_pair.py
@@ -420,7 +420,7 @@ def _pval_pairs(self, idx0, idx1):
         """
         assert np.all([x < self.pval.shape[1] for x in idx0])
         assert np.all([x < self.pval.shape[1] for x in idx1])
-        return self.pval[idx0, idx1]
+        return self.pval[idx0, :, :][:, idx1, :]
 
     def _log_fold_change_pairs(self, idx0, idx1, base):
         """
@@ -431,18 +431,16 @@ def _log_fold_change_pairs(self, idx0, idx1, base):
         :param base: Base of logarithm.
         :return: log fold change values
         """
-        if self._logfc is None:
-            logfc = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
-            for i, xi in enumerate(idx0):
-                for j, xj in enumerate(idx1):
-                    logfc[i, j, :] = self._theta_mle[xj, :] - self._theta_mle[xi, :]
-                    logfc[j, i, :] = -logfc[i, j, :]
-            self._logfc = logfc
+        logfc = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
+        for i, xi in enumerate(idx0):
+            for j, xj in enumerate(idx1):
+                logfc[i, j, :] = self._theta_mle[xj, :] - self._theta_mle[xi, :]
+                logfc[j, i, :] = -logfc[i, j, :]
 
         if base == np.e:
-            return self._logfc[idx0, :, :][:, idx1, :]
+            return logfc
         else:
-            return self._logfc[idx0, :, :][:, idx1, :] / np.log(base)
+            return logfc / np.log(base)
 
 
 class _DifferentialExpressionTestPairwiseLazyBase(_DifferentialExpressionTestPairwiseBase):

From d102989940285202320a9eba521c0437a385ccb0 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Fri, 30 Aug 2019 14:38:33 -0700
Subject: [PATCH 10/25] incremented batchglm dependency

---
 docs/requires.txt | 2 +-
 requirements.txt  | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/requires.txt b/docs/requires.txt
index 5e35d86..90179be 100644
--- a/docs/requires.txt
+++ b/docs/requires.txt
@@ -2,7 +2,7 @@ numpy>=1.14.0
 scipy
 pandas
 patsy>=0.5.0
-batchglm>=0.6.6
+batchglm>=0.6.7
 statsmodels
 anndata
 seaborn
diff --git a/requirements.txt b/requirements.txt
index 5e35d86..90179be 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy>=1.14.0
 scipy
 pandas
 patsy>=0.5.0
-batchglm>=0.6.6
+batchglm>=0.6.7
 statsmodels
 anndata
 seaborn
diff --git a/setup.py b/setup.py
index a932746..b98360e 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
         'scipy>=1.2.1',
         'pandas',
         'patsy>=0.5.0',
-        'batchglm>=0.6.6',
+        'batchglm>=0.6.7',
         'statsmodels',
     ],
     extras_require={

From cffc1cd91518fd42b97097943b321bcbfd1718aa Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Fri, 30 Aug 2019 14:58:53 -0700
Subject: [PATCH 11/25] added size factor unit test

---
 diffxpy/unit_test/test_single_sf_null.py | 128 +++++++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 diffxpy/unit_test/test_single_sf_null.py

diff --git a/diffxpy/unit_test/test_single_sf_null.py b/diffxpy/unit_test/test_single_sf_null.py
new file mode 100644
index 0000000..ab97115
--- /dev/null
+++ b/diffxpy/unit_test/test_single_sf_null.py
@@ -0,0 +1,128 @@
+import unittest
+import logging
+import numpy as np
+import pandas as pd
+import scipy.stats as stats
+
+import diffxpy.api as de
+
+
+class _TestSingleSfNull:
+
+    def _test_null_distribution_wald(
+            self,
+            n_cells: int,
+            n_genes: int,
+            noise_model: str
+    ):
+        """
+        Test if de.wald() generates a uniform p-value distribution
+        if it is given data simulated based on the null model. Returns the p-value
+        of the two-side Kolmgorov-Smirnov test for equality of the observed 
+        p-value distribution and a uniform distribution.
+
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        :param noise_model: Noise model to use for data fitting.
+        """
+        if noise_model == "nb":
+            from batchglm.api.models.glm_nb import Simulator
+            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
+        elif noise_model == "norm":
+            from batchglm.api.models.glm_norm import Simulator
+            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
+        else:
+            raise ValueError("noise model %s not recognized" % noise_model)
+
+        sim = Simulator(num_observations=n_cells, num_features=n_genes)
+        sim.generate_sample_description(num_batches=0, num_conditions=0)
+        sim.generate_params(rand_fn_scale=rand_fn_scale)
+        sim.generate_data()
+
+        random_sample_description = pd.DataFrame({
+            "condition": np.random.randint(2, size=sim.nobs),
+            "batch": np.random.randint(2, size=sim.nobs)
+        })
+        random_sf = np.random.uniform(0.5, 1.5, sim.nobs)
+
+        test = de.test.wald(
+            data=sim.input_data,
+            sample_description=random_sample_description,
+            factor_loc_totest="condition",
+            formula_loc="~ 1 + condition + batch",
+            size_factors=random_sf,
+            batch_size=500,
+            noise_model=noise_model,
+            training_strategy="DEFAULT",
+            dtype="float64"
+        )
+        _ = test.summary()
+
+        # Compare p-value distribution under null model against uniform distribution.
+        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
+
+        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
+        assert pval_h0 > 0.05, ("KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5))
+
+        return True
+
+
+class TestSingleSfNullNb(_TestSingleSfNull, unittest.TestCase):
+    """
+    Negative binomial noise model unit tests that test whether a test generates uniformly
+    distributed p-values if data are sampled from the null model.
+    """
+
+    def test_null_distribution_wald_nb(
+            self,
+            n_cells: int = 2000,
+            n_genes: int = 200
+    ):
+        """
+        Test if wald() generates a uniform p-value distribution for "nb" noise model.
+
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        np.random.seed(1)
+        return self._test_null_distribution_wald(
+            n_cells=n_cells,
+            n_genes=n_genes,
+            noise_model="nb"
+        )
+
+
+class TestSingleSfNullNorm(_TestSingleSfNull, unittest.TestCase):
+    """
+    Normal noise model unit tests that test whether a test generates uniformly
+    distributed p-values if data are sampled from the null model.
+    """
+    def test_null_distribution_wald_norm(
+            self,
+            n_cells: int = 200,
+            n_genes: int = 200
+    ):
+        """
+        Test if wald() generates a uniform p-value distribution for "norm" noise model.
+
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        np.random.seed(1)
+        return self._test_null_distribution_wald(
+            n_cells=n_cells,
+            n_genes=n_genes,
+            noise_model="norm"
+        )
+
+
+if __name__ == '__main__':
+    unittest.main()

From 3b403b5d881022bff982819fe5887bfd851a8489 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Fri, 30 Aug 2019 14:59:03 -0700
Subject: [PATCH 12/25] fixed size factor parsing based on batchglm InputData
 input objects

---
 diffxpy/testing/utils.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/diffxpy/testing/utils.py b/diffxpy/testing/utils.py
index d73673a..9fc7501 100644
--- a/diffxpy/testing/utils.py
+++ b/diffxpy/testing/utils.py
@@ -90,7 +90,14 @@ def parse_size_factors(
         elif isinstance(size_factors, str):
             assert size_factors in sample_description.columns, ""
             size_factors = sample_description[size_factors].values
-        assert size_factors.shape[0] == data.shape[0], "data matrix and size factors must contain same number of cells"
+
+        if anndata is not None and isinstance(data, Raw):
+            data_shape = data.X.shape
+        elif isinstance(data, glm.typing.InputDataBase):
+            data_shape = data.x.shape
+        else:
+            data_shape = data.shape
+        assert size_factors.shape[0] == data_shape[0], "data matrix and size factors must contain same number of cells"
         assert np.all(size_factors > 0), "size_factors <= 0 found, please remove these cells"
     return size_factors
 

From 7538e1a4938e291af0f883b6ae76765313d467c4 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Fri, 30 Aug 2019 15:10:44 -0700
Subject: [PATCH 13/25] added size factors to continuuos unit test

---
 diffxpy/unit_test/test_continuous.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/diffxpy/unit_test/test_continuous.py b/diffxpy/unit_test/test_continuous.py
index f6ecab2..d88dafe 100644
--- a/diffxpy/unit_test/test_continuous.py
+++ b/diffxpy/unit_test/test_continuous.py
@@ -24,11 +24,12 @@ def _fit_continuous(
             data=sim.input_data,
             sample_description=sample_description,
             gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
-            formula_loc="~ 1 + continuous_covariate + batch",
+            formula_loc="~ 1 + continuous + batch",
             formula_scale="~ 1",
-            factor_loc_totest="continuous_covariate",
-            continuous="continuous_covariate",
-            constraints_loc={"batch": "continuous_covariate"} if constrained else None,
+            factor_loc_totest="continuous",
+            continuous="continuous",
+            constraints_loc={"batch": "continuous"} if constrained else None,
+            size_factors=np.random.uniform(0.5, 1.5, sim.nobs),
             df=3,
             test=test,
             quick_scale=True,
@@ -48,10 +49,10 @@ def _test_null_model(
         sim.generate()
 
         random_sample_description = pd.DataFrame({
-            "continuous_covariate": np.asarray(np.random.randint(0, 5, size=sim.nobs), dtype=float)
+            "continuous": np.asarray(np.random.randint(0, 5, size=sim.nobs), dtype=float)
         })
         random_sample_description["batch"] = [str(int(x)) + str(np.random.randint(0, 3))
-                                              for x in random_sample_description["continuous_covariate"]]
+                                              for x in random_sample_description["continuous"]]
         return self._fit_continuous(
             sim=sim,
             sample_description=random_sample_description,
@@ -72,7 +73,7 @@ def _test_forfatal(
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
         test = self._test_null_model(
-            nobs=10,
+            nobs=500,
             ngenes=2,
             test=test,
             constrained=constrained
@@ -117,7 +118,7 @@ def test_forfatal_wald(self):
         self.noise_model = "nb"
         np.random.seed(1)
         _ = self._test_forfatal(test="wald", constrained=False)
-        _ = self._test_forfatal(test="wald", constrained=True)
+        #_ = self._test_forfatal(test="wald", constrained=True)
         return True
 
     def test_forfatal_lrt(self):

From 06f591ab5da3982e17336768b7c464ece754649a Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 14:18:35 -0700
Subject: [PATCH 14/25] improved handling of continuous models if constraints
 are given

---
 diffxpy/stats/stats.py                        |   1 -
 diffxpy/testing/tests.py                      |  84 +++++------
 diffxpy/testing/utils.py                      |   2 +-
 diffxpy/unit_test/test_continuous_de.py       | 131 ++++++++++++++++++
 ..._continuous.py => test_continuous_null.py} |  43 ++++--
 diffxpy/unit_test/test_fit.py                 |   2 +-
 ...test_pairwise.py => test_pairwise_null.py} |   0
 7 files changed, 209 insertions(+), 54 deletions(-)
 create mode 100644 diffxpy/unit_test/test_continuous_de.py
 rename diffxpy/unit_test/{test_continuous.py => test_continuous_null.py} (81%)
 rename diffxpy/unit_test/{test_pairwise.py => test_pairwise_null.py} (100%)

diff --git a/diffxpy/stats/stats.py b/diffxpy/stats/stats.py
index 3080224..74c9beb 100644
--- a/diffxpy/stats/stats.py
+++ b/diffxpy/stats/stats.py
@@ -263,7 +263,6 @@ def wald_test_chisq(
             raise ValueError('stats.wald_test(): theta_mle and theta0 have to contain the same number of entries')
 
     theta_diff = theta_mle - theta0
-    # Convert to nd.array to avoid gufunc error.
     wald_statistic = np.array([
         np.matmul(
             np.matmul(
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 703a896..54461a3 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -28,6 +28,8 @@ def _fit(
         data,
         design_loc,
         design_scale,
+        design_loc_names: list = None,
+        design_scale_names: list = None,
         constraints_loc: np.ndarray = None,
         constraints_scale: np.ndarray = None,
         init_model=None,
@@ -148,6 +150,8 @@ def _fit(
             data=data,
             design_loc=design_loc,
             design_scale=design_scale,
+            design_loc_names=design_loc_names,
+            design_scale_names=design_scale_names,
             constraints_loc=constraints_loc,
             constraints_scale=constraints_scale,
             size_factors=size_factors,
@@ -558,8 +562,8 @@ def wald(
         sample_description=sample_description
     )
 
-    logging.getLogger("diffxpy").debug("building location model")
-    design_loc, constraints_loc = constraint_system_from_star(
+    # Build design matrices and constraints.
+    design_loc, design_loc_names, constraints_loc, term_names_loc = constraint_system_from_star(
         dmat=dmat_loc,
         sample_description=sample_description,
         formula=formula_loc,
@@ -567,8 +571,7 @@ def wald(
         constraints=constraints_loc,
         return_type="patsy"
     )
-    logging.getLogger("diffxpy").debug("building scale model")
-    design_scale, constraints_scale = constraint_system_from_star(
+    design_scale, design_scale_names, constraints_scale, term_names_scale = constraint_system_from_star(
         dmat=dmat_scale,
         sample_description=sample_description,
         formula=formula_scale,
@@ -579,22 +582,20 @@ def wald(
 
     # Define indices of coefficients to test:
     constraints_loc_temp = constraints_loc if constraints_loc is not None else np.eye(design_loc.shape[-1])
+    # Check that design_loc is patsy, otherwise  use term_names for slicing.
     if factor_loc_totest is not None:
-        # Select coefficients to test via formula model:
-        # Create temporary patsy design matrix to catch events in which design matrix is not patsy anymore here:
-        design_loc_temp = design_matrix(
-            data=data,
-            sample_description=sample_description,
-            formula=formula_loc,
-            as_numeric=as_numeric,
-            dmat=dmat_loc,
-            return_type="patsy"
-        )
-        col_indices = np.concatenate([
-            np.arange(design_loc_temp.shape[-1])[design_loc_temp.design_info.slice(x)]
-            for x in factor_loc_totest
-        ])
-        assert col_indices.size > 0, "Could not find any matching columns!"
+        if not isinstance(design_loc, patsy.design_info.DesignMatrix):
+            col_indices = np.where([
+                x in factor_loc_totest
+                for x in term_names_loc
+            ])[0]
+        else:
+            # Select coefficients to test via formula model:
+            col_indices = np.concatenate([
+                np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
+                for x in factor_loc_totest
+            ])
+        assert len(col_indices) > 0, "Could not find any matching columns!"
         if coef_to_test is not None:
             if len(factor_loc_totest) > 1:
                 raise ValueError("do not set coef_to_test if more than one factor_loc_totest is given")
@@ -626,16 +627,19 @@ def wald(
         raise ValueError("either set factor_loc_totest or coef_to_test")
     # Check that all tested coefficients are independent:
     for x in col_indices:
-        if np.sum(constraints_loc_temp[x,:]) != 1:
+        if np.sum(constraints_loc_temp[x, :]) != 1:
             raise ValueError("Constraints input is wrong: not all tested coefficients are unconstrained.")
     # Adjust tested coefficients from dependent to independent (fitted) parameters:
-    col_indices = np.array([np.where(constraints_loc_temp[x,:] == 1)[0][0] for x in col_indices])
+    col_indices = np.array([np.where(constraints_loc_temp[x, :] == 1)[0][0] for x in col_indices])
 
+    # Fit model.
     model = _fit(
         noise_model=noise_model,
         data=data,
         design_loc=design_loc,
         design_scale=design_scale,
+        design_loc_names=design_loc_names,
+        design_scale_names=design_scale_names,
         constraints_loc=constraints_loc,
         constraints_scale=constraints_scale,
         init_a=init_a,
@@ -649,13 +653,13 @@ def wald(
         **kwargs,
     )
 
+    # Prepare differential expression test.
     de_test = DifferentialExpressionTestWald(
         model_estim=model,
         col_indices=col_indices,
         noise_model=noise_model,
         sample_description=sample_description
     )
-
     return de_test
 
 
@@ -1721,22 +1725,22 @@ def wald(
 
 
 def continuous_1d(
-        data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix],
+        data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix, glm.typing.InputDataBase],
         continuous: str,
-        df: int = 5,
-        factor_loc_totest: Union[str, List[str]] = None,
-        formula_loc: str = None,
+        factor_loc_totest: Union[str, List[str]],
+        formula_loc: str,
         formula_scale: str = "~1",
+        df: int = 5,
         as_numeric: Union[List[str], Tuple[str], str] = (),
         test: str = 'wald',
         init_a: Union[np.ndarray, str] = "standard",
         init_b: Union[np.ndarray, str] = "standard",
         gene_names: Union[np.ndarray, list] = None,
-        sample_description=None,
+        sample_description: Union[None, pd.DataFrame] = None,
         constraints_loc: Union[dict, None] = None,
         constraints_scale: Union[dict, None] = None,
         noise_model: str = 'nb',
-        size_factors: np.ndarray = None,
+        size_factors: Union[np.ndarray, pd.core.series.Series, str] = None,
         batch_size: int = None,
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         quick_scale: bool = None,
@@ -1896,10 +1900,7 @@ def continuous_1d(
     """
     if formula_loc is None:
         raise ValueError("supply fomula_loc")
-    # Set testing default to continuous covariate if not supplied:
-    if factor_loc_totest is None:
-        factor_loc_totest = [continuous]
-    elif isinstance(factor_loc_totest, str):
+    if isinstance(factor_loc_totest, str):
         factor_loc_totest = [factor_loc_totest]
     elif isinstance(factor_loc_totest, tuple):
         factor_loc_totest = list(factor_loc_totest)
@@ -1917,6 +1918,11 @@ def continuous_1d(
         raise ValueError('parameter continuous not found in sample_description')
 
     # Perform spline basis transform.
+    if not np.issubdtype(sample_description[continuous].dtype, np.number):
+        raise ValueError(
+            "The column corresponding to the continuous covariate ('%s') in " % continuous +
+            "sample description should be numeric but is '%s'" % sample_description[continuous].dtype
+        )
     spline_basis = patsy.highlevel.dmatrix("0+bs(" + continuous + ", df=" + str(df) + ")", sample_description)
     spline_basis = pd.DataFrame(spline_basis)
     new_coefs = [continuous + str(i) for i in range(spline_basis.shape[1])]
@@ -1943,17 +1949,11 @@ def continuous_1d(
     # across interaction terms.
     formula_term_continuous = '(' + formula_extension + ')'
 
-    if formula_loc is not None:
-        formula_loc_new = formula_loc.split(continuous)
-        formula_loc_new = formula_term_continuous.join(formula_loc_new)
-    else:
-        formula_loc_new = None
+    formula_loc_new = formula_loc.split(continuous)
+    formula_loc_new = formula_term_continuous.join(formula_loc_new)
 
-    if formula_scale is not None:
-        formula_scale_new = formula_scale.split(continuous)
-        formula_scale_new = formula_term_continuous.join(formula_scale_new)
-    else:
-        formula_scale_new = None
+    formula_scale_new = formula_scale.split(continuous)
+    formula_scale_new = formula_term_continuous.join(formula_scale_new)
 
     # Add spline basis into sample description
     for x in spline_basis.columns:
diff --git a/diffxpy/testing/utils.py b/diffxpy/testing/utils.py
index 9fc7501..295f386 100644
--- a/diffxpy/testing/utils.py
+++ b/diffxpy/testing/utils.py
@@ -213,7 +213,7 @@ def preview_coef_names(
 
 
 def constraint_system_from_star(
-        dmat: Union[None, np.ndarray] = None,
+        dmat: Union[None, patsy.design_info.DesignMatrix] = None,
         sample_description: Union[None, pd.DataFrame] = None,
         formula: Union[None, str] = None,
         as_numeric: Union[List[str], Tuple[str], str] = (),
diff --git a/diffxpy/unit_test/test_continuous_de.py b/diffxpy/unit_test/test_continuous_de.py
new file mode 100644
index 0000000..b7acacf
--- /dev/null
+++ b/diffxpy/unit_test/test_continuous_de.py
@@ -0,0 +1,131 @@
+import unittest
+import logging
+import numpy as np
+
+import diffxpy.api as de
+
+
+class _TestContinuousDe:
+    noise_model: str
+
+    def _test_wald_de(
+            self,
+            constrained: bool,
+            nobs: int = 2000,
+            ngenes: int = 100
+    ):
+        if self.noise_model == "nb":
+            from batchglm.api.models.glm_nb import Simulator
+            rand_fn_loc = lambda shape: np.random.uniform(2, 5, shape)
+            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
+        elif self.noise_model == "norm":
+            from batchglm.api.models.glm_norm import Simulator
+            rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
+            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
+        else:
+            raise ValueError("noise model %s not recognized" % self.noise_model)
+
+        sim = Simulator(num_observations=nobs, num_features=ngenes)
+        sim.generate_sample_description(
+            num_batches=0,
+            num_conditions=5
+        )
+        sim.generate_params(
+            rand_fn_loc=rand_fn_loc,
+            rand_fn_scale=rand_fn_scale
+        )
+        num_non_de = round(ngenes / 2)
+        sim.a_var[1:, :num_non_de] = 0  # Set all condition effects of non DE genes to zero.
+        sim.b_var[1:, :] = 0  # Use constant dispersion across all conditions.
+        self.isDE = np.arange(ngenes) >= num_non_de
+        sim.generate_data()
+
+        random_sample_description = sim.sample_description
+        random_sample_description["continuous_categ"] = random_sample_description["condition"]
+        random_sample_description["continuous"] = [int(x) for x in random_sample_description["condition"]]
+        random_sample_description["batch"] = [
+            str(int(x)) + str(np.random.randint(0, 3))
+            for x in random_sample_description["continuous"]
+        ]
+
+        test = de.test.continuous_1d(
+            data=sim.input_data,
+            sample_description=random_sample_description,
+            gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
+            formula_loc="~ 1 + continuous + batch" if constrained else "~ 1 + continuous",
+            formula_scale="~ 1",
+            factor_loc_totest="continuous",
+            continuous="continuous",
+            constraints_loc={"batch": "continuous_categ"} if constrained else None,
+            df=5,
+            test="wald",
+            quick_scale=True,
+            noise_model=self.noise_model
+        )
+        self._eval(sim=sim, test=test)
+
+    def _eval(self, sim, test):
+        idx_de = np.where(self.isDE)[0]
+        idx_nonde = np.where(np.logical_not(self.isDE))[0]
+
+        frac_de_of_non_de = np.mean(test.qval[idx_nonde] < 0.05)
+        frac_de_of_de = np.mean(test.qval[idx_de] < 0.05)
+
+        logging.getLogger("diffxpy").info(
+            'fraction of non-DE genes with q-value < 0.05: %.1f%%' %
+            float(100 * frac_de_of_non_de)
+        )
+        logging.getLogger("diffxpy").info(
+            'fraction of DE genes with q-value < 0.05: %.1f%%' %
+            float(100 * frac_de_of_de)
+        )
+        assert frac_de_of_non_de <= 0.1, "too many false-positives, FPR=%f" % frac_de_of_non_de
+        assert frac_de_of_de >= 0.5, "too many false-negatives, TPR=%f" % frac_de_of_de
+
+        return sim
+
+
+class TestContinuousDeNb(_TestContinuousDe, unittest.TestCase):
+    """
+    Negative binomial noise model unit tests that tests false positive and false negative rates.
+    """
+
+    def test_wald_de_nb(self):
+        """
+
+        :return:
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.INFO)
+
+        self.noise_model = "nb"
+        np.random.seed(1)
+        self._test_wald_de(constrained=False)
+        self._test_wald_de(constrained=True)
+        return True
+
+
+class TestContinuousDeNorm(_TestContinuousDe, unittest.TestCase):
+    """
+    Normal noise model unit tests that tests false positive and false negative rates.
+    """
+
+    def test_wald_de_norm(self):
+        """
+
+        :return:
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        self.noise_model = "norm"
+        np.random.seed(1)
+        self._test_wald_de(constrained=False)
+        self._test_wald_de(constrained=True)
+        return True
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/diffxpy/unit_test/test_continuous.py b/diffxpy/unit_test/test_continuous_null.py
similarity index 81%
rename from diffxpy/unit_test/test_continuous.py
rename to diffxpy/unit_test/test_continuous_null.py
index d88dafe..9d54f89 100644
--- a/diffxpy/unit_test/test_continuous.py
+++ b/diffxpy/unit_test/test_continuous_null.py
@@ -24,12 +24,12 @@ def _fit_continuous(
             data=sim.input_data,
             sample_description=sample_description,
             gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
-            formula_loc="~ 1 + continuous + batch",
+            formula_loc="~ 1 + continuous + batch" if constrained else "~ 1 + continuous",
             formula_scale="~ 1",
             factor_loc_totest="continuous",
             continuous="continuous",
             constraints_loc={"batch": "continuous"} if constrained else None,
-            size_factors=np.random.uniform(0.5, 1.5, sim.nobs),
+            size_factors="size_factors",
             df=3,
             test=test,
             quick_scale=True,
@@ -46,13 +46,15 @@ def _test_null_model(
     ):
         sim = Simulator(num_observations=nobs, num_features=ngenes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
-        sim.generate()
+        sim.generate_params()
+        sim.generate_data()
 
         random_sample_description = pd.DataFrame({
             "continuous": np.asarray(np.random.randint(0, 5, size=sim.nobs), dtype=float)
         })
         random_sample_description["batch"] = [str(int(x)) + str(np.random.randint(0, 3))
                                               for x in random_sample_description["continuous"]]
+        random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs)
         return self._fit_continuous(
             sim=sim,
             sample_description=random_sample_description,
@@ -95,7 +97,6 @@ def _test_forfatal(
         _ = test.argmax(genes=ids, non_numeric=True)
         _ = test.argmin(genes=ids, non_numeric=True)
         _ = test.summary(non_numeric=True)
-
         return True
 
 
@@ -118,7 +119,7 @@ def test_forfatal_wald(self):
         self.noise_model = "nb"
         np.random.seed(1)
         _ = self._test_forfatal(test="wald", constrained=False)
-        #_ = self._test_forfatal(test="wald", constrained=True)
+        _ = self._test_forfatal(test="wald", constrained=True)
         return True
 
     def test_forfatal_lrt(self):
@@ -140,7 +141,7 @@ def test_forfatal_lrt(self):
         _ = self._test_forfatal(test="lrt", constrained=False)
         return True
 
-    def test_null_distribution_wald(self):
+    def test_null_distribution_wald_unconstrained(self):
         """
         Test if de.test.continuous() generates a uniform p-value distribution in the wald test
         if it is given data simulated based on the null model. Returns the p-value
@@ -156,14 +157,40 @@ def test_null_distribution_wald(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
+
         test = self._test_null_model(nobs=2000, ngenes=100, test="wald", constrained=False)
 
         # Compare p-value distribution under null model against uniform distribution.
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
 
         logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
-        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
+        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05" % pval_h0
+        return True
+
+    def test_null_distribution_wald_constrained(self):
+        """
+        Test if de.test.continuous() generates a uniform p-value distribution in the wald test
+        if it is given data simulated based on the null model. Returns the p-value
+        of the two-side Kolmgorov-Smirnov test for equality of the observed
+        p-value distriubution and a uniform distribution.
+
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.WARNING)
+
+        self.noise_model = "nb"
+        np.random.seed(1)
+
+        test = self._test_null_model(nobs=2000, ngenes=100, test="wald", constrained=True)
 
+        # Compare p-value distribution under null model against uniform distribution.
+        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
+
+        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
+        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
         return True
 
     def test_null_distribution_lrt(self):
@@ -188,8 +215,6 @@ def test_null_distribution_lrt(self):
         pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
 
         logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
-        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
-
         return True
 
 
diff --git a/diffxpy/unit_test/test_fit.py b/diffxpy/unit_test/test_fit.py
index 80b177f..9a27b7d 100644
--- a/diffxpy/unit_test/test_fit.py
+++ b/diffxpy/unit_test/test_fit.py
@@ -43,7 +43,7 @@ def _test_model_fit(
             "batch": np.random.randint(2, size=sim.nobs)
         })
 
-        estim = de.fit.model(
+        _ = de.fit.model(
             data=sim.input_data,
             sample_description=random_sample_description,
             formula_loc="~ 1 + condition + batch",
diff --git a/diffxpy/unit_test/test_pairwise.py b/diffxpy/unit_test/test_pairwise_null.py
similarity index 100%
rename from diffxpy/unit_test/test_pairwise.py
rename to diffxpy/unit_test/test_pairwise_null.py

From 599bfe4af5913615948acaf1ae334d1ec9ab2892 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 16:21:48 -0700
Subject: [PATCH 15/25] added checks for design matrix rank and df of
 continuous model and added additional spline basis transforms

fixed some remaining bugs in usage of constraints
user can now chose between spline basis: bs, cr, cc
there was potentially an unidentifiability in spline basis transform before as bs was not explicitly centered
---
 diffxpy/testing/det_cont.py               |  2 +-
 diffxpy/testing/tests.py                  | 42 ++++++++--
 diffxpy/unit_test/test_constrained.py     |  6 +-
 diffxpy/unit_test/test_continuous_de.py   | 29 ++++---
 diffxpy/unit_test/test_continuous_null.py | 98 +++++++++++++++--------
 diffxpy/unit_test/test_single_fullrank.py | 85 ++++++++++++++++++++
 diffxpy/unit_test/test_single_null.py     | 13 +--
 7 files changed, 212 insertions(+), 63 deletions(-)
 create mode 100644 diffxpy/unit_test/test_single_fullrank.py

diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
index 3476c04..41c67f5 100644
--- a/diffxpy/testing/det_cont.py
+++ b/diffxpy/testing/det_cont.py
@@ -214,7 +214,7 @@ def _continuous_model(self, idx, non_numeric=False):
             mu = np.matmul(self._model_estim.input_data.design_loc,
                            self._model_estim.model.a[:, idx])
             if self._size_factors is not None:
-                mu = mu + self._size_factors
+                mu = mu + self._model_estim.input_data.size_factors
         else:
             idx_basis = self._spline_par_loc_idx(intercept=True)
             mu = np.matmul(self._model_estim.input_data.design_loc[:, idx_basis],
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 54461a3..57776b9 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -611,7 +611,7 @@ def wald(
                 sample_description=sample_description,
                 formula=formula_loc,
                 as_numeric=as_numeric
-            ).tolist()
+            )
         else:
             coef_loc_names = dmat_loc.columns.tolist()
         if not np.all([x in coef_loc_names for x in coef_to_test]):
@@ -1731,6 +1731,7 @@ def continuous_1d(
         formula_loc: str,
         formula_scale: str = "~1",
         df: int = 5,
+        spline_basis: str = "bs",
         as_numeric: Union[List[str], Tuple[str], str] = (),
         test: str = 'wald',
         init_a: Union[np.ndarray, str] = "standard",
@@ -1774,7 +1775,14 @@ def continuous_1d(
     :param df: int
         Degrees of freedom of the spline model, i.e. the number of spline basis vectors.
         df is equal to the number of coefficients in the GLM which are used to describe the
-        continuous depedency-
+        continuous depedency.
+    :param spline_basis: str
+        The type of sline basis to use, refer also to:
+            https://patsy.readthedocs.io/en/latest/spline-regression.html
+
+            - "bs": B-splines
+            - "cr": natural cubic splines
+            - "cc": natural cyclic splines
     :param factor_loc_totest:
         List of factors of formula to test with Wald test.
         E.g. "condition" or ["batch", "condition"] if formula_loc would be "~ 1 + batch + condition"
@@ -1913,17 +1921,39 @@ def continuous_1d(
     gene_names = parse_gene_names(data, gene_names)
     sample_description = parse_sample_description(data, sample_description)
 
-    # Check that continuous factor is contained in sample description
+    # Check that continuous factor is contained in sample description and is numeric.
     if continuous not in sample_description.columns:
         raise ValueError('parameter continuous not found in sample_description')
-
-    # Perform spline basis transform.
     if not np.issubdtype(sample_description[continuous].dtype, np.number):
         raise ValueError(
             "The column corresponding to the continuous covariate ('%s') in " % continuous +
             "sample description should be numeric but is '%s'" % sample_description[continuous].dtype
         )
-    spline_basis = patsy.highlevel.dmatrix("0+bs(" + continuous + ", df=" + str(df) + ")", sample_description)
+    # Check that not too many degrees of freedom given the sampled points were chosen:
+    if len(np.unique(sample_description[continuous].values)) <= df - 1:
+        raise ValueError(
+            "Use at most (number of observed points in continuous covariate) - 1 degrees of freedom " +
+            " for spline basis. You chose df=%i for n=%i points." %
+            (df, len(np.unique(sample_description[continuous].values)))
+        )
+    # Perform spline basis transform.
+    if spline_basis.lower() == "bs":
+        spline_basis = patsy.highlevel.dmatrix(
+            "bs(" + continuous + ", df=" + str(df) + ", degree=3, include_intercept=False) - 1",
+            sample_description
+        )
+    elif spline_basis.lower() == "cr":
+        spline_basis = patsy.highlevel.dmatrix(
+            "cr(" + continuous + ", df=" + str(df) + ", constraints='center') - 1",
+            sample_description
+        )
+    elif spline_basis.lower() == "cc":
+        spline_basis = patsy.highlevel.dmatrix(
+            "cc(" + continuous + ", df=" + str(df) + ", constraints='center') - 1",
+            sample_description
+        )
+    else:
+        raise ValueError("spline basis %s not recognized" % spline_basis)
     spline_basis = pd.DataFrame(spline_basis)
     new_coefs = [continuous + str(i) for i in range(spline_basis.shape[1])]
     spline_basis.columns = new_coefs
diff --git a/diffxpy/unit_test/test_constrained.py b/diffxpy/unit_test/test_constrained.py
index 7b4143a..0acdb1c 100644
--- a/diffxpy/unit_test/test_constrained.py
+++ b/diffxpy/unit_test/test_constrained.py
@@ -40,8 +40,8 @@ def test_forfatal_from_string(self):
         coefficient_names = ['intercept', 'bio1', 'bio2', 'bio3', 'bio4', 'treatment1']
         dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names)
 
-        dmat_est_loc = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
-        dmat_est_scale = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
+        dmat_est_loc, _ = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
+        dmat_est_scale, _ = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe")
 
         # Build constraints:
         constraints_loc = de.utils.constraint_matrix_from_string(
@@ -144,7 +144,7 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100):
 
         return True
 
-    def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
+    def _test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100):
         """
         Test if de.wald() with constraints generates a uniform p-value distribution
         if it is given data simulated based on the null model. Returns the p-value
diff --git a/diffxpy/unit_test/test_continuous_de.py b/diffxpy/unit_test/test_continuous_de.py
index b7acacf..023d3cc 100644
--- a/diffxpy/unit_test/test_continuous_de.py
+++ b/diffxpy/unit_test/test_continuous_de.py
@@ -11,8 +11,8 @@ class _TestContinuousDe:
     def _test_wald_de(
             self,
             constrained: bool,
-            nobs: int = 2000,
-            ngenes: int = 100
+            spline_basis: str,
+            ngenes: int
     ):
         if self.noise_model == "nb":
             from batchglm.api.models.glm_nb import Simulator
@@ -25,10 +25,11 @@ def _test_wald_de(
         else:
             raise ValueError("noise model %s not recognized" % self.noise_model)
 
-        sim = Simulator(num_observations=nobs, num_features=ngenes)
+        n_timepoints = 7
+        sim = Simulator(num_observations=n_timepoints*200, num_features=ngenes)
         sim.generate_sample_description(
             num_batches=0,
-            num_conditions=5
+            num_conditions=n_timepoints
         )
         sim.generate_params(
             rand_fn_loc=rand_fn_loc,
@@ -41,7 +42,6 @@ def _test_wald_de(
         sim.generate_data()
 
         random_sample_description = sim.sample_description
-        random_sample_description["continuous_categ"] = random_sample_description["condition"]
         random_sample_description["continuous"] = [int(x) for x in random_sample_description["condition"]]
         random_sample_description["batch"] = [
             str(int(x)) + str(np.random.randint(0, 3))
@@ -56,8 +56,9 @@ def _test_wald_de(
             formula_scale="~ 1",
             factor_loc_totest="continuous",
             continuous="continuous",
-            constraints_loc={"batch": "continuous_categ"} if constrained else None,
+            constraints_loc={"batch": "continuous"} if constrained else None,
             df=5,
+            spline_basis=spline_basis,
             test="wald",
             quick_scale=True,
             noise_model=self.noise_model
@@ -84,6 +85,14 @@ def _eval(self, sim, test):
 
         return sim
 
+    def _test_wald_de_all_splines(
+            self,
+            ngenes: int,
+            constrained: bool
+    ):
+        for x in ["bs", "cr", "cc"]:
+            self._test_wald_de(ngenes=ngenes, constrained=constrained, spline_basis=x)
+
 
 class TestContinuousDeNb(_TestContinuousDe, unittest.TestCase):
     """
@@ -101,8 +110,8 @@ def test_wald_de_nb(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        self._test_wald_de(constrained=False)
-        self._test_wald_de(constrained=True)
+        self._test_wald_de_all_splines(ngenes=100, constrained=False)
+        self._test_wald_de_all_splines(ngenes=100, constrained=True)
         return True
 
 
@@ -122,8 +131,8 @@ def test_wald_de_norm(self):
 
         self.noise_model = "norm"
         np.random.seed(1)
-        self._test_wald_de(constrained=False)
-        self._test_wald_de(constrained=True)
+        self._test_wald_de_all_splines(ngenes=100, constrained=False)
+        self._test_wald_de_all_splines(ngenes=100, constrained=True)
         return True
 
 
diff --git a/diffxpy/unit_test/test_continuous_null.py b/diffxpy/unit_test/test_continuous_null.py
index 9d54f89..8e9c122 100644
--- a/diffxpy/unit_test/test_continuous_null.py
+++ b/diffxpy/unit_test/test_continuous_null.py
@@ -18,7 +18,8 @@ def _fit_continuous(
         sim,
         sample_description,
         constrained,
-        test
+        test,
+        spline_basis
     ):
         test = de.test.continuous_1d(
             data=sim.input_data,
@@ -31,41 +32,69 @@ def _fit_continuous(
             constraints_loc={"batch": "continuous"} if constrained else None,
             size_factors="size_factors",
             df=3,
+            spline_basis=spline_basis,
             test=test,
             quick_scale=True,
             noise_model=self.noise_model
         )
         return test
 
-    def _test_null_model(
+    def _test_basic(
             self,
-            nobs: int,
             ngenes: int,
             test: str,
-            constrained: bool
+            constrained: bool,
+            spline_basis: str
     ):
-        sim = Simulator(num_observations=nobs, num_features=ngenes)
+        n_timepoints = 5
+        sim = Simulator(num_observations=n_timepoints*200, num_features=ngenes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
         sim.generate_params()
         sim.generate_data()
 
         random_sample_description = pd.DataFrame({
-            "continuous": np.asarray(np.random.randint(0, 5, size=sim.nobs), dtype=float)
+            "continuous": np.asarray(np.random.randint(0, n_timepoints, size=sim.nobs), dtype=float)
         })
         random_sample_description["batch"] = [str(int(x)) + str(np.random.randint(0, 3))
                                               for x in random_sample_description["continuous"]]
-        random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs)
-        return self._fit_continuous(
+        random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs)  # TODO put into simulation.
+        det = self._fit_continuous(
             sim=sim,
             sample_description=random_sample_description,
             test=test,
-            constrained=constrained
+            constrained=constrained,
+            spline_basis=spline_basis,
+        )
+        return det
+
+    def _test_null_model(
+            self,
+            ngenes: int,
+            test: str,
+            constrained: bool,
+            spline_basis: str
+    ):
+        det = self._test_basic(
+            ngenes=ngenes,
+            test=test,
+            constrained=constrained,
+            spline_basis=spline_basis
         )
+        return self._eval(det=det)
+
+    def _eval(self, det):
+        pval_h0 = stats.kstest(det.pval, 'uniform').pvalue
+        logging.getLogger("diffxpy").info(
+            'KS-test pvalue for null model match of wald(): %f' % pval_h0
+        )
+        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05" % pval_h0
+        return True
 
     def _test_forfatal(
             self,
             test: str,
-            constrained: bool
+            constrained: bool,
+            spline_basis: str
     ):
         """
         Test if de.test.continuous() DifferentialExpressionTestSingle object functions work fine.
@@ -74,11 +103,11 @@ def _test_forfatal(
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
-        test = self._test_null_model(
-            nobs=500,
+        test = self._test_basic(
             ngenes=2,
             test=test,
-            constrained=constrained
+            constrained=constrained,
+            spline_basis=spline_basis
         )
         ids = test.gene_ids
 
@@ -99,6 +128,23 @@ def _test_forfatal(
         _ = test.summary(non_numeric=True)
         return True
 
+    def _test_for_fatal_all_splines(
+            self,
+            test: str,
+            constrained: bool
+    ):
+        for x in ["bs", "cr", "cc"]:
+            self._test_forfatal(test=test, constrained=constrained, spline_basis=x)
+
+    def _test_null_model_all_splines(
+            self,
+            ngenes: int,
+            test: str,
+            constrained: bool
+    ):
+        for x in ["bs", "cr", "cc"]:
+            self._test_null_model(ngenes=ngenes, test=test, constrained=constrained, spline_basis=x)
+
 
 class TestContinuousNb(_TestContinuous, unittest.TestCase):
 
@@ -118,11 +164,11 @@ def test_forfatal_wald(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        _ = self._test_forfatal(test="wald", constrained=False)
-        _ = self._test_forfatal(test="wald", constrained=True)
+        self._test_for_fatal_all_splines(test="wald", constrained=False)
+        self._test_for_fatal_all_splines(test="wald", constrained=True)
         return True
 
-    def test_forfatal_lrt(self):
+    def _test_forfatal_lrt(self):
         """
         Test if de.test.continuous() generates a uniform p-value distribution in the wald test
         if it is given data simulated based on the null model. Returns the p-value
@@ -157,14 +203,7 @@ def test_null_distribution_wald_unconstrained(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-
-        test = self._test_null_model(nobs=2000, ngenes=100, test="wald", constrained=False)
-
-        # Compare p-value distribution under null model against uniform distribution.
-        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
-
-        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
-        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05" % pval_h0
+        self._test_null_model_all_splines(ngenes=100, test="wald", constrained=False)
         return True
 
     def test_null_distribution_wald_constrained(self):
@@ -183,17 +222,10 @@ def test_null_distribution_wald_constrained(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-
-        test = self._test_null_model(nobs=2000, ngenes=100, test="wald", constrained=True)
-
-        # Compare p-value distribution under null model against uniform distribution.
-        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue
-
-        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
-        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"
+        self._test_null_model_all_splines(ngenes=100, test="wald", constrained=True)
         return True
 
-    def test_null_distribution_lrt(self):
+    def _test_null_distribution_lrt(self):
         """
         Test if de.test.continuous() generates a uniform p-value distribution in lrt
         if it is given data simulated based on the null model. Returns the p-value
diff --git a/diffxpy/unit_test/test_single_fullrank.py b/diffxpy/unit_test/test_single_fullrank.py
new file mode 100644
index 0000000..785d3d7
--- /dev/null
+++ b/diffxpy/unit_test/test_single_fullrank.py
@@ -0,0 +1,85 @@
+import unittest
+import logging
+import numpy as np
+import pandas as pd
+import scipy.stats as stats
+
+import diffxpy.api as de
+
+
+class _TestSingleFullRank(unittest.TestCase):
+
+    def _test_single_full_rank(self):
+        """
+        Test if de.wald() generates a uniform p-value distribution
+        if it is given data simulated based on the null model. Returns the p-value
+        of the two-side Kolmgorov-Smirnov test for equality of the observed 
+        p-value distribution and a uniform distribution.
+
+        :param n_cells: Number of cells to simulate (number of observations per test).
+        :param n_genes: Number of genes to simulate (number of tests).
+        :param noise_model: Noise model to use for data fitting.
+        """
+        if self.noise_model == "nb":
+            from batchglm.api.models.glm_nb import Simulator
+            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
+        elif self.noise_model == "norm":
+            from batchglm.api.models.glm_norm import Simulator
+            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
+        else:
+            raise ValueError("noise model %s not recognized" % self.noise_model)
+
+        sim = Simulator(num_observations=200, num_features=2)
+        sim.generate_sample_description(num_batches=0, num_conditions=0)
+        sim.generate_params(rand_fn_scale=rand_fn_scale)
+        sim.generate_data()
+
+        random_sample_description = pd.DataFrame({
+            "condition": [str(x) for x in np.random.randint(2, size=sim.nobs)]
+        })
+
+        try:
+            random_sample_description["batch"] = random_sample_description["condition"]
+            _ = de.test.wald(
+                data=sim.input_data,
+                sample_description=random_sample_description,
+                factor_loc_totest="condition",
+                formula_loc="~ 1 + condition + batch",
+                noise_model=self.noise_model
+            )
+        except ValueError as error:
+            logging.getLogger("diffxpy").info(error)
+        else:
+            raise ValueError("rank error was erroneously not thrown on under-determined unconstrained system")
+
+        try:
+            random_sample_description["batch"] = [
+                x + str(np.random.randint(0, 2)) for x in random_sample_description["condition"].values
+            ]
+            _ = de.test.wald(
+                data=sim.input_data,
+                sample_description=random_sample_description,
+                factor_loc_totest="condition",
+                formula_loc="~ 1 + condition + batch",
+                constraints_loc={"batch": "condition"},
+                noise_model=self.noise_model
+            )
+        except ValueError as error:
+            raise ValueError("rank error was erroneously thrown on defined constrained system")
+
+    def test_single_full_rank(
+            self):
+        """
+        Test if error is thrown if input is not full rank.
+        """
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("diffxpy").setLevel(logging.INFO)
+
+        np.random.seed(1)
+        self.noise_model = "nb"
+        return self._test_single_full_rank()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/diffxpy/unit_test/test_single_null.py b/diffxpy/unit_test/test_single_null.py
index 3c9834c..6c1b801 100644
--- a/diffxpy/unit_test/test_single_null.py
+++ b/diffxpy/unit_test/test_single_null.py
@@ -49,10 +49,7 @@ def _test_null_distribution_wald(
             sample_description=random_sample_description,
             factor_loc_totest="condition",
             formula_loc="~ 1 + condition + batch",
-            batch_size=500,
-            noise_model=noise_model,
-            training_strategy="DEFAULT",
-            dtype="float64"
+            noise_model=noise_model
         )
         _ = test.summary()
 
@@ -100,9 +97,7 @@ def _test_null_distribution_wald_multi(
             sample_description=random_sample_description,
             factor_loc_totest="condition",
             formula_loc="~ 1 + condition",
-            noise_model=noise_model,
-            training_strategy="DEFAULT",
-            dtype="float64"
+            noise_model=noise_model
         )
         _ = test.summary()
 
@@ -152,9 +147,7 @@ def _test_null_distribution_lrt(
             full_formula_scale="~ 1",
             reduced_formula_loc="~ 1",
             reduced_formula_scale="~ 1",
-            noise_model=noise_model,
-            training_strategy="DEFAULT",
-            dtype="float64"
+            noise_model=noise_model
         )
         _ = test.summary()
 

From 9797135a0b48a2d4557f1dd1983e0972129f59e2 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 16:51:06 -0700
Subject: [PATCH 16/25] added unconstrained fitting of continuous models with
 interactions

---
 diffxpy/testing/tests.py                  | 27 ++++++--
 diffxpy/unit_test/test_continuous_null.py | 81 ++++++++++++++++++++++-
 2 files changed, 100 insertions(+), 8 deletions(-)

diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 57776b9..6dd708f 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -595,6 +595,7 @@ def wald(
                 np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
                 for x in factor_loc_totest
             ])
+        print(col_indices)
         assert len(col_indices) > 0, "Could not find any matching columns!"
         if coef_to_test is not None:
             if len(factor_loc_totest) > 1:
@@ -1989,7 +1990,7 @@ def continuous_1d(
     for x in spline_basis.columns:
         sample_description[x] = spline_basis[x].values
 
-    # Add spline basis to continuous covariate list
+    # Add spline basis to continuous covariates list
     as_numeric.extend(new_coefs)
 
     if test.lower() == 'wald':
@@ -1999,23 +2000,34 @@ def continuous_1d(
         # Adjust factors / coefficients to test:
         # Note that the continuous covariate does not necessarily have to be tested,
         # it could also be a condition effect or similar.
-        # TODO handle interactions
         if continuous in factor_loc_totest:
             # Create reduced set of factors to test which does not contain continuous:
-            factor_loc_totest_new = [x for x in factor_loc_totest if x != continuous]
+            factor_loc_totest_intermediate = [x for x in factor_loc_totest if x != continuous]
             # Add spline basis terms in instead of continuous term:
-            factor_loc_totest_new.extend(new_coefs)
+            factor_loc_totest_intermediate.extend(new_coefs)
         else:
-            factor_loc_totest_new = factor_loc_totest
+            factor_loc_totest_intermediate = factor_loc_totest
+        # Replace continuous factor in interaction terms with new spline factors.
+        factor_loc_totest_final = []
+        for i, x in enumerate(factor_loc_totest_intermediate):
+            if len(x.split(":")) > 1:
+                if np.any([x == continuous for x in x.split(":")]):
+                    interaction_partner = [y for y in x.split(":") if y != continuous][0]
+                    for y in new_coefs:
+                        factor_loc_totest_final.append(y+":"+interaction_partner)
+                else:
+                    factor_loc_totest_final.append(x)
+            else:
+                factor_loc_totest_final.append(x)
 
         logging.getLogger("diffxpy").debug("model formulas assembled in de.test.continuos():")
-        logging.getLogger("diffxpy").debug("factor_loc_totest_new: " + ",".join(factor_loc_totest_new))
+        logging.getLogger("diffxpy").debug("factor_loc_totest_final: " + ",".join(factor_loc_totest_final))
         logging.getLogger("diffxpy").debug("formula_loc_new: " + formula_loc_new)
         logging.getLogger("diffxpy").debug("formula_scale_new: " + formula_scale_new)
 
         de_test = wald(
             data=data,
-            factor_loc_totest=factor_loc_totest_new,
+            factor_loc_totest=factor_loc_totest_final,
             coef_to_test=None,
             formula_loc=formula_loc_new,
             formula_scale=formula_scale_new,
@@ -2034,6 +2046,7 @@ def continuous_1d(
             dtype=dtype,
             **kwargs
         )
+        print(de_test.model_estim.input_data.loc_names)
         de_test = DifferentialExpressionTestWaldCont(
             de_test=de_test,
             noise_model=noise_model,
diff --git a/diffxpy/unit_test/test_continuous_null.py b/diffxpy/unit_test/test_continuous_null.py
index 8e9c122..690ea92 100644
--- a/diffxpy/unit_test/test_continuous_null.py
+++ b/diffxpy/unit_test/test_continuous_null.py
@@ -39,6 +39,31 @@ def _fit_continuous(
         )
         return test
 
+    def _fit_continuous_interaction(
+        self,
+        sim,
+        sample_description,
+        constrained,
+        test,
+        spline_basis
+    ):
+        test = de.test.continuous_1d(
+            data=sim.input_data,
+            sample_description=sample_description,
+            gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
+            formula_loc="~ 1 + continuous + batch + continuous:batch",
+            formula_scale="~ 1",
+            factor_loc_totest=["continuous", "continuous:batch"],
+            continuous="continuous",
+            size_factors="size_factors",
+            df=3,
+            spline_basis=spline_basis,
+            test=test,
+            quick_scale=True,
+            noise_model=self.noise_model
+        )
+        return test
+
     def _test_basic(
             self,
             ngenes: int,
@@ -67,6 +92,34 @@ def _test_basic(
         )
         return det
 
+    def _test_interaction(
+            self,
+            ngenes: int,
+            test: str,
+            constrained: bool,
+            spline_basis: str
+    ):
+        n_timepoints = 5
+        sim = Simulator(num_observations=n_timepoints*200, num_features=ngenes)
+        sim.generate_sample_description(num_batches=0, num_conditions=0)
+        sim.generate_params()
+        sim.generate_data()
+
+        random_sample_description = pd.DataFrame({
+            "continuous": np.asarray(np.random.randint(0, n_timepoints, size=sim.nobs), dtype=float)
+        })
+        random_sample_description["batch"] = [str(np.random.randint(0, 3))
+                                              for x in random_sample_description["continuous"]]
+        random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs)  # TODO put into simulation.
+        det = self._fit_continuous_interaction(
+            sim=sim,
+            sample_description=random_sample_description,
+            test=test,
+            constrained=constrained,
+            spline_basis=spline_basis,
+        )
+        return det
+
     def _test_null_model(
             self,
             ngenes: int,
@@ -82,6 +135,21 @@ def _test_null_model(
         )
         return self._eval(det=det)
 
+    def _test_null_model_interaction(
+            self,
+            ngenes: int,
+            test: str,
+            constrained: bool,
+            spline_basis: str
+    ):
+        det = self._test_interaction(
+            ngenes=ngenes,
+            test=test,
+            constrained=constrained,
+            spline_basis=spline_basis
+        )
+        return self._eval(det=det)
+
     def _eval(self, det):
         pval_h0 = stats.kstest(det.pval, 'uniform').pvalue
         logging.getLogger("diffxpy").info(
@@ -145,6 +213,15 @@ def _test_null_model_all_splines(
         for x in ["bs", "cr", "cc"]:
             self._test_null_model(ngenes=ngenes, test=test, constrained=constrained, spline_basis=x)
 
+    def _test_null_model_all_splines_interaction(
+            self,
+            ngenes: int,
+            test: str,
+            constrained: bool
+    ):
+        for x in ["bs", "cr", "cc"]:
+            self._test_null_model_interaction(ngenes=ngenes, test=test, constrained=constrained, spline_basis=x)
+
 
 class TestContinuousNb(_TestContinuous, unittest.TestCase):
 
@@ -203,7 +280,8 @@ def test_null_distribution_wald_unconstrained(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        self._test_null_model_all_splines(ngenes=100, test="wald", constrained=False)
+        #self._test_null_model_all_splines(ngenes=100, test="wald", constrained=False)
+        self._test_null_model_all_splines_interaction(ngenes=100, test="wald", constrained=False)
         return True
 
     def test_null_distribution_wald_constrained(self):
@@ -223,6 +301,7 @@ def test_null_distribution_wald_constrained(self):
         self.noise_model = "nb"
         np.random.seed(1)
         self._test_null_model_all_splines(ngenes=100, test="wald", constrained=True)
+        # Interaction not supported yet.
         return True
 
     def _test_null_distribution_lrt(self):

From 03eb895dccd969fe5c13e64218f184e36595e104 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 16:56:54 -0700
Subject: [PATCH 17/25] added support for constraints in continuous models with
 interactions

---
 diffxpy/testing/tests.py                  |  2 --
 diffxpy/unit_test/test_continuous_null.py | 16 ++++++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 6dd708f..571bfed 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -595,7 +595,6 @@ def wald(
                 np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
                 for x in factor_loc_totest
             ])
-        print(col_indices)
         assert len(col_indices) > 0, "Could not find any matching columns!"
         if coef_to_test is not None:
             if len(factor_loc_totest) > 1:
@@ -2046,7 +2045,6 @@ def continuous_1d(
             dtype=dtype,
             **kwargs
         )
-        print(de_test.model_estim.input_data.loc_names)
         de_test = DifferentialExpressionTestWaldCont(
             de_test=de_test,
             noise_model=noise_model,
diff --git a/diffxpy/unit_test/test_continuous_null.py b/diffxpy/unit_test/test_continuous_null.py
index 690ea92..c3c82fe 100644
--- a/diffxpy/unit_test/test_continuous_null.py
+++ b/diffxpy/unit_test/test_continuous_null.py
@@ -51,10 +51,12 @@ def _fit_continuous_interaction(
             data=sim.input_data,
             sample_description=sample_description,
             gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)],
-            formula_loc="~ 1 + continuous + batch + continuous:batch",
+            formula_loc="~ 1 + continuous + condition + continuous:condition" if not constrained else \
+                "~ 1 + continuous + condition + continuous:condition + batch",
             formula_scale="~ 1",
-            factor_loc_totest=["continuous", "continuous:batch"],
+            factor_loc_totest=["continuous", "continuous:condition"],
             continuous="continuous",
+            constraints_loc={"batch": "condition"} if constrained else None,
             size_factors="size_factors",
             df=3,
             spline_basis=spline_basis,
@@ -108,8 +110,10 @@ def _test_interaction(
         random_sample_description = pd.DataFrame({
             "continuous": np.asarray(np.random.randint(0, n_timepoints, size=sim.nobs), dtype=float)
         })
-        random_sample_description["batch"] = [str(np.random.randint(0, 3))
-                                              for x in random_sample_description["continuous"]]
+        random_sample_description["condition"] = [str(np.random.randint(0, 2))
+                                                  for x in random_sample_description["continuous"]]
+        random_sample_description["batch"] = [x + str(np.random.randint(0, 3))
+                                              for x in random_sample_description["condition"]]
         random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs)  # TODO put into simulation.
         det = self._fit_continuous_interaction(
             sim=sim,
@@ -280,7 +284,7 @@ def test_null_distribution_wald_unconstrained(self):
 
         self.noise_model = "nb"
         np.random.seed(1)
-        #self._test_null_model_all_splines(ngenes=100, test="wald", constrained=False)
+        self._test_null_model_all_splines(ngenes=100, test="wald", constrained=False)
         self._test_null_model_all_splines_interaction(ngenes=100, test="wald", constrained=False)
         return True
 
@@ -301,7 +305,7 @@ def test_null_distribution_wald_constrained(self):
         self.noise_model = "nb"
         np.random.seed(1)
         self._test_null_model_all_splines(ngenes=100, test="wald", constrained=True)
-        # Interaction not supported yet.
+        self._test_null_model_all_splines_interaction(ngenes=100, test="wald", constrained=True)
         return True
 
     def _test_null_distribution_lrt(self):

From cfdea00e70ef0417f51af59c8c7d9f06da990a76 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 17:14:28 -0700
Subject: [PATCH 18/25] incremented batchglm version requirement to 0.6.8

---
 docs/requires.txt | 2 +-
 requirements.txt  | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/requires.txt b/docs/requires.txt
index 90179be..27cbd57 100644
--- a/docs/requires.txt
+++ b/docs/requires.txt
@@ -2,7 +2,7 @@ numpy>=1.14.0
 scipy
 pandas
 patsy>=0.5.0
-batchglm>=0.6.7
+batchglm>=0.6.8
 statsmodels
 anndata
 seaborn
diff --git a/requirements.txt b/requirements.txt
index 90179be..27cbd57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy>=1.14.0
 scipy
 pandas
 patsy>=0.5.0
-batchglm>=0.6.7
+batchglm>=0.6.8
 statsmodels
 anndata
 seaborn
diff --git a/setup.py b/setup.py
index b98360e..ea3775b 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
         'scipy>=1.2.1',
         'pandas',
         'patsy>=0.5.0',
-        'batchglm>=0.6.7',
+        'batchglm>=0.6.8',
         'statsmodels',
     ],
     extras_require={

From 9a6985de2484ce12ec8a7ef92600a17ee0cf08b7 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 18:44:37 -0700
Subject: [PATCH 19/25] added basic condition wise plotting

---
 diffxpy/testing/det_cont.py | 72 +++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
index 41c67f5..2db6604 100644
--- a/diffxpy/testing/det_cont.py
+++ b/diffxpy/testing/det_cont.py
@@ -110,12 +110,12 @@ def log_fold_change(self, base=np.e, genes=None, non_numeric=False):
         :return: Log-fold change of fitted expression value by gene.
         """
         if genes is None:
-            genes = np.asarray(range(self.x.shape[1]))
+            idx = np.arange(0, self.x.shape[1])
         else:
-            genes = self._idx_genes(genes)
+            idx, genes = self._idx_genes(genes)
 
-        max_val = self.max(genes=genes, non_numeric=non_numeric)
-        min_val = self.min(genes=genes, non_numeric=non_numeric)
+        max_val = self.max(genes=idx, non_numeric=non_numeric)
+        min_val = self.min(genes=idx, non_numeric=non_numeric)
         max_val = np.nextafter(0, 1, out=max_val, where=max_val == 0)
         min_val = np.nextafter(0, 1, out=min_val, where=min_val == 0)
         return (np.log(max_val) - np.log(min_val)) / np.log(base)
@@ -179,12 +179,14 @@ def _idx_genes(self, genes):
 
         if isinstance(genes[0], str):
             genes = self._filter_genes_str(genes)
-            genes = np.array([self.gene_ids.tolist().index(x) for x in genes])
-        elif isinstance(genes[0], int) or isinstance(genes[0], np.int64):
+            idx = np.array([self.gene_ids.tolist().index(x) for x in genes])
+        elif isinstance(genes[0], int) or isinstance(genes[0], np.number):
             genes = self._filter_genes_int(genes)
+            idx = genes
+            genes = [self.gene_ids.tolist()[x] for x in idx]
         else:
             raise ValueError("only string and integer elements allowed in genes")
-        return genes
+        return idx, genes
 
     def _spline_par_loc_idx(self, intercept=True):
         """
@@ -245,9 +247,9 @@ def max(self, genes, non_numeric=False):
         :param non_numeric: Whether to include non-numeric covariates in fit.
         :return: Maximum fitted expression value by gene.
         """
-        genes = self._idx_genes(genes)
+        idx, genes = self._idx_genes(genes)
         return np.array([np.max(self._continuous_model(idx=i, non_numeric=non_numeric))
-                         for i in genes])
+                         for i in idx])
 
     def min(self, genes, non_numeric=False):
         """
@@ -257,9 +259,9 @@ def min(self, genes, non_numeric=False):
         :param non_numeric: Whether to include non-numeric covariates in fit.
         :return: Maximum fitted expression value by gene.
         """
-        genes = self._idx_genes(genes)
+        idx, genes = self._idx_genes(genes)
         return np.array([np.min(self._continuous_model(idx=i, non_numeric=non_numeric))
-                         for i in genes])
+                         for i in idx])
 
     def argmax(self, genes, non_numeric=False):
         """
@@ -269,9 +271,9 @@ def argmax(self, genes, non_numeric=False):
         :param non_numeric: Whether to include non-numeric covariates in fit.
         :return: Maximum fitted expression value by gene.
         """
-        genes = self._idx_genes(genes)
+        idx, genes = self._idx_genes(genes)
         idx = np.array([np.argmax(self._continuous_model(idx=i, non_numeric=non_numeric))
-                        for i in genes])
+                        for i in idx])
         return self._continuous_coords[idx]
 
     def argmin(self, genes, non_numeric=False):
@@ -282,15 +284,16 @@ def argmin(self, genes, non_numeric=False):
         :param non_numeric: Whether to include non-numeric covariates in fit.
         :return: Maximum fitted expression value by gene.
         """
-        genes = self._idx_genes(genes)
+        idx, genes = self._idx_genes(genes)
         idx = np.array([np.argmin(self._continuous_model(idx=i, non_numeric=non_numeric))
-                        for i in genes])
+                        for i in idx])
         return self._continuous_coords[idx]
 
     def plot_genes(
             self,
             genes,
             hue=None,
+            scalings=None,
             size=1,
             log=True,
             save=None,
@@ -304,7 +307,8 @@ def plot_genes(
         Plot observed data and spline fits of selected genes.
 
         :param genes: Gene IDs to plot.
-        :param hue: Confounder to include in plot.
+        :param hue: Confounder to include in plot. Must be length number of observations.
+        :param scalings: Names of scaling coefficients to plot separate model curves for.
         :param size: Point size.
         :param log: Whether to log values.
         :param save: Path+file name stem to save plots to.
@@ -324,7 +328,7 @@ def plot_genes(
 
         plt.ioff()
 
-        gene_idx = self._idx_genes(genes)
+        gene_idx, gene_ids = self._idx_genes(genes)
 
         # Set up gridspec.
         ncols = ncols if len(gene_idx) > ncols else len(gene_idx)
@@ -354,6 +358,19 @@ def plot_genes(
             if isinstance(y, scipy.sparse.csr_matrix):
                 y = np.asarray(y.todense()).flatten()
             t_continuous, yhat = self._continuous_interpolation(idx=g)
+            if scalings is not None:
+                yhat = np.vstack([
+                    [yhat],
+                    [
+                        yhat * np.expand_dims(
+                            np.exp(self._model_estim.a_var[self._model_estim.input_data.loc_names.index(x), g]),
+                            axis=0
+                        )
+                        for i, x in enumerate(scalings)
+                    ]
+                ])
+            else:
+                yhat = np.expand_dims(yhat, axis=0)
             if log:
                 y = np.log(y + 1)
                 yhat = np.log(yhat + 1)
@@ -366,14 +383,15 @@ def plot_genes(
                 ax=ax,
                 legend=False
             )
-            sns.lineplot(
-                x=t_continuous,
-                y=yhat,
-                hue=hue,
-                ax=ax
-            )
-
-            ax.set_title(genes[i])
+            for j in range(yhat.shape[0]):
+                sns.lineplot(
+                    x=t_continuous,
+                    y=yhat[j, :],
+                    hue=None,
+                    ax=ax
+                )
+
+            ax.set_title(gene_ids[i])
             ax.set_xlabel("continuous")
             if log:
                 ax.set_ylabel("log expression")
@@ -426,7 +444,7 @@ def plot_heatmap(
 
         plt.ioff()
 
-        gene_idx = self._idx_genes(genes)
+        gene_idx, gene_ids = self._idx_genes(genes)
 
         # Define figure.
         fig = plt.figure(figsize=(width, height_per_gene * len(gene_idx)))
@@ -465,7 +483,7 @@ def plot_heatmap(
         ax.set_xticks(xtick_pos)
         ax.set_xticklabels(xtick_lab)
         ax.set_xlabel("continuous")
-        plt.yticks(np.arange(len(genes)), genes, rotation='horizontal')
+        plt.yticks(np.arange(len(genes)), gene_ids, rotation='horizontal')
         ax.set_ylabel("genes")
 
         # Save, show and return figure.

From 12c66674d1c16ac132c8402bc571890a2a134a62 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Tue, 3 Sep 2019 19:11:09 -0700
Subject: [PATCH 20/25] added missing interactive plotting back-on statement in
 plotting of continuous models

---
 diffxpy/testing/det_cont.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py
index 2db6604..ca6d221 100644
--- a/diffxpy/testing/det_cont.py
+++ b/diffxpy/testing/det_cont.py
@@ -357,6 +357,8 @@ def plot_genes(
             y = self.x[:, g]
             if isinstance(y, scipy.sparse.csr_matrix):
                 y = np.asarray(y.todense()).flatten()
+                if self._model_estim.input_data.size_factors is not None:
+                    y = y / self._model_estim.input_data.size_factors
             t_continuous, yhat = self._continuous_interpolation(idx=g)
             if scalings is not None:
                 yhat = np.vstack([
@@ -406,6 +408,7 @@ def plot_genes(
             plt.show()
 
         plt.close(fig)
+        plt.ion()
 
         if return_axs:
             return axs
@@ -494,6 +497,7 @@ def plot_heatmap(
             plt.show()
 
         plt.close(fig)
+        plt.ion()
 
         if return_axs:
             return ax

From 9b3ea33952fe8142f203e9ebd840d747e502fe21 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 30 Oct 2019 22:40:09 -0700
Subject: [PATCH 21/25] batchglm numpy backend support functional from diffxpy

---
 diffxpy/models/__init__.py                    |   0
 diffxpy/models/batch_bfgs/__init__.py         |   2 -
 diffxpy/models/batch_bfgs/objectives.py       |  50 ----
 diffxpy/models/batch_bfgs/optim.py            | 231 ----------------
 diffxpy/models/hessian.py                     | 256 ------------------
 diffxpy/testing/det.py                        |   7 +-
 diffxpy/testing/tests.py                      | 116 ++++----
 diffxpy/unit_test/test_constrained.py         |   2 +-
 diffxpy/unit_test/test_continuous_de.py       |   4 +-
 diffxpy/unit_test/test_continuous_null.py     |   2 +-
 diffxpy/unit_test/test_correction.py          |   8 -
 diffxpy/unit_test/test_data_types.py          |   2 +-
 diffxpy/unit_test/test_enrich.py              |   5 +-
 diffxpy/unit_test/test_extreme_values.py      |   2 +-
 diffxpy/unit_test/test_fit.py                 |  12 +-
 diffxpy/unit_test/test_numeric_covar.py       |   2 +-
 diffxpy/unit_test/test_pairwise_null.py       |   4 +-
 diffxpy/unit_test/test_partition.py           |   2 +-
 diffxpy/unit_test/test_single_de.py           |   8 +-
 .../unit_test/test_single_external_libs.py    |   2 +-
 diffxpy/unit_test/test_single_fullrank.py     |   5 +-
 diffxpy/unit_test/test_single_null.py         |  16 +-
 diffxpy/unit_test/test_single_sf_null.py      |   4 +-
 diffxpy/unit_test/test_vsrest.py              |   8 +-
 24 files changed, 88 insertions(+), 662 deletions(-)
 delete mode 100644 diffxpy/models/__init__.py
 delete mode 100644 diffxpy/models/batch_bfgs/__init__.py
 delete mode 100644 diffxpy/models/batch_bfgs/objectives.py
 delete mode 100644 diffxpy/models/batch_bfgs/optim.py
 delete mode 100644 diffxpy/models/hessian.py

diff --git a/diffxpy/models/__init__.py b/diffxpy/models/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/diffxpy/models/batch_bfgs/__init__.py b/diffxpy/models/batch_bfgs/__init__.py
deleted file mode 100644
index a5389bb..0000000
--- a/diffxpy/models/batch_bfgs/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-
-__version__ = "0.1"
diff --git a/diffxpy/models/batch_bfgs/objectives.py b/diffxpy/models/batch_bfgs/objectives.py
deleted file mode 100644
index ee47b1e..0000000
--- a/diffxpy/models/batch_bfgs/objectives.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import numpy as np
-import numpy.random
-import scipy.stats
-
-
-def clip_theta_mu(theta):
-    return np.maximum(np.minimum(theta, np.zeros(len(theta)) + 1e8), np.zeros(len(theta)) - 1e8)
-
-
-def clip_theta_disp(theta):
-    return np.maximum(np.minimum(theta, np.zeros(len(theta)) + 1e8), np.zeros(len(theta)) - 1e8)
-
-
-def nb_glm_linker_mu(theta, X, lib_size):
-    return np.exp(np.asarray(np.dot(X, np.asarray(clip_theta_mu(theta)).T)).flatten() + lib_size)
-
-
-def nb_glm_linker_disp(theta, X):
-    return np.asarray(np.exp(np.dot(X, np.asarray(clip_theta_disp(theta)).T))).flatten()
-
-
-def ll_nb(x, mu, disp):
-    # Re-parameterize.
-    variance = np.maximum(mu + np.square(mu) / disp, np.zeros(len(mu)) + 1e-8)
-    p = 1 - (mu / variance)
-    return scipy.stats.nbinom(n=disp, p=1 - p).logpmf(x)
-
-
-def objective_ll(x, theta_mu, theta_disp, design_loc, design_scale, lib_size):
-    return -ll_nb(x=x, mu=nb_glm_linker_mu(theta_mu, design_loc, lib_size),
-                  disp=nb_glm_linker_disp(theta_disp, design_scale))
-
-
-def objective(theta, x, design_loc, design_scale, lib_size, batch_size=100):
-    if batch_size is None:
-        J = np.sum(objective_ll(x=x,
-                                theta_mu=np.asarray(theta)[:design_loc.shape[1]],
-                                theta_disp=np.asarray(theta)[design_loc.shape[1]:],
-                                design_loc=design_loc,
-                                design_scale=design_scale,
-                                lib_size=lib_size))
-    else:
-        batch_idx = numpy.random.randint(low=0, high=x.shape[0], size=(batch_size))
-        J = np.sum(objective_ll(x=x[batch_idx],
-                                theta_mu=np.asarray(theta)[:design_loc.shape[1]],
-                                theta_disp=np.asarray(theta)[design_loc.shape[1]:],
-                                design_loc=design_loc[batch_idx, :],
-                                design_scale=design_scale[batch_idx, :],
-                                lib_size=lib_size[batch_idx]))
-    return J
diff --git a/diffxpy/models/batch_bfgs/optim.py b/diffxpy/models/batch_bfgs/optim.py
deleted file mode 100644
index a593aed..0000000
--- a/diffxpy/models/batch_bfgs/optim.py
+++ /dev/null
@@ -1,231 +0,0 @@
-import logging
-
-from scipy.optimize import minimize
-from scipy.sparse import csr_matrix
-import numpy as np
-from numpy.linalg import pinv
-import numpy.random
-from multiprocessing import Pool
-
-from .objectives import *
-
-logger = logging.getLogger(__name__)
-
-
-class Estim_BFGS_Model():
-
-    def __init__(self, Estim_BFGS, nproc):
-        self._num_observations = Estim_BFGS.x.shape[0]
-        self._num_features = Estim_BFGS.x.shape[1]
-        self._features = Estim_BFGS.feature_names
-        self._observations = Estim_BFGS.x.shape[0]
-        self._design_loc = Estim_BFGS.design_loc
-        self._design_scale = Estim_BFGS.design_scale
-        self._loss = Estim_BFGS.full_loss(nproc)
-        self._log_probs = -self._loss
-        self._probs = np.exp(self._log_probs)
-        self._mles = np.transpose(Estim_BFGS.mles())
-        self._gradient = np.zeros([Estim_BFGS.x.shape[1]])
-        self._fisher_inv = Estim_BFGS.fisher_inv
-        self._idx_loc = np.arange(0, Estim_BFGS.design_loc.shape[1])
-        self._idx_scale = np.arange(Estim_BFGS.design_loc.shape[1],
-                                    Estim_BFGS.design_loc.shape[1] + Estim_BFGS.design_scale.shape[1])
-        self._error_codes = Estim_BFGS.error_codes()
-        self._niter = Estim_BFGS.niter()
-        self._estim_bfgs = Estim_BFGS
-
-    @property
-    def num_observations(self) -> int:
-        return self._num_observations
-
-    @property
-    def num_features(self) -> int:
-        return self._num_features
-
-    @property
-    def features(self) -> np.ndarray:
-        return self._features
-
-    @property
-    def observations(self) -> np.ndarray:
-        return self._observations
-
-    @property
-    def design_loc(self, **kwargs):
-        return self._design_loc
-
-    @property
-    def design_scale(self, **kwargs):
-        return self._design_scale
-
-    @property
-    def probs(self):
-        return self._probs
-
-    # @property
-    def log_probs(self):
-        return self._log_probs
-
-    @property
-    def loss(self, **kwargs):
-        return self._loss
-
-    @property
-    def gradient(self, **kwargs):
-        return self._gradient
-
-    @property
-    def mles(self, **kwargs):
-        return self._mles
-
-    @property
-    def par_link_loc(self, **kwargs):
-        return self._mles[self._idx_loc, :]
-
-    @property
-    def par_link_scale(self, **kwargs):
-        return self._mles[self._idx_scale, :]
-
-    def link_loc(self, x):
-        return np.log(x)
-
-    @property
-    def fisher_inv(self):
-        return self._fisher_inv
-
-    @property
-    def error_codes(self, **kwargs):
-        return self._error_codes
-
-
-class Estim_BFGS():
-    """ Class that handles multiple parallel starts of parameter estimation on one machine.
-    """
-
-    def __init__(self, X, design_loc, design_scale, lib_size=0, batch_size=None, feature_names=None):
-        """ Constructor of ManyGenes()
-        """
-        if np.size(lib_size):
-            lib_size = np.broadcast_to(lib_size, X.shape[0])
-
-        if batch_size is not None:
-            logger.warning("Using BFGS with batching is currently not supported!")
-
-        self.X = X
-        self.design_loc = np.asarray(design_loc)
-        self.design_scale = np.asarray(design_scale)
-        self.lib_size = lib_size
-        self.batch_size = batch_size
-        self.feature_names = feature_names
-        self.__is_sparse = isinstance(X, csr_matrix)
-        self.res = None
-
-    def __init_mu_theta(self, x):
-        if self.design_loc.shape[1] > 1:
-            return np.concatenate([[np.log(np.mean(x) + 1e-08)], np.zeros([self.design_loc.shape[1] - 1])])
-            # return np.zeros([self.design_loc.shape[1]])
-        else:
-            return [np.log(np.mean(x) + 1e-08)]
-
-    def __init_disp_theta(self, x):
-        if self.design_scale.shape[1] > 1:
-            return np.zeros([self.design_scale.shape[1]])
-        else:
-            return [0]
-
-    def get_gene(self, i):
-        """
-
-        Has to be public so that it can be passed via starmap.
-        """
-        if self.__is_sparse:
-            return np.asarray(self.X[:, i].data.todense())
-        else:
-            return np.asarray(self.X[:, i].data)
-
-    def run_optim(self, x, maxiter=10000, debug=False):
-        """ Run single optimisation.
-
-        Has to be public so that it can be passed via starmap.
-
-        Parameters
-        ----------
-        """
-        x0 = np.concatenate([self.__init_mu_theta(x), self.__init_disp_theta(x)])
-
-        if debug:
-            minimize_out = minimize(
-                fun=objective,
-                x0=x0,
-                args=(x, self.design_loc, self.design_scale, self.lib_size, self.batch_size),
-                method='BFGS',
-                options={'maxiter': maxiter, 'gtol': 1e-05}
-            )
-        else:
-            try:
-                minimize_out = minimize(
-                    fun=objective,
-                    x0=x0,
-                    args=(x, self.design_loc, self.design_scale, self.lib_size, self.batch_size),
-                    method='BFGS',
-                    options={'maxiter': maxiter, 'gtol': 1e-05}
-                )
-                err = ''
-            except Exception as e:
-                minimize_out = None
-                err = e
-                print(e)
-
-        return minimize_out
-
-    def run(self, nproc=1, maxiter=10000, debug=False):
-        """ Run multiple optimisation starts
-        
-        Parameters
-        ----------
-        maxiter : int
-            Maximum number of iterations in parameter estimation.
-        nstarts : int 
-            Number of starts to perform in multi-start optimization.
-        nproc : int
-            Number of processes to use in parallelization.
-        """
-        if debug == True:
-            self.res = []  # list of FitResults instances.
-            for i in range(self.X.shape[1]):
-                self.res.append(self.run_optim(x=self.get_gene(i), maxiter=maxiter, debug=debug))
-        else:
-            with Pool(processes=nproc) as p:
-                self.res = p.starmap(
-                    self.run_optim,
-                    [(self.get_gene(i), maxiter, False) for i in range(self.X.shape[1])]
-                )
-
-    def full_loss(self, nproc=1):
-        with Pool(processes=nproc) as p:
-            loss = np.asarray(p.starmap(
-                objective,
-                [(x.x, self.get_gene(i), self.design_loc, self.design_scale, self.lib_size, None)
-                 for i, x in enumerate(self.res)]
-            ))
-        return np.asarray(loss)
-
-    def mles(self):
-        return np.vstack([x.x for x in self.res])
-
-    def fim_diags(self):
-        return np.vstack([x.hess_inv.diagonal() for x in self.res])
-
-    @property
-    def fisher_inv(self):
-        return np.stack([x.hess_inv for x in self.res])
-
-    def niter(self):
-        return np.array([x.nit for x in self.res])
-
-    def error_codes(self):
-        return np.array([x.status for x in self.res])
-
-    def return_batchglm_formated_model(self, nproc=1):
-        model = Estim_BFGS_Model(self, nproc)
-        return (model)
diff --git a/diffxpy/models/hessian.py b/diffxpy/models/hessian.py
deleted file mode 100644
index c629cdf..0000000
--- a/diffxpy/models/hessian.py
+++ /dev/null
@@ -1,256 +0,0 @@
-import numpy as np
-from scipy.special import polygamma
-import numpy.linalg
-
-
-def hes_nb_glm_mean_block(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-        i: int,
-        j: int
-):
-    """ Compute entry of hessian in mean model block for a given gene.
-
-    Sum the following across cells:
-    $h_{ij} = -x^{m_i}*x^{m_j}*mu*(x/disp)/(1+mu(disp))^2$
-    Make sure that only element wise operations happen here!
-    Do not simplify design matrix elements: they are only 0 or 1 for discrete
-    groups but continuous if space, time, pseudotime or spline basis covariates
-    are supplied!
-
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-    :param i: int
-        Index of first dimension in fisher information matrix which is to be computed.
-    :param j: int
-        Index of second dimension in fisher information matrix which is to be computed
-    :return: float
-        Entry of fisher information matrix in mean model block at position (i,j)
-    """
-    
-    h_ij = -np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * mu * x / disp / np.square(1 + mu / disp)
-    return np.sum(h_ij)
-
-
-def hes_nb_glm_disp_block(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-        i: int,
-        j: int
-):
-    """ Compute entry of hessian in dispersion model block for a given gene.
-
-    Sum the following across cells:
-    $$
-    h_{ij} =
-        disp * x^{m_i} * x^{m_j} * [psi_0(disp+x)
-        + psi_0(disp)
-        - mu/(disp+mu)^2 * (disp+x)
-        +(mu-disp) / (disp+mu)
-        + log(disp)
-        + 1 - log(disp+mu)]
-        + disp * psi_1(disp+x)
-        + disp * psi_1(disp)
-    $$
-    
-    Make sure that only element wise operations happen here!
-    Do not simplify design matrix elements: they are only 0 or 1 for discrete
-    groups but continuous if space, time, pseudotime or spline basis covariates
-    are supplied!
-    
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-    :param i: int
-        Index of first dimension in fisher information matrix which is to be computed.
-    :param j: int
-        Index of second dimension in fisher information matrix which is to be computed
-
-    :return: float
-        Entry of fisher information matrix in dispersion model block at position (i,j)
-    """
-    h_ij = (
-            disp * np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * polygamma(n=0, x=disp + x)
-            + polygamma(n=0, x=disp)
-            - mu / np.square(disp + mu) * (disp + x)
-            + (mu - disp) / (disp + mu)
-            + np.log(disp)
-            + 1 - np.log(disp + mu)
-            + disp * polygamma(n=1, x=disp + x)
-            + disp * polygamma(n=1, x=disp)
-    )
-    return np.sum(h_ij)
-
-
-def hes_nb_glm_meandisp_block(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-        i: int,
-        j: int
-):
-    """ Compute entry of hessian in mean-dispersion model block for a given gene.
-
-    Sum the following across cells:
-    Need to multiply by -1 here??????
-    $h_{ij} = mu*x^{m_i}*x^{m_j}*(x-mu)/(1+mu/disp)^2$
-    
-    Make sure that only element wise operations happen here!
-    Do not simplify design matrix elements: they are only 0 or 1 for discrete
-    groups but continuous if space, time, pseudotime or spline basis covariates
-    are supplied!
-    
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-    :param i: int
-        Index of first dimension in fisher information matrix which is to be computed.
-    :param j: int
-        Index of second dimension in fisher information matrix which is to be computed
-
-    :return: float
-        Entry of fisher information matrix in mean-dispersion model block at position (i,j)
-    """
-    h_ij = disp * np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * (x - mu) / np.square(1 + mu / disp)
-    return np.sum(h_ij)
-
-
-def hes_nb_glm_bygene(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-):
-    """ Compute hessian for a given gene.
-
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-    
-    :return: np.ndarray (#parameters location model + #parameters shape model, #parameters location model + #parameters shape model)
-        Fisher information matrix.
-    """
-    n_par_loc = design_loc.shape[1]
-    n_par_scale = design_scale.shape[1]
-    n_par = n_par_loc + n_par_scale
-    hes = np.zeros([n_par, n_par])
-    # Add in elements by block:
-    # Mean model block:
-    for i in np.arange(0, n_par_loc):
-        for j in np.arange(i, n_par_loc):  # Block is on the diagonal and symmtric.
-            hes[i, j] = hes_nb_glm_mean_block(x=x, mu=mu, disp=disp, design_loc=design_loc, design_scale=design_scale,
-                                              i=i, j=j)
-            hes[j, i] = hes[i, j]
-    # Dispersion model block:
-    for i in np.arange(0, n_par_scale):
-        for j in np.arange(i, n_par_scale):  # Block is on the diagonal and symmtric.
-            hes[n_par_loc + i, n_par_loc + j] = hes_nb_glm_disp_block(x=x, mu=mu, disp=disp, design_loc=design_loc,
-                                                                      design_scale=design_scale, i=i, j=j)
-            hes[n_par_loc + j, n_par_loc + i] = hes[n_par_loc + i, n_par_loc + j]
-    # Mean-dispersion model block:
-    for i in np.arange(0, n_par_loc):
-        for j in np.arange(0, n_par_scale):  # Duplicate block across diagonal but block itself is not symmetric!
-            hes[i, n_par_loc + j] = hes_nb_glm_meandisp_block(x=x, mu=mu, disp=disp, design_loc=design_loc,
-                                                              design_scale=design_scale, i=i, j=j)
-            hes[n_par_loc + j, i] = hes[i, n_par_loc + j]
-    return (hes)
-
-
-def theta_covar_bygene(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-):
-    """ Compute model coefficient covariance matrix for a given gene.
-
-    Based on the hessian matrix via fisher information matrix (fim).
-    covar = inv(fim) = inv(-hess)
-
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix(cells, #parameters shape model)
-        Design matrix of shape model.
-    
-    :return: np.ndarray (#parameters location model + #parameters shape model, #parameters location model + #parameters shape model)
-        Model coefficient covariance matrix.
-    """
-    hes = hes_nb_glm_bygene(x=x, mu=mu, disp=disp, design_loc=design_loc, design_scale=design_scale)
-    return numpy.linalg.pinv(-hes)
-
-
-def theta_sd_bygene(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-):
-    """ Compute model coefficient standard deviation vector for a given gene.
-
-    Based on the hessian matrix via fisher information matrix (fim).
-    covar = inv(fim) = inv(-hess)
-    var = diagonal of covar
-
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param disp: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-    
-    :return: np.ndarray (#parameters location model + #parameters shape model,)
-        Model coefficient standard deviation vector.
-    """
-    
-    hes = hes_nb_glm_bygene(x=x, mu=mu, disp=disp, design_loc=design_loc, design_scale=design_scale)
-    return np.sqrt(numpy.linalg.pinv(-hes).diagonal())
diff --git a/diffxpy/testing/det.py b/diffxpy/testing/det.py
index b12195f..cc727a0 100644
--- a/diffxpy/testing/det.py
+++ b/diffxpy/testing/det.py
@@ -15,7 +15,6 @@
 from .utils import split_x, dmat_unique
 from ..stats import stats
 from . import correction
-from diffxpy import pkg_constants
 
 logger = logging.getLogger("diffxpy")
 
@@ -822,7 +821,7 @@ def summary(
         :return: Summary table of differential expression test.
         """
         res = super().summary(**kwargs)
-        res["grad"] = self.model_gradient.data
+        res["grad"] = self.model_gradient
         if len(self.theta_mle.shape) == 1:
             res["coef_mle"] = self.theta_mle
         if len(self.theta_sd.shape) == 1:
@@ -945,7 +944,7 @@ def plot_comparison_ols_coef(
         import matplotlib.pyplot as plt
         from matplotlib import gridspec
         from matplotlib import rcParams
-        from batchglm.api.models.glm_norm import Estimator, InputDataGLM
+        from batchglm.api.models.tf1.glm_norm import Estimator, InputDataGLM
 
         plt.ioff()
 
@@ -1084,7 +1083,7 @@ def plot_comparison_ols_pred(
         import matplotlib.pyplot as plt
         from matplotlib import gridspec
         from matplotlib import rcParams
-        from batchglm.api.models.glm_norm import Estimator, InputDataGLM
+        from batchglm.api.models import Estimator, InputDataGLM
 
         plt.ioff()
 
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 571bfed..260b7a6 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -12,7 +12,6 @@
 from typing import Union, List, Dict, Callable, Tuple
 
 from diffxpy import pkg_constants
-from diffxpy.models.batch_bfgs.optim import Estim_BFGS
 from .det import DifferentialExpressionTestLRT, DifferentialExpressionTestWald, \
     DifferentialExpressionTestTT, DifferentialExpressionTestRank, _DifferentialExpressionTestSingle, \
     DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition
@@ -20,7 +19,7 @@
 from .det_pair import DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, \
     DifferentialExpressionTestPairwiseStandard
 from .utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
-    constraint_system_from_star, design_matrix, preview_coef_names
+    constraint_system_from_star, preview_coef_names
 
 
 def _fit(
@@ -38,6 +37,7 @@ def _fit(
         gene_names=None,
         size_factors=None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         quick_scale: bool = None,
         close_session=True,
@@ -89,24 +89,9 @@ def _fit(
     :param size_factors: 1D array of transformed library size factors for each cell in the
         same order as in data
     :param batch_size: the batch size to use for the estimator
-    :param training_strategy: {str, function, list} training strategy to use. Can be:
+    :param training_strategy: {str} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
-        - function: Can be used to implement custom training function will be called as
-          `training_strategy(estimator)`.
-        - list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
-          method arguments.
-
-          Example:
-
-          .. code-block:: python
-
-              [
-                {"learning_rate": 0.5, },
-                {"learning_rate": 0.05, },
-              ]
-
-          This will run training first with learning rate = 0.5 and then with learning rate = 0.05.
     :param quick_scale: Depending on the optimizer, `scale` will be fitted faster and maybe less accurate.
 
         Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
@@ -128,64 +113,57 @@ def _fit(
         "irls_gd_tr": pkg_constants.BATCHGLM_OPTIM_IRLS_GD_TR
     }
 
-    if isinstance(training_strategy, str) and training_strategy.lower() == 'bfgs':
-        assert False, "depreceated"
-        lib_size = np.zeros(data.shape[0])
+    if backend.lower() in ["tf1"]:
         if noise_model == "nb" or noise_model == "negative_binomial":
-            estim = Estim_BFGS(X=data, design_loc=design_loc, design_scale=design_scale,
-                               lib_size=lib_size, batch_size=batch_size, feature_names=gene_names)
-            estim.run(nproc=3, maxiter=10000, debug=False)
-            model = estim.return_batchglm_formated_model()
+            from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM
+        elif noise_model == "norm" or noise_model == "normal":
+            from batchglm.api.models.tf1.glm_norm import Estimator, InputDataGLM
         else:
-            raise ValueError('base.test(): `noise_model="%s"` not recognized.' % noise_model)
-    else:
+            raise ValueError('noise_model="%s" not recognized.' % noise_model)
+    elif backend.lower() in ["numpy"]:
+        if isinstance(training_strategy, str):
+            if training_strategy.lower() == "auto":
+                training_strategy = "DEFAULT"
         if noise_model == "nb" or noise_model == "negative_binomial":
-            from batchglm.api.models.glm_nb import Estimator, InputDataGLM
-        elif noise_model == "norm" or noise_model == "normal":
-            from batchglm.api.models.glm_norm import Estimator, InputDataGLM
+            from batchglm.api.models.numpy.glm_nb import Estimator, InputDataGLM
         else:
-            raise ValueError('base.test(): `noise_model="%s"` not recognized.' % noise_model)
-
-        input_data = InputDataGLM(
-            data=data,
-            design_loc=design_loc,
-            design_scale=design_scale,
-            design_loc_names=design_loc_names,
-            design_scale_names=design_scale_names,
-            constraints_loc=constraints_loc,
-            constraints_scale=constraints_scale,
-            size_factors=size_factors,
-            feature_names=gene_names,
-        )
+            raise ValueError('noise_model="%s" not recognized.' % noise_model)
+    else:
+        raise ValueError('models="%s" not recognized.' % backend)
 
-        constructor_args = {}
-        if batch_size is not None:
-            constructor_args["batch_size"] = batch_size
-        if quick_scale is not None:
-            constructor_args["quick_scale"] = quick_scale
-        estim = Estimator(
-            input_data=input_data,
-            init_model=init_model,
-            init_a=init_a,
-            init_b=init_b,
-            provide_optimizers=provide_optimizers,
-            provide_batched=pkg_constants.BATCHGLM_PROVIDE_BATCHED,
-            provide_fim=pkg_constants.BATCHGLM_PROVIDE_FIM,
-            provide_hessian=pkg_constants.BATCHGLM_PROVIDE_HESSIAN,
-            dtype=dtype,
-            **constructor_args
-        )
-        estim.initialize()
+    input_data = InputDataGLM(
+        data=data,
+        design_loc=design_loc,
+        design_scale=design_scale,
+        design_loc_names=design_loc_names,
+        design_scale_names=design_scale_names,
+        constraints_loc=constraints_loc,
+        constraints_scale=constraints_scale,
+        size_factors=size_factors,
+        feature_names=gene_names,
+    )
 
-        # Training:
-        if callable(training_strategy):
-            # call training_strategy if it is a function
-            training_strategy(estim)
-        else:
-            estim.train_sequence(training_strategy=training_strategy)
+    constructor_args = {}
+    if batch_size is not None:
+        constructor_args["batch_size"] = batch_size
+    if quick_scale is not None:
+        constructor_args["quick_scale"] = quick_scale
+    estim = Estimator(
+        input_data=input_data,
+        init_a=init_a,
+        init_b=init_b,
+        provide_optimizers=provide_optimizers,
+        provide_batched=pkg_constants.BATCHGLM_PROVIDE_BATCHED,
+        provide_fim=pkg_constants.BATCHGLM_PROVIDE_FIM,
+        provide_hessian=pkg_constants.BATCHGLM_PROVIDE_HESSIAN,
+        dtype=dtype,
+        **constructor_args
+    )
+    estim.initialize()
+    estim.train_sequence(training_strategy=training_strategy)
 
-        if close_session:
-            estim.finalize()
+    if close_session:
+        estim.finalize()
 
     return estim
 
diff --git a/diffxpy/unit_test/test_constrained.py b/diffxpy/unit_test/test_constrained.py
index 0acdb1c..545bead 100644
--- a/diffxpy/unit_test/test_constrained.py
+++ b/diffxpy/unit_test/test_constrained.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import scipy.stats as stats
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_continuous_de.py b/diffxpy/unit_test/test_continuous_de.py
index 023d3cc..7a26654 100644
--- a/diffxpy/unit_test/test_continuous_de.py
+++ b/diffxpy/unit_test/test_continuous_de.py
@@ -15,11 +15,11 @@ def _test_wald_de(
             ngenes: int
     ):
         if self.noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_loc = lambda shape: np.random.uniform(2, 5, shape)
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif self.noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
diff --git a/diffxpy/unit_test/test_continuous_null.py b/diffxpy/unit_test/test_continuous_null.py
index c3c82fe..d84c829 100644
--- a/diffxpy/unit_test/test_continuous_null.py
+++ b/diffxpy/unit_test/test_continuous_null.py
@@ -5,7 +5,7 @@
 import scipy.stats as stats
 import logging
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_correction.py b/diffxpy/unit_test/test_correction.py
index cbd602e..9a99f6e 100644
--- a/diffxpy/unit_test/test_correction.py
+++ b/diffxpy/unit_test/test_correction.py
@@ -1,12 +1,4 @@
 import unittest
-import numpy as np
-import pandas as pd
-import scipy.stats as stats
-import logging
-
-from batchglm.api.models.glm_nb import Simulator, Estimator, InputDataGLM
-import diffxpy.api as de
-
 
 
 class TestCorrection(unittest.TestCase):
diff --git a/diffxpy/unit_test/test_data_types.py b/diffxpy/unit_test/test_data_types.py
index f4874bb..b1f911f 100644
--- a/diffxpy/unit_test/test_data_types.py
+++ b/diffxpy/unit_test/test_data_types.py
@@ -6,7 +6,7 @@
 import scipy.sparse
 import anndata
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_enrich.py b/diffxpy/unit_test/test_enrich.py
index 8ed229f..a88e09f 100644
--- a/diffxpy/unit_test/test_enrich.py
+++ b/diffxpy/unit_test/test_enrich.py
@@ -1,10 +1,7 @@
 import unittest
 import logging
-import numpy as np
-import pandas as pd
-import scipy.stats as stats
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_extreme_values.py b/diffxpy/unit_test/test_extreme_values.py
index c430784..69498da 100644
--- a/diffxpy/unit_test/test_extreme_values.py
+++ b/diffxpy/unit_test/test_extreme_values.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_fit.py b/diffxpy/unit_test/test_fit.py
index 9a27b7d..eb46e18 100644
--- a/diffxpy/unit_test/test_fit.py
+++ b/diffxpy/unit_test/test_fit.py
@@ -25,10 +25,10 @@ def _test_model_fit(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
@@ -68,10 +68,10 @@ def _test_model_fit_partition(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
@@ -114,9 +114,9 @@ def _test_residuals_fit(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
 
diff --git a/diffxpy/unit_test/test_numeric_covar.py b/diffxpy/unit_test/test_numeric_covar.py
index 2c3bedd..1fd05fd 100644
--- a/diffxpy/unit_test/test_numeric_covar.py
+++ b/diffxpy/unit_test/test_numeric_covar.py
@@ -3,7 +3,7 @@
 import numpy as np
 import logging
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_pairwise_null.py b/diffxpy/unit_test/test_pairwise_null.py
index dd88008..02154ea 100644
--- a/diffxpy/unit_test/test_pairwise_null.py
+++ b/diffxpy/unit_test/test_pairwise_null.py
@@ -18,11 +18,11 @@ def _prepate_data(
             n_groups: int
     ):
         if self.noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_loc = lambda shape: np.random.uniform(0.1, 1, shape)
             rand_fn_scale = lambda shape: np.random.uniform(0.5, 1, shape)
         elif self.noise_model == "norm" or self.noise_model is None:
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
diff --git a/diffxpy/unit_test/test_partition.py b/diffxpy/unit_test/test_partition.py
index 7f83374..c041a04 100644
--- a/diffxpy/unit_test/test_partition.py
+++ b/diffxpy/unit_test/test_partition.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import scipy.stats as stats
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_single_de.py b/diffxpy/unit_test/test_single_de.py
index 8edf03f..6d402f5 100644
--- a/diffxpy/unit_test/test_single_de.py
+++ b/diffxpy/unit_test/test_single_de.py
@@ -20,11 +20,11 @@ def _prepare_data(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_loc = lambda shape: np.random.uniform(5, 10, shape)
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
@@ -58,8 +58,8 @@ def _eval(self, sim, test):
             'fraction of DE genes with q-value < 0.05: %.1f%%' %
             float(100 * frac_de_of_de)
         )
-        assert frac_de_of_non_de <= 0.1, "too many false-positives"
-        assert frac_de_of_de >= 0.5, "too many false-negatives"
+        assert frac_de_of_non_de <= 0.1, "too many false-positives %f" % frac_de_of_non_de
+        assert frac_de_of_de >= 0.5, "too many false-negatives %f" % frac_de_of_de
 
         return sim
 
diff --git a/diffxpy/unit_test/test_single_external_libs.py b/diffxpy/unit_test/test_single_external_libs.py
index b1d4883..f6832ec 100644
--- a/diffxpy/unit_test/test_single_external_libs.py
+++ b/diffxpy/unit_test/test_single_external_libs.py
@@ -3,7 +3,7 @@
 import numpy as np
 import scipy.stats as stats
 
-from batchglm.api.models.glm_nb import Simulator
+from batchglm.api.models.tf1.glm_nb import Simulator
 import diffxpy.api as de
 
 
diff --git a/diffxpy/unit_test/test_single_fullrank.py b/diffxpy/unit_test/test_single_fullrank.py
index 785d3d7..cf99518 100644
--- a/diffxpy/unit_test/test_single_fullrank.py
+++ b/diffxpy/unit_test/test_single_fullrank.py
@@ -2,7 +2,6 @@
 import logging
 import numpy as np
 import pandas as pd
-import scipy.stats as stats
 
 import diffxpy.api as de
 
@@ -21,10 +20,10 @@ def _test_single_full_rank(self):
         :param noise_model: Noise model to use for data fitting.
         """
         if self.noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif self.noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % self.noise_model)
diff --git a/diffxpy/unit_test/test_single_null.py b/diffxpy/unit_test/test_single_null.py
index 6c1b801..7a3ec56 100644
--- a/diffxpy/unit_test/test_single_null.py
+++ b/diffxpy/unit_test/test_single_null.py
@@ -26,10 +26,10 @@ def _test_null_distribution_wald(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models.tf1.glm_norm import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
@@ -78,9 +78,9 @@ def _test_null_distribution_wald_multi(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models.tf1.glm_norm import Simulator
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
 
@@ -126,9 +126,9 @@ def _test_null_distribution_lrt(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models.tf1.glm_norm import Simulator
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
 
@@ -173,7 +173,7 @@ def _test_null_distribution_ttest(
         :param n_cells: Number of cells to simulate (number of observations per test).
         :param n_genes: Number of genes to simulate (number of tests).
         """
-        from batchglm.api.models.glm_norm import Simulator
+        from batchglm.api.models.tf1.glm_norm import Simulator
 
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
@@ -213,7 +213,7 @@ def _test_null_distribution_rank(
         :param n_cells: Number of cells to simulate (number of observations per test).
         :param n_genes: Number of genes to simulate (number of tests).
         """
-        from batchglm.api.models.glm_norm import Simulator
+        from batchglm.api.models.tf1.glm_norm import Simulator
 
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
diff --git a/diffxpy/unit_test/test_single_sf_null.py b/diffxpy/unit_test/test_single_sf_null.py
index ab97115..625379e 100644
--- a/diffxpy/unit_test/test_single_sf_null.py
+++ b/diffxpy/unit_test/test_single_sf_null.py
@@ -26,10 +26,10 @@ def _test_null_distribution_wald(
         :param noise_model: Noise model to use for data fitting.
         """
         if noise_model == "nb":
-            from batchglm.api.models.glm_nb import Simulator
+            from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models.glm_norm import Simulator
+            from batchglm.api.models import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
diff --git a/diffxpy/unit_test/test_vsrest.py b/diffxpy/unit_test/test_vsrest.py
index 9a78505..cb3b074 100644
--- a/diffxpy/unit_test/test_vsrest.py
+++ b/diffxpy/unit_test/test_vsrest.py
@@ -22,7 +22,7 @@ def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100, n
         logging.getLogger("tensorflow").setLevel(logging.ERROR)
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
-        from batchglm.api.models.glm_nb import Simulator
+        from batchglm.api.models.tf1.glm_nb import Simulator
 
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
@@ -65,7 +65,7 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100):
         logging.getLogger("tensorflow").setLevel(logging.ERROR)
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.ERROR)
-        from batchglm.api.models.glm_nb import Simulator
+        from batchglm.api.models.tf1.glm_nb import Simulator
 
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
@@ -108,7 +108,7 @@ def test_null_distribution_rank(self, n_cells: int = 2000, n_genes: int = 100, n
         logging.getLogger("tensorflow").setLevel(logging.ERROR)
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
-        from batchglm.api.models.glm_nb import Simulator
+        from batchglm.api.models.tf1.glm_nb import Simulator
 
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)
@@ -148,7 +148,7 @@ def test_null_distribution_ttest(self, n_cells: int = 2000, n_genes: int = 100,
         logging.getLogger("tensorflow").setLevel(logging.ERROR)
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
-        from batchglm.api.models.glm_norm import Simulator
+        from batchglm.api.models import Simulator
 
         sim = Simulator(num_observations=n_cells, num_features=n_genes)
         sim.generate_sample_description(num_batches=0, num_conditions=0)

From e3e7ef3927d575cb8cf15b388838d9155ea622df Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 30 Oct 2019 22:51:36 -0700
Subject: [PATCH 22/25] added backend choice in all diffxpy tests

---
 diffxpy/testing/tests.py      | 84 ++++++++++++++++++++++++++++++++++-
 diffxpy/unit_test/test_fit.py |  6 +--
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 260b7a6..b64453d 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -42,7 +42,7 @@ def _fit(
         quick_scale: bool = None,
         close_session=True,
         dtype="float64"
-) -> glm.typing.InputDataBase:
+):
     """
     :param noise_model: str, noise model to use in model-based unit_test. Possible options:
 
@@ -89,6 +89,12 @@ def _fit(
     :param size_factors: 1D array of transformed library size factors for each cell in the
         same order as in data
     :param batch_size: the batch size to use for the estimator
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -182,6 +188,7 @@ def lrt(
         noise_model="nb",
         size_factors: Union[np.ndarray, pd.core.series.Series, np.ndarray] = None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         quick_scale: bool = False,
         dtype="float64",
@@ -239,6 +246,12 @@ def lrt(
         same order as in data or string-type column identifier of size-factor containing
         column in sample description.
     :param batch_size: the batch size to use for the estimator
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -317,6 +330,7 @@ def lrt(
         gene_names=gene_names,
         size_factors=size_factors,
         batch_size=batch_size,
+        backend=backend,
         training_strategy=training_strategy,
         quick_scale=quick_scale,
         dtype=dtype,
@@ -335,6 +349,7 @@ def lrt(
         init_model=reduced_model,
         size_factors=size_factors,
         batch_size=batch_size,
+        backend=backend,
         training_strategy=training_strategy,
         quick_scale=quick_scale,
         dtype=dtype,
@@ -370,6 +385,7 @@ def wald(
         noise_model: str = "nb",
         size_factors: Union[np.ndarray, pd.core.series.Series, str] = None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         quick_scale: bool = False,
         dtype="float64",
@@ -496,6 +512,12 @@ def wald(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -625,6 +647,7 @@ def wald(
         gene_names=gene_names,
         size_factors=size_factors,
         batch_size=batch_size,
+        backend=backend,
         training_strategy=training_strategy,
         quick_scale=quick_scale,
         dtype=dtype,
@@ -735,6 +758,7 @@ def two_sample(
         noise_model: str = None,
         size_factors: np.ndarray = None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         is_sig_zerovar: bool = True,
         quick_scale: bool = None,
@@ -798,6 +822,12 @@ def two_sample(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -849,6 +879,7 @@ def two_sample(
             init_a="closed_form",
             init_b="closed_form",
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -875,6 +906,7 @@ def two_sample(
             init_a="closed_form",
             init_b="closed_form",
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -911,6 +943,7 @@ def pairwise(
         noise_model: str = "nb",
         size_factors: np.ndarray = None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         is_sig_zerovar: bool = True,
         quick_scale: bool = False,
@@ -986,6 +1019,12 @@ def pairwise(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -1039,6 +1078,7 @@ def pairwise(
             init_a="closed_form",
             init_b="closed_form",
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -1093,6 +1133,7 @@ def pairwise(
                     noise_model=noise_model,
                     size_factors=size_factors[idx] if size_factors is not None else None,
                     batch_size=batch_size,
+                    backend=backend,
                     training_strategy=training_strategy,
                     quick_scale=quick_scale,
                     is_sig_zerovar=is_sig_zerovar,
@@ -1130,6 +1171,7 @@ def versus_rest(
         noise_model: str = None,
         size_factors: np.ndarray = None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         is_sig_zerovar: bool = True,
         quick_scale: bool = None,
@@ -1204,6 +1246,12 @@ def versus_rest(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -1257,6 +1305,7 @@ def versus_rest(
             sample_description=sample_description,
             noise_model=noise_model,
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             size_factors=size_factors,
@@ -1363,6 +1412,7 @@ def two_sample(
             size_factors: np.ndarray = None,
             noise_model: str = None,
             batch_size: int = None,
+            backend: str = "numpy",
             training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
             is_sig_zerovar: bool = True,
             **kwargs
@@ -1393,6 +1443,12 @@ def two_sample(
 
             - 'nb': default
         :param batch_size: The batch size to use for the estimator.
+        :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+            training strategies. Available are:
+
+            - "numpy" numpy
+            - "tf1" tensorflow1.* >= 1.13
+            - "tf2" tensorflow2.*
         :param training_strategy: {str, function, list} training strategy to use. Can be:
 
             - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -1417,6 +1473,7 @@ def two_sample(
                 noise_model=noise_model,
                 size_factors=size_factors[idx] if size_factors is not None else None,
                 batch_size=batch_size,
+                backend=backend,
                 training_strategy=training_strategy,
                 is_sig_zerovar=is_sig_zerovar,
                 **kwargs
@@ -1515,6 +1572,7 @@ def lrt(
             size_factors: np.ndarray = None,
             noise_model="nb",
             batch_size: int = None,
+            backend: str = "numpy",
             training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
             **kwargs
     ):
@@ -1567,6 +1625,12 @@ def lrt(
 
             - 'nb': default
         :param batch_size: The batch size to use for the estimator.
+        :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+            training strategies. Available are:
+
+            - "numpy" numpy
+            - "tf1" tensorflow1.* >= 1.13
+            - "tf2" tensorflow2.*
         :param training_strategy: {str, function, list} training strategy to use. Can be:
 
             - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -1592,6 +1656,7 @@ def lrt(
                 noise_model=noise_model,
                 size_factors=size_factors[idx] if size_factors is not None else None,
                 batch_size=batch_size,
+                backend=backend,
                 training_strategy=training_strategy,
                 **kwargs
             ))
@@ -1613,6 +1678,7 @@ def wald(
             noise_model: str = "nb",
             size_factors: np.ndarray = None,
             batch_size: int = None,
+            backend: str = "numpy",
             training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
             **kwargs
     ):
@@ -1667,6 +1733,12 @@ def wald(
 
             - 'nb': default
         :param batch_size: The batch size to use for the estimator.
+            :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+            training strategies. Available are:
+
+            - "numpy" numpy
+            - "tf1" tensorflow1.* >= 1.13
+            - "tf2" tensorflow2.*
         :param training_strategy: {str, function, list} training strategy to use. Can be:
 
             - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -1692,6 +1764,7 @@ def wald(
                 noise_model=noise_model,
                 size_factors=size_factors[idx] if size_factors is not None else None,
                 batch_size=batch_size,
+                backend=backend,
                 training_strategy=training_strategy,
                 **kwargs
             ))
@@ -1721,6 +1794,7 @@ def continuous_1d(
         noise_model: str = 'nb',
         size_factors: Union[np.ndarray, pd.core.series.Series, str] = None,
         batch_size: int = None,
+        backend: str = "numpy",
         training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
         quick_scale: bool = None,
         dtype="float64",
@@ -1858,6 +1932,12 @@ def continuous_1d(
     :param size_factors: 1D array of transformed library size factors for each cell in the
         same order as in data
     :param batch_size: the batch size to use for the estimator
+    :param backend: Which linear algebra library to chose. This impact the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
         - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -2018,6 +2098,7 @@ def continuous_1d(
             noise_model=noise_model,
             size_factors=size_factors,
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -2070,6 +2151,7 @@ def continuous_1d(
             noise_model=noise_model,
             size_factors=size_factors,
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
diff --git a/diffxpy/unit_test/test_fit.py b/diffxpy/unit_test/test_fit.py
index eb46e18..27cb421 100644
--- a/diffxpy/unit_test/test_fit.py
+++ b/diffxpy/unit_test/test_fit.py
@@ -28,7 +28,7 @@ def _test_model_fit(
             from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models import Simulator
+            from batchglm.api.models.tf1.glm_norm import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
@@ -71,7 +71,7 @@ def _test_model_fit_partition(
             from batchglm.api.models.tf1.glm_nb import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         elif noise_model == "norm":
-            from batchglm.api.models import Simulator
+            from batchglm.api.models.tf1.glm_norm import Simulator
             rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
@@ -116,7 +116,7 @@ def _test_residuals_fit(
         if noise_model == "nb":
             from batchglm.api.models.tf1.glm_nb import Simulator
         elif noise_model == "norm":
-            from batchglm.api.models import Simulator
+            from batchglm.api.models.tf1.glm_norm import Simulator
         else:
             raise ValueError("noise model %s not recognized" % noise_model)
 

From ffbd58d1d80b32f312d2c7b8bf8374cbdb2ca609 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 30 Oct 2019 22:59:08 -0700
Subject: [PATCH 23/25] corrected formula documentation of lrt()

---
 diffxpy/testing/tests.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index b64453d..f65f429 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -203,16 +203,12 @@ def lrt(
     :param data: Input data matrix (observations x features) or (cells x genes).
     :param full_formula_loc: formula
         Full model formula for location parameter model.
-        If not specified, `full_formula` will be used instead.
     :param reduced_formula_loc: formula
         Reduced model formula for location and scale parameter models.
-        If not specified, `reduced_formula` will be used instead.
     :param full_formula_scale: formula
         Full model formula for scale parameter model.
-        If not specified, `reduced_formula_scale` will be used instead.
     :param reduced_formula_scale: formula
         Reduced model formula for scale parameter model.
-        If not specified, `reduced_formula` will be used instead.
     :param as_numeric:
         Which columns of sample_description to treat as numeric and
         not as categorical. This yields columns in the design matrix

From 0aa7defe16dc4ad5e0d322f753042e80fe15f26c Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 30 Oct 2019 22:59:52 -0700
Subject: [PATCH 24/25] fixed formula documentation of wald()

---
 diffxpy/testing/tests.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index f65f429..a8ac7c8 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -401,10 +401,8 @@ def wald(
         the exact coefficients which are to be tested.
     :param formula_loc: formula
         model formula for location and scale parameter models.
-        If not specified, `formula` will be used instead.
     :param formula_scale: formula
         model formula for scale parameter model.
-        If not specified, `formula` will be used instead.
     :param as_numeric:
         Which columns of sample_description to treat as numeric and
         not as categorical. This yields columns in the design matrix

From f60e68c9ee369300d2064fdecbb04b96289f6315 Mon Sep 17 00:00:00 2001
From: "david.seb.fischer" <david.seb.fischer@gmail.com>
Date: Wed, 30 Oct 2019 23:06:14 -0700
Subject: [PATCH 25/25] updated batchglm requirements

---
 docs/requires.txt | 2 +-
 requirements.txt  | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/requires.txt b/docs/requires.txt
index 27cbd57..1592d45 100644
--- a/docs/requires.txt
+++ b/docs/requires.txt
@@ -2,7 +2,7 @@ numpy>=1.14.0
 scipy
 pandas
 patsy>=0.5.0
-batchglm>=0.6.8
+batchglm>=0.7.1
 statsmodels
 anndata
 seaborn
diff --git a/requirements.txt b/requirements.txt
index 27cbd57..1592d45 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy>=1.14.0
 scipy
 pandas
 patsy>=0.5.0
-batchglm>=0.6.8
+batchglm>=0.7.1
 statsmodels
 anndata
 seaborn
diff --git a/setup.py b/setup.py
index ea3775b..99ba194 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
         'scipy>=1.2.1',
         'pandas',
         'patsy>=0.5.0',
-        'batchglm>=0.6.8',
+        'batchglm>=0.7.1',
         'statsmodels',
     ],
     extras_require={