Merge branch 'main' into all-contributors/add-kshedden

RaphaelS1 · web-flow · commit 82ec8198a881 · 2022-10-27T21:52:54.000+01:00
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -36,6 +36,14 @@
       "contributions": [
         "doc",
         "code"
+    },
+    {
+      "login": "mschauer",
+      "name": "Moritz Schauer",
+      "avatar_url": "https://avatars.githubusercontent.com/u/1923437?v=4",
+      "profile": "http://www.math.chalmers.se/~smoritz/index.html",
+      "contributions": [
+        "review"
       ]
     }
   ],
diff --git a/README.md b/README.md
@@ -48,7 +48,7 @@ Survival analysis interface in Julia, still very experimental. Tries to build on
 ## Contributors
 
 <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
-[![All Contributors](https://img.shields.io/badge/all_contributors-2-orange.svg?style=flat-square)](#contributors-)
+[![All Contributors](https://img.shields.io/badge/all_contributors-3-orange.svg?style=flat-square)](#contributors-)
 <!-- ALL-CONTRIBUTORS-BADGE:END -->
 
 <!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
@@ -59,6 +59,7 @@ Survival analysis interface in Julia, still very experimental. Tries to build on
     <tr>
       <td align="center"><a href="http://www.raphaelsonabend.co.uk"><img src="https://avatars.githubusercontent.com/u/25639974?v=4?s=100" width="100px;" alt="Raphael Sonabend"/><br /><sub><b>Raphael Sonabend</b></sub></a><br /><a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/issues?q=author%3ARaphaelS1" title="Bug reports">🐛</a> <a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/commits?author=RaphaelS1" title="Code">💻</a> <a href="#content-RaphaelS1" title="Content">🖋</a> <a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/commits?author=RaphaelS1" title="Documentation">📖</a> <a href="#design-RaphaelS1" title="Design">🎨</a> <a href="#example-RaphaelS1" title="Examples">💡</a> <a href="#ideas-RaphaelS1" title="Ideas, Planning, & Feedback">🤔</a> <a href="#maintenance-RaphaelS1" title="Maintenance">🚧</a> <a href="#projectManagement-RaphaelS1" title="Project Management">📆</a> <a href="#question-RaphaelS1" title="Answering Questions">💬</a> <a href="#research-RaphaelS1" title="Research">🔬</a> <a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/pulls?q=is%3Apr+reviewed-by%3ARaphaelS1" title="Reviewed Pull Requests">👀</a> <a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/commits?author=RaphaelS1" title="Tests">⚠️</a> <a href="#tutorial-RaphaelS1" title="Tutorials">✅</a></td>
       <td align="center"><a href="https://github.com/kshedden"><img src="https://avatars.githubusercontent.com/u/2666691?v=4?s=100" width="100px;" alt="Kerby Shedden"/><br /><sub><b>Kerby Shedden</b></sub></a><br /><a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/commits?author=kshedden" title="Documentation">📖</a> <a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/commits?author=kshedden" title="Code">💻</a></td>
+      <td align="center"><a href="http://www.math.chalmers.se/~smoritz/index.html"><img src="https://avatars.githubusercontent.com/u/1923437?v=4?s=100" width="100px;" alt="Moritz Schauer"/><br /><sub><b>Moritz Schauer</b></sub></a><br /><a href="https://github.com/RaphaelS1/SurvivalAnalysis.jl/pulls?q=is%3Apr+reviewed-by%3Amschauer" title="Reviewed Pull Requests">👀</a></td>
     </tr>
   </tbody>
 </table>
diff --git a/src/Surv.jl b/src/Surv.jl
@@ -363,6 +363,59 @@ function _tabulate_surv(T, Δ)
             noutcomes = noutcomes)
 end
 
+"""
+    expandstats(stats, times)
+
+Insert new times into `stats` (a named tuple compatible with the `stats` field of `RCSurv`).
+
+Censoring, event and outcome counts are zero at the newly-inserted times.  The number at risk at an inserted time is the number at risk at the next time already present in `stats.time`, or zero when the inserted time lies beyond the last such time.  Time values in `times` that are already in `stats.time` are ignored.
+
+`stats.time` must be sorted in increasing order.  The returned count vectors are `Float64`.  If `stats` contains no times at all, every count in the result is zero.
+
+# Examples
+```jldoctest
+julia> srv = Surv([1, 4], [false, true], :r);
+
+julia> srv.stats
+(time = [1.0, 4.0], nrisk = [2, 1], ncens = [1, 0], nevents = [0, 1], noutcomes = [1, 1])
+
+julia> SurvivalAnalysis.expandstats(srv.stats, [3, 4, 5])
+(time = [1.0, 3.0, 4.0, 5.0], nrisk = [2.0, 1.0, 1.0, 0.0], ncens = [1.0, 0.0, 0.0, 0.0], nevents = [0.0, 0.0, 1.0, 0.0], noutcomes = [1.0, 0.0, 1.0, 0.0])
+```
+"""
+function expandstats(stats, times)
+
+    ut = sort!(unique(vcat(stats.time, times)))
+    n = length(ut)
+    nrisk = zeros(n)
+    ncens = zeros(n)
+    nevents = zeros(n)
+    noutcomes = zeros(n)
+
+    nobs = length(stats.time)
+    for (i, t) in enumerate(ut)
+        # Position of the first existing time that is >= t.
+        j = searchsortedfirst(stats.time, t)
+        if j > nobs
+            # Beyond the last existing time (or `stats` is empty): all counts stay zero.
+            continue
+        elseif stats.time[j] == t
+            # Existing time: carry its counts over unchanged.
+            nrisk[i] = stats.nrisk[j]
+            ncens[i] = stats.ncens[j]
+            nevents[i] = stats.nevents[j]
+            noutcomes[i] = stats.noutcomes[j]
+        else
+            # Newly-inserted time: no outcome occurs here, so the number at risk
+            # is the one recorded at the next existing time.
+            nrisk[i] = stats.nrisk[j]
+        end
+    end
+
+    return (time = ut, nrisk = nrisk, ncens = ncens, nevents = nevents,
+            noutcomes = noutcomes)
+end
+
 """
     merge(A::OneSidedSurv...)
     merge(A::TwoSidedSurv...)
diff --git a/src/SurvivalAnalysis.jl b/src/SurvivalAnalysis.jl
@@ -1,8 +1,8 @@
 module SurvivalAnalysis
 
-    using DataFrames: DataFrame
+    using DataFrames: DataFrame, groupby
     using Distributions
-    using LinearAlgebra: diag
+    using LinearAlgebra: diag, pinv
     using NLSolversBase: hessian!
     using Optim
     using RecipesBase
@@ -26,6 +26,7 @@ module SurvivalAnalysis
     export SurvivalPrediction
     export SurvivalMeasure, concordance, ConcordanceWeights
     export SurvivalTimeMeasure, MSE, RMSE, MAE
+    export logrank
 
     ## undocumented
     # reexports
diff --git a/src/SurvivalEstimator.jl b/src/SurvivalEstimator.jl
@@ -338,6 +338,165 @@ function StatsBase.fit!(obj::KaplanMeier, Y::RCSurv)
     )
 end
 
+#-------------------
+# Log-rank tests
+#-------------------
+# Return the weight to apply at the current outcome time.
+#
+# `wt` is the weight returned for the previous outcome time (only the `:peto`
+# method uses it), `nevents` and `nrisk` are the event and at-risk counts at the
+# current time, and `wtmethod` is one of `:logrank`, `:wilcoxon`, `:tw` or `:peto`.
+# Any other `wtmethod` raises an error.
+function _update_wt(wt, nevents, nrisk, wtmethod)
+    if wtmethod == :logrank
+        return 1.0
+    elseif wtmethod == :wilcoxon
+        # Converted so that every method returns a floating-point weight and the
+        # caller's running `wt` does not switch between integer and float.
+        return float(nrisk)
+    elseif wtmethod == :tw
+        return sqrt(nrisk)
+    elseif wtmethod == :peto
+        return wt * (1 - nevents / (nrisk + 1))
+    else
+        error("wtmethod must be one of logrank, wilcoxon, tw, or peto")
+    end
+end
+
+# Helper function to calculate the score vector u and covariance matrix V for a logrank test.
+#
+# `Y` holds one `RCSurv` per group.  The groups are merged to obtain the pooled
+# event/at-risk counts, and each group's stats are expanded onto the pooled outcome
+# times so that position `i` can be used for every table.
+# NOTE(review): this assumes `A.stats` and the expanded tables are aligned with
+# `unique_outcome_times(A)` -- confirm against that helper.
+#
+# Returns `(u, V)` where `u` has one entry per group and `V` is `m x m`.
+function logrank_moments(Y::RCSurv...; wtmethod::Symbol=:logrank)
+    m = length(Y)
+    A = merge(Y...)
+    ti = unique_outcome_times(A)
+    sta = A.stats
+    st = [expandstats(y.stats, ti) for y in Y]
+
+    u = zeros(m)
+    V = zeros(m, m)
+    wt = 1.0
+    for i in eachindex(ti)
+        # Pooled number of events and number at risk at this time
+        d, n = sta.nevents[i], sta.nrisk[i]
+        wt = _update_wt(wt, d, n, wtmethod)
+        for j in 1:m
+            dd, nnj = st[j].nevents[i], st[j].nrisk[i]
+            fj = nnj / n
+            # Weighted (events in group j) minus (pooled events times group j's share of the risk set)
+            u[j] += wt * (dd - d*fj)
+
+            # The variance term divides by n - 1, so times with a single
+            # subject at risk contribute nothing to V.
+            n > 1 || continue
+            for k in 1:m
+                nnk = st[k].nrisk[i]
+                fk = nnk / n
+                q = j == k ? 1.0 : 0.0
+                V[j,k] += wt^2 * (q - fj) * fk * d * (n - d) / (n - 1)
+            end
+        end
+    end
+
+    return u, V
+end
+
+"""
+    logrank(Y::RCSurv...; wtmethod=:logrank)
+    logrank(time, status, group, strata=zeros(0); wtmethod=:logrank)
+
+Test the null hypothesis that two or more survival functions are identical.
+
+When providing `time` and `status` as vectors, the `status` argument is coded 0/1 corresponding to censoring (0) and event (1).
+
+The `strata` argument is optional and contains labels defining strata for a stratified test.
+
+`wtmethod` selects one of four different weighting methods: `:logrank` (uniform weighting), `:wilcoxon` (weight by number at risk), `:tw` (Tarone-Ware, weight by the square root of the number at risk), `:peto` (Peto-Peto, weight by the estimated marginal survival function).
+
+Returns a named tuple `(stat, dof, pvalue)` holding the chi-square statistic, its degrees of freedom (the number of groups minus one) and the p-value.  The `RCSurv` method throws an `ArgumentError` when fewer than two groups are supplied.
+
+# Examples
+```jldoctest
+julia> srv1 = Surv([1, 3, 4], [false, true, true], :r);
+
+julia> srv2 = Surv([4, 5, 6], [true, true, false], :r);
+
+julia> pr = x -> (stat=round(x.stat; sigdigits=4), dof=x.dof, pvalue=round(x.pvalue; sigdigits=4));
+
+julia> r = logrank(srv1, srv2; wtmethod=:wilcoxon);
+
+julia> pr(r)
+(stat = 2.5, dof = 1, pvalue = 0.1138)
+
+julia> r = logrank([1, 3, 4, 4, 5, 6], [false, true, true, true, true, false], [1, 1, 1, 2, 2, 2]; wtmethod=:wilcoxon);
+
+julia> pr(r)
+(stat = 2.5, dof = 1, pvalue = 0.1138)
+
+julia> r = logrank([1, 3, 4, 4, 5, 6], [false, true, true, true, true, false], [1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 2, 2]; wtmethod=:wilcoxon);
+
+julia> pr(r)
+(stat = 3.0, dof = 1, pvalue = 0.08326)
+```
+"""
+function logrank(Y::RCSurv...; wtmethod=:logrank)
+
+    length(Y) > 1 || throw(ArgumentError("logrank requires two or more groups"))
+
+    u, V = logrank_moments(Y...; wtmethod=wtmethod)
+
+    # Chi-square statistic (the pseudo-inverse tolerates a rank-deficient V)
+    csq = u' * pinv(V) * u
+
+    # Degrees of freedom
+    dof = length(Y) - 1
+
+    # P-value: take the upper tail directly, since `1 - cdf(...)` rounds to
+    # exactly zero once the cdf is within floating-point precision of one.
+    p = ccdf(Chisq(dof), csq)
+
+    return (stat=csq, dof=dof, pvalue=p)
+end
+
+# Returns a list of Surv values, each of which contains the survival data for one group.
+# Also returns a vector containing the group labels, in the same order as the Surv values.
+# All observations are passed to `Surv` with the `:r` type flag.
+function _build_surv(time, status, group)
+    da = DataFrame(time=time, status=status, group=group)
+    Y = Surv[]
+    # Typed by the group column rather than `[]` (a `Vector{Any}`), so callers
+    # receive a label vector with a concrete element type.
+    grp = eltype(da.group)[]
+    for dz in groupby(da, :group)
+        push!(Y, Surv(dz[:, :time], dz[:, :status], :r))
+        push!(grp, first(dz[:, :group]))
+    end
+    return Y, grp
+end
+
+# Log-rank test from raw vectors.  With no `strata` the data are split by `group`
+# and passed to the `RCSurv` method.  Otherwise the score vector and covariance
+# matrix are computed within each stratum and summed before forming the statistic.
+# Throws an `ArgumentError` when the argument lengths disagree or when fewer than
+# two distinct groups are present.
+function logrank(time, status, group, strata=zeros(0); wtmethod=:logrank)
+
+    length(time) == length(status) == length(group) || throw(ArgumentError("time, status, and group must have the same length"))
+    length(strata) in (0, length(time)) || throw(ArgumentError("If provided, strata must have the same length as time, status, and group"))
+
+    # Unstratified test
+    if length(strata) == 0
+        Y, _ = _build_surv(time, status, group)
+        return logrank(Y...; wtmethod=wtmethod)
+    end
+
+    # Dictionary mapping group labels to integer positions 1, 2, ...
+    gpix = Dict{eltype(group),Int}()
+    for g in sort(unique(group))
+        gpix[g] = length(gpix) + 1
+    end
+    m = length(gpix)
+
+    # Same requirement as the unstratified test; without it a single group
+    # would reach `Chisq(0)` below.
+    m > 1 || throw(ArgumentError("logrank requires two or more groups"))
+
+    # Stratified test
+    da = DataFrame(time=time, status=status, group=group, strata=strata)
+    u = zeros(m)
+    V = zeros(m, m)
+    for dx in groupby(da, :strata)
+        Y, grp = _build_surv(dx[:, :time], dx[:, :status], dx[:, :group])
+        u0, V0 = logrank_moments(Y...; wtmethod=wtmethod)
+        # A stratum may contain only some of the groups, so its moments are
+        # scattered into the positions of the groups that are present.
+        ii = [gpix[g] for g in grp]
+        u[ii] .+= u0
+        V[ii, ii] .+= V0
+    end
+
+    # Chi-square statistic (the pseudo-inverse tolerates a rank-deficient V)
+    csq = u' * pinv(V) * u
+
+    # Degrees of freedom
+    dof = m - 1
+
+    # P-value: take the upper tail directly, since `1 - cdf(...)` rounds to
+    # exactly zero once the cdf is within floating-point precision of one.
+    p = ccdf(Chisq(dof), csq)
+
+    return (stat=csq, dof=dof, pvalue=p)
+end
+
 """
     confint(km::KaplanMeier; level::Float64 = 0.95)
     confint(km::KaplanMeier, t::Number; level::Float64 = 0.95)
diff --git a/test/SurvivalEstimator.jl b/test/SurvivalEstimator.jl