diff --git a/include/problem.h b/include/problem.h index 9d7a446f..25d15172 100644 --- a/include/problem.h +++ b/include/problem.h @@ -36,6 +36,8 @@ typedef struct int nnz_affine; int nnz_nonlinear; /* jacobian of nonlinear constraints */ int nnz_hessian; + int n_vars; + int total_constraint_size; } Diff_engine_stats; typedef struct problem diff --git a/include/subexpr.h b/include/subexpr.h index 4324991f..c87da073 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -48,6 +48,7 @@ typedef struct quad_form_expr { expr base; CSR_Matrix *Q; + CSC_Matrix *QJf; /* Q * J_f in CSC (for chain rule hessian) */ } quad_form_expr; /* Sum reduction along an axis */ diff --git a/include/utils/CSC_Matrix.h b/include/utils/CSC_Matrix.h index c444dcc6..951b088d 100644 --- a/include/utils/CSC_Matrix.h +++ b/include/utils/CSC_Matrix.h @@ -29,30 +29,37 @@ CSC_Matrix *new_csc_matrix(int m, int n, int nnz); /* Free a CSC matrix */ void free_csc_matrix(CSC_Matrix *matrix); -CSC_Matrix *csr_to_csc(const CSR_Matrix *A); - -/* Allocate sparsity pattern for C = A^T D A for diagonal D */ +/* Fill sparsity of C = A^T D A for diagonal D */ CSR_Matrix *ATA_alloc(const CSC_Matrix *A); -/* Allocate sparsity pattern for C = B^T D A for diagonal D */ +/* Fill sparsity of C = B^T D A for diagonal D */ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B); -/* Compute values for C = A^T D A. C must have precomputed sparsity pattern */ +/* Fill sparsity of C = BA, where B is symmetric. */ +CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A); + +/* Compute values for C = A^T D A (null d corresponds to D as identity) */ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C); -/* Compute values for C = B^T D A. C must have precomputed sparsity pattern */ +/* Compute values for C = B^T D A (null d corresonds to D as identity) */ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, CSR_Matrix *C); -/* C = z^T A where A is in CSC format and C is assumed to have one row. - * C must have column indices pre-computed. Fills in values of C only. - */ -void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C); +/* Fill values of C = BA. The matrix B does not have to be symmetric */ +void BA_fill_values(const CSR_Matrix *B, const CSC_Matrix *A, CSC_Matrix *C); + +/* Fill values of C = x^T A. The matrix C must have filled sparsity. */ +void yTA_fill_values(const CSC_Matrix *A, const double *x, CSR_Matrix *C); + +/* Count nonzero columns of a CSC matrix */ +int count_nonzero_cols_csc(const CSC_Matrix *A); -CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork); +/* convert from CSR to CSC format */ +CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork); void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork); -CSR_Matrix *csc_to_csr_fill_sparsity(const CSC_Matrix *A, int *iwork); +/* convert from CSC to CSR format */ +CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork); void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork); #endif /* CSC_MATRIX_H */ diff --git a/src/affine/left_matmul.c b/src/affine/left_matmul.c index be62f288..26ea172c 100644 --- a/src/affine/left_matmul.c +++ b/src/affine/left_matmul.c @@ -88,12 +88,12 @@ static void jacobian_init_impl(expr *node) /* initialize child's jacobian and precompute sparsity of its CSC */ jacobian_init(x); - lnode->Jchild_CSC = csr_to_csc_fill_sparsity(x->jacobian, node->work->iwork); + lnode->Jchild_CSC = csr_to_csc_alloc(x->jacobian, node->work->iwork); /* precompute sparsity of this node's jacobian in CSC and CSR */ lnode->J_CSC = lnode->A->block_left_mult_sparsity(lnode->A, lnode->Jchild_CSC, lnode->n_blocks); - node->jacobian = csc_to_csr_fill_sparsity(lnode->J_CSC, lnode->csc_to_csr_work); + node->jacobian = csc_to_csr_alloc(lnode->J_CSC, lnode->csc_to_csr_work); } static void eval_jacobian(expr *node) diff --git a/src/bivariate_restricted_dom/quad_over_lin.c b/src/bivariate_restricted_dom/quad_over_lin.c index ac4f85c2..bc8ea052 100644 --- a/src/bivariate_restricted_dom/quad_over_lin.c +++ b/src/bivariate_restricted_dom/quad_over_lin.c @@ -164,8 +164,7 @@ static void eval_jacobian(expr *node) } /* chain rule (no derivative wrt y) using CSC format */ - csc_matvec_fill_values(x->work->jacobian_csc, node->work->dwork, - node->jacobian); + yTA_fill_values(x->work->jacobian_csc, node->work->dwork, node->jacobian); /* insert derivative wrt y at right place (for correctness this assumes that y does not appear in the numerator, but this will always be diff --git a/src/expr.c b/src/expr.c index 7754d1cf..01e5b49c 100644 --- a/src/expr.c +++ b/src/expr.c @@ -51,7 +51,7 @@ void jacobian_csc_init(expr *node) } node->work->csc_work = (int *) malloc(node->n_vars * sizeof(int)); node->work->jacobian_csc = - csr_to_csc_fill_sparsity(node->jacobian, node->work->csc_work); + csr_to_csc_alloc(node->jacobian, node->work->csc_work); } void free_expr(expr *node) diff --git a/src/other/quad_form.c b/src/other/quad_form.c index 509d5d8d..91ea6028 100644 --- a/src/other/quad_form.c +++ b/src/other/quad_form.c @@ -1,6 +1,8 @@ #include "other.h" #include "subexpr.h" #include "utils/CSC_Matrix.h" +#include "utils/CSR_sum.h" +#include "utils/cblas_wrapper.h" #include #include #include @@ -27,17 +29,44 @@ static void forward(expr *node, const double *u) static void jacobian_init_impl(expr *node) { - assert(node->left->var_id != NOT_A_VARIABLE); - expr *x = node->left; + + /* dwork stores the result of Q @ f(x) in the forward pass */ node->work->dwork = (double *) malloc(x->size * sizeof(double)); - node->jacobian = new_csr_matrix(1, node->n_vars, x->size); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = x->size; - for (int j = 0; j < x->size; j++) + if (x->var_id != NOT_A_VARIABLE) + { + node->jacobian = new_csr_matrix(1, node->n_vars, x->size); + node->jacobian->p[0] = 0; + node->jacobian->p[1] = x->size; + + for (int j = 0; j < x->size; j++) + { + node->jacobian->i[j] = x->var_id + j; + } + } + else { - node->jacobian->i[j] = x->var_id + j; + /* chain rule: J = 2 * (Q @ f(x))^T * J_f */ + jacobian_init(x); + jacobian_csc_init(x); + CSC_Matrix *J_csc = x->work->jacobian_csc; + + /* allocate the right number of nnz */ + int nnz = count_nonzero_cols_csc(J_csc); + node->jacobian = new_csr_matrix(1, node->n_vars, nnz); + node->jacobian->p[0] = 0; + node->jacobian->p[1] = nnz; + + /* fill sparsity pattern */ + int idx = 0; + for (int j = 0; j < J_csc->n; j++) + { + if (J_csc->p[j + 1] > J_csc->p[j]) + { + node->jacobian->i[idx++] = j; + } + } } } @@ -46,140 +75,156 @@ static void eval_jacobian(expr *node) expr *x = node->left; CSR_Matrix *Q = ((quad_form_expr *) node)->Q; - // jacobian = 2 * Q * x - csr_matvec(Q, x->value, node->jacobian->x, 0); - - for (int j = 0; j < x->size; j++) + if (x->var_id != NOT_A_VARIABLE) { - node->jacobian->x[j] *= 2.0; + /* jacobian = 2 * (Q @ x)^T */ + csr_matvec(Q, x->value, node->jacobian->x, 0); + cblas_dscal(x->size, 2.0, node->jacobian->x, 1); } -} - -static void wsum_hess_init_impl(expr *node) -{ - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; - expr *x = node->left; - CSR_Matrix *H = new_csr_matrix(node->n_vars, node->n_vars, Q->nnz); - - /* set global row pointers */ - memcpy(H->p + x->var_id, Q->p, (x->size + 1) * sizeof(int)); - for (int i = x->var_id + x->size + 1; i <= node->n_vars; i++) + else { + /* jacobian = 2 * (Q @ f(x))^T @ J_f */ + x->eval_jacobian(x); - H->p[i] = Q->nnz; - } + if (!x->work->jacobian_csc_filled) + { + csr_to_csc_fill_values(x->jacobian, x->work->jacobian_csc, + x->work->csc_work); - /* set global column indices */ - for (int i = 0; i < Q->nnz; i++) - { - H->i[i] = Q->i[i] + x->var_id; - } + if (x->is_affine(x)) + { + x->work->jacobian_csc_filled = true; + } + } - node->wsum_hess = H; -} + /* The jacobian has same values as the gradient, which is + J_f^T (Q @ f(x)). Here, dwork stores Q @ f(x) from forward */ + yTA_fill_values(x->work->jacobian_csc, node->work->dwork, node->jacobian); -static void eval_wsum_hess(expr *node, const double *w) -{ - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; - double *H = node->wsum_hess->x; - double two_w = 2.0 * w[0]; - for (int i = 0; i < Q->nnz; i++) - { - H[i] = two_w * Q->x[i]; + cblas_dscal(node->jacobian->nnz, 2.0, node->jacobian->x, 1); } } -/* -The following two functions are commented out. It supports the jacobian for -quad_form(Ax, Q), but after reconsideration, I think we should treat this as -quad_form(x, A.TQA) in the canonicalization so we don't need to support the chain -rule here. -static void jacobian_init_impl(expr *node) +static void wsum_hess_init_impl(expr *node) { + CSR_Matrix *Q = ((quad_form_expr *) node)->Q; expr *x = node->left; - node->work->dwork = (double *) malloc(x->d1 * sizeof(double)); - // if x is a variable if (x->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(1, node->n_vars, x->d1); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = x->d1; + CSR_Matrix *H = new_csr_matrix(node->n_vars, node->n_vars, Q->nnz); - for (int j = 0; j < x->d1; j++) + /* set global row pointers */ + memcpy(H->p + x->var_id, Q->p, (x->size + 1) * sizeof(int)); + for (int i = x->var_id + x->size + 1; i <= node->n_vars; i++) { - node->jacobian->i[j] = x->var_id + j; + H->p[i] = Q->nnz; } - } - else // x is not a variable - { - // compute required allocation and allocate jacobian - bool *col_nz = (bool *) calloc(node->n_vars, sizeof(bool)); - int nonzero_cols = count_nonzero_cols(x->jacobian, col_nz); - node->jacobian = new_csr_matrix(1, node->n_vars, nonzero_cols + 1); - - // precompute column indices - node->jacobian->nnz = 0; - for (int j = 0; j < node->n_vars; j++) + + /* set global column indices */ + for (int i = 0; i < Q->nnz; i++) { - if (col_nz[j]) - { - node->jacobian->i[node->jacobian->nnz] = j; - node->jacobian->nnz++; - } + H->i[i] = Q->i[i] + x->var_id; } - assert(nonzero_cols == node->jacobian->nnz); - free(col_nz); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = node->jacobian->nnz; + node->wsum_hess = H; } -} + else + { + /* The hessian of h(x) = f(x)^T Q f(x) is term1 + term2 where + + * term1 = J_f^T Q J_f + * term2 = sum_i (Qf(x))_i nabla^2 f_i. + + To compute term1, we first compute B = Q J_f and then compute term1 + = J_f^T B. + */ + /* jacobian_csc_init(x) already called in jacobian_init */ + quad_form_expr *qnode = (quad_form_expr *) node; + CSC_Matrix *Jf = x->work->jacobian_csc; + + /* term1 = Jf^T W Jf = Jf^T B*/ + CSC_Matrix *B = symBA_alloc(Q, Jf); + qnode->QJf = B; + node->work->hess_term1 = BTA_alloc(Jf, B); + + /* term2 = sum_i (Qf(x))_i nabla^2 f_i */ + wsum_hess_init(x); + node->work->hess_term2 = new_csr_copy_sparsity(x->wsum_hess); + + /* hess = term1 + term2 */ + int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz; + node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, max_nnz); + sum_csr_matrices_fill_sparsity(node->work->hess_term1, + node->work->hess_term2, node->wsum_hess); + } +} -static void eval_jacobian_old(expr *node) +static void eval_wsum_hess(expr *node, const double *w) { - expr *x = node->left; CSR_Matrix *Q = ((quad_form_expr *) node)->Q; + expr *x = node->left; + double two_w = 2.0 * w[0]; - // if x is a variable if (x->var_id != NOT_A_VARIABLE) { - csr_matvec(Q, x->value, node->jacobian->x, 0); - - for (int j = 0; j < x->d1; j++) - { - node->jacobian->x[j] *= 2.0; - } + /* TODO: do we want to compute this hessian only once (up to a scaling)? + * Maybe unnecessary optimization. */ + memcpy(node->wsum_hess->x, Q->x, Q->nnz * sizeof(double)); + cblas_dscal(Q->nnz, two_w, node->wsum_hess->x, 1); } - else // x is not a variable + else { - linear_op_expr *lin_x = (linear_op_expr *) x; - - // local jacobian - csr_matvec(Q, x->value, node->work->dwork, 0); - - for (int j = 0; j < x->d1; j++) + /* fill the CSC representation of the Jacobian of the child */ + CSC_Matrix *Jf = x->work->jacobian_csc; + if (!x->work->jacobian_csc_filled) { - node->work->dwork[j] *= 2.0; + csr_to_csc_fill_values(x->jacobian, Jf, x->work->csc_work); + + if (x->is_affine(x)) + { + x->work->jacobian_csc_filled = true; + } } - // chain rule using CSC format - csc_matvec_fill_values(lin_x->A_csc, node->work->dwork, node->jacobian); + CSC_Matrix *QJf = ((quad_form_expr *) node)->QJf; + CSR_Matrix *term1 = node->work->hess_term1; + CSR_Matrix *term2 = node->work->hess_term2; + + /* term1 = J_f^T Q J_f = J_f^T B */ + BA_fill_values(Q, Jf, QJf); + BTDA_fill_values(Jf, QJf, NULL, term1); + + /* term2 */ + x->eval_wsum_hess(x, node->work->dwork); + memcpy(term2->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + + /* scale both terms by 2w */ + cblas_dscal(term1->nnz, two_w, term1->x, 1); + cblas_dscal(term2->nnz, two_w, term2->x, 1); + + /* sum the two terms */ + sum_csr_matrices_fill_values(term1, term2, node->wsum_hess); } } -*/ static void free_type_data(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; free_csr_matrix(qnode->Q); qnode->Q = NULL; + if (qnode->QJf != NULL) + { + free_csc_matrix(qnode->QJf); + qnode->QJf = NULL; + } } static bool is_affine(const expr *node) { (void) node; + /* TODO: it is affine (constant) if both children are constant */ return false; } diff --git a/src/problem.c b/src/problem.c index 406f7343..c29ca89d 100644 --- a/src/problem.c +++ b/src/problem.c @@ -65,6 +65,9 @@ problem *new_problem(expr *objective, expr **constraints, int n_constraints, prob->stats.time_forward_constraints = 0.0; prob->stats.nnz_affine = 0; prob->stats.nnz_nonlinear = 0; + prob->stats.nnz_hessian = 0; + prob->stats.n_vars = prob->n_vars; + prob->stats.total_constraint_size = prob->total_constraint_size; prob->verbose = verbose; @@ -274,6 +277,9 @@ static inline void print_end_message(const Diff_engine_stats *stats) DIFF_ENGINE_VERSION); printf("\nProblem statistics:\n"); + printf(" Number of variables: %d\n", stats->n_vars); + printf(" Number of constraints: %d\n", + stats->total_constraint_size); printf(" Affine constraints (nnz): %d\n", stats->nnz_affine); printf(" Jacobian nonlinear constraints (nnz): %d\n", stats->nnz_nonlinear); printf(" Lagrange Hessian (nnz): %d\n", stats->nnz_hessian); diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_Matrix.c index d017a627..9d4e0780 100644 --- a/src/utils/CSC_Matrix.c +++ b/src/utils/CSC_Matrix.c @@ -111,6 +111,34 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A) return C; } +static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz, + const double *b_x, const int *b_i, int b_nnz) +{ + int ii = 0; + int jj = 0; + double sum = 0.0; + + while (ii < a_nnz && jj < b_nnz) + { + if (a_i[ii] == b_i[jj]) + { + sum += a_x[ii] * b_x[jj]; + ii++; + jj++; + } + else if (a_i[ii] < b_i[jj]) + { + ii++; + } + else + { + jj++; + } + } + + return sum; +} + static inline double sparse_wdot(const double *a_x, const int *a_i, int a_nnz, const double *b_x, const int *b_i, int b_nnz, const double *d) @@ -158,64 +186,23 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C) int nnz_ai = A->p[ii + 1] - A->p[ii]; int nnz_aj = A->p[j + 1] - A->p[j]; - /* compute Cij = weighted inner product of column i and column j */ - double sum = sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai, - A->x + A->p[j], A->i + A->p[j], nnz_aj, d); - - C->x[jj] = sum; + if (d != NULL) + { + C->x[jj] = + sparse_wdot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai, + A->x + A->p[j], A->i + A->p[j], nnz_aj, d); + } + else + { + C->x[jj] = sparse_dot(A->x + A->p[ii], A->i + A->p[ii], nnz_ai, + A->x + A->p[j], A->i + A->p[j], nnz_aj); + } } } } } -CSC_Matrix *csr_to_csc(const CSR_Matrix *A) -{ - CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz); - - int i, j; - int *count = malloc(A->n * sizeof(int)); - - memset(count, 0, A->n * sizeof(int)); - - // ------------------------------------------------------------------- - // compute nnz in each column of A - // ------------------------------------------------------------------- - for (i = 0; i < A->m; ++i) - { - for (j = A->p[i]; j < A->p[i + 1]; ++j) - { - count[A->i[j]]++; - } - } - - // ------------------------------------------------------------------ - // compute column pointers - // ------------------------------------------------------------------ - C->p[0] = 0; - for (i = 0; i < A->n; ++i) - { - C->p[i + 1] = C->p[i] + count[i]; - count[i] = C->p[i]; - } - - // ------------------------------------------------------------------ - // fill matrix - // ------------------------------------------------------------------ - for (i = 0; i < A->m; ++i) - { - for (j = A->p[i]; j < A->p[i + 1]; ++j) - { - C->x[count[A->i[j]]] = A->x[j]; - C->i[count[A->i[j]]] = i; - count[A->i[j]]++; - } - } - - free(count); - return C; -} - -CSC_Matrix *csr_to_csc_fill_sparsity(const CSR_Matrix *A, int *iwork) +CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork) { CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz); @@ -278,7 +265,7 @@ void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork) } } -CSR_Matrix *csc_to_csr_fill_sparsity(const CSC_Matrix *A, int *iwork) +CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork) { CSR_Matrix *C = new_csr_matrix(A->m, A->n, A->nnz); @@ -401,19 +388,14 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) return C; } -void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C) +void yTA_fill_values(const CSC_Matrix *A, const double *y, CSR_Matrix *C) { - /* Compute C = z^T * A where A is in CSC format - * C is a single-row CSR matrix with column indices pre-computed - * This fills in the values of C only - */ - for (int col = 0; col < A->n; col++) { double val = 0; for (int j = A->p[col]; j < A->p[col + 1]; j++) { - val += z[A->i[j]] * A->x[j]; + val += y[A->i[j]] * A->x[j]; } if (A->p[col + 1] - A->p[col] == 0) continue; @@ -430,6 +412,7 @@ void csc_matvec_fill_values(const CSC_Matrix *A, const double *z, CSR_Matrix *C) } } +/* computes C = B^T * D * A in CSR */ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, CSR_Matrix *C) { @@ -443,11 +426,124 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, int nnz_bi = B->p[i + 1] - B->p[i]; int nnz_aj = A->p[j + 1] - A->p[j]; - /* compute Cij = weighted inner product of col i of B and col j of A */ - double sum = sparse_wdot(B->x + B->p[i], B->i + B->p[i], nnz_bi, - A->x + A->p[j], A->i + A->p[j], nnz_aj, d); + if (d != NULL) + { + C->x[jj] = sparse_wdot(B->x + B->p[i], B->i + B->p[i], nnz_bi, + A->x + A->p[j], A->i + A->p[j], nnz_aj, d); + } + else + { + C->x[jj] = sparse_dot(B->x + B->p[i], B->i + B->p[i], nnz_bi, + A->x + A->p[j], A->i + A->p[j], nnz_aj); + } + } + } +} + +/* NOTE: an alternative marker-based approach (scatter A_{k,j} * Q[k,:] + * into column j of C using a marker array for position lookup) may be + * faster when Q is dense, since it touches each Q entry exactly once. + * The sparse_dot approach below is simpler but redundantly scans + * column j of A for each nonzero row of C. */ +void BA_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A, CSC_Matrix *C) +{ + /* fill values of C = Q * A, given the sparsity pattern of C. */ + int i, j, ii; - C->x[jj] = sum; + /* for each column j of C */ + for (j = 0; j < C->n; j++) + { + for (ii = C->p[j]; ii < C->p[j + 1]; ii++) + { + i = C->i[ii]; + int nnz_q = Q->p[i + 1] - Q->p[i]; + int nnz_a = A->p[j + 1] - A->p[j]; + + /* inner product between row i of Q and column j of A */ + C->x[ii] = sparse_dot(Q->x + Q->p[i], Q->i + Q->p[i], nnz_q, + A->x + A->p[j], A->i + A->p[j], nnz_a); + } + } +} + +CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A) +{ + /* Allocate C = B * A (sparsity only). B must be symmetric. + * B is CSR (m x m), A is CSC (m x n), C is CSC (m x n). + * + * Column j of C is B * a_j = sum_k A_{k,j} B[:, k], so the nonzero + * rows of column j of C are the union of the nonzero rows of B[:, k]. + * + * Since B is symmetric, we can find the nonzero rows of B[:, k] by + * finding the nonzero columns of B in row k. + * + * We use a marker array to avoid duplicates: marker[l] stores the + * last column j that registered l as nonzero, so checking + * marker[l] != j avoids duplicates. */ + + int m = B->m; + int n = A->n; + int i, j, k, jj, ii, ell; + + /* marker[row] = last column j that registered row as nonzero */ + int *marker = (int *) malloc(m * sizeof(int)); + for (i = 0; i < m; i++) + { + marker[i] = -1; + } + + int *Cp = (int *) malloc((n + 1) * sizeof(int)); + iVec *Ci = iVec_new(A->nnz); + Cp[0] = 0; + + /* for each column j of C */ + for (j = 0; j < n; j++) + { + int col_nnz = 0; + + /* iterate over nonzero rows k in column j of A */ + for (ii = A->p[j]; ii < A->p[j + 1]; ii++) + { + k = A->i[ii]; + + /* find nonzero rows ell of column k of B */ + for (jj = B->p[k]; jj < B->p[k + 1]; jj++) + { + ell = B->i[jj]; + if (marker[ell] != j) + { + marker[ell] = j; + iVec_append(Ci, ell); + col_nnz++; + } + } + } + + Cp[j + 1] = Cp[j] + col_nnz; + } + + /* allocate C and copy the computed structure */ + int total_nnz = Cp[n]; + CSC_Matrix *C = new_csc_matrix(m, n, total_nnz); + memcpy(C->p, Cp, (n + 1) * sizeof(int)); + memcpy(C->i, Ci->data, total_nnz * sizeof(int)); + + free(marker); + free(Cp); + iVec_free(Ci); + + return C; +} + +int count_nonzero_cols_csc(const CSC_Matrix *A) +{ + int count = 0; + for (int j = 0; j < A->n; j++) + { + if (A->p[j + 1] > A->p[j]) + { + count++; } } + return count; } diff --git a/tests/all_tests.c b/tests/all_tests.c index 37312c0c..a92a6d97 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -137,6 +137,8 @@ int main(void) mu_run_test(test_jacobian_cos_sin_multiply, tests_run); mu_run_test(test_jacobian_Ax_Bx_multiply, tests_run); mu_run_test(test_jacobian_AX_BX_multiply, tests_run); + mu_run_test(test_jacobian_quad_form_Ax, tests_run); + mu_run_test(test_jacobian_quad_form_exp, tests_run); mu_run_test(test_jacobian_composite_exp_add, tests_run); mu_run_test(test_jacobian_const_scalar_mult_log_vector, tests_run); mu_run_test(test_jacobian_const_scalar_mult_log_matrix, tests_run); @@ -274,6 +276,9 @@ int main(void) mu_run_test(test_wsum_hess_x_x_multiply, tests_run); mu_run_test(test_wsum_hess_AX_BX_multiply, tests_run); mu_run_test(test_wsum_hess_multiply_deep_composite, tests_run); + mu_run_test(test_wsum_hess_quad_form_Ax, tests_run); + mu_run_test(test_wsum_hess_quad_form_sin_Ax, tests_run); + mu_run_test(test_wsum_hess_quad_form_exp, tests_run); printf("\n--- Utility Tests ---\n"); mu_run_test(test_cblas_ddot, tests_run); @@ -283,8 +288,6 @@ int main(void) mu_run_test(test_transpose, tests_run); mu_run_test(test_AT_alloc_and_fill, tests_run); mu_run_test(test_kron_identity_csr, tests_run); - mu_run_test(test_csr_to_csc1, tests_run); - mu_run_test(test_csr_to_csc2, tests_run); mu_run_test(test_csr_to_csc_split, tests_run); mu_run_test(test_csc_to_csr_sparsity, tests_run); mu_run_test(test_csc_to_csr_values, tests_run); diff --git a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h index 4a6dde97..9e916af1 100644 --- a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h +++ b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h @@ -4,6 +4,7 @@ #include "expr.h" #include "minunit.h" #include "numerical_diff.h" +#include "other.h" #include "test_helpers.h" #include "utils/CSR_Matrix.h" @@ -113,3 +114,59 @@ const char *test_jacobian_AX_BX_multiply(void) free_csr_matrix(B); return 0; } + +const char *test_jacobian_quad_form_Ax(void) +{ + /* (Ax)^T Q (Ax) where Q is symmetric */ + double u_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + CSR_Matrix *A = new_csr_random(3, 4, 1.0); + + /* Q = [1 2 0; 2 3 0; 0 0 4] */ + CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; + int Qi[5] = {0, 1, 0, 1, 2}; + int Qp[4] = {0, 2, 4, 5}; + memcpy(Q->x, Qx, 5 * sizeof(double)); + memcpy(Q->i, Qi, 5 * sizeof(int)); + memcpy(Q->p, Qp, 4 * sizeof(int)); + + expr *x = new_variable(4, 1, 1, 6); + expr *Ax = new_left_matmul(x, A); + expr *sin_Ax = new_sin(Ax); + expr *node = new_quad_form(sin_Ax, Q); + + mu_assert("check_jacobian failed", + check_jacobian(node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + free_csr_matrix(A); + free_csr_matrix(Q); + return 0; +} + +const char *test_jacobian_quad_form_exp(void) +{ + /* exp(x)^T Q exp(x) where Q is symmetric */ + double u_vals[3] = {0.5, 1.0, 1.5}; + + /* Q = [1 2 0; 2 3 0; 0 0 4] */ + CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; + int Qi[5] = {0, 1, 0, 1, 2}; + int Qp[4] = {0, 2, 4, 5}; + memcpy(Q->x, Qx, 5 * sizeof(double)); + memcpy(Q->i, Qi, 5 * sizeof(int)); + memcpy(Q->p, Qp, 4 * sizeof(int)); + + expr *x = new_variable(3, 1, 0, 3); + expr *exp_x = new_exp(x); + expr *node = new_quad_form(exp_x, Q); + + mu_assert("check_jacobian failed", + check_jacobian(node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + free_csr_matrix(Q); + return 0; +} diff --git a/tests/utils/test_csc_matrix.h b/tests/utils/test_csc_matrix.h index ed0a3049..54590214 100644 --- a/tests/utils/test_csc_matrix.h +++ b/tests/utils/test_csc_matrix.h @@ -7,85 +7,6 @@ #include "test_helpers.h" #include "utils/CSC_Matrix.h" -const char *test_csr_to_csc1(void) -{ - CSR_Matrix *A = new_csr_matrix(4, 5, 5); - double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0}; - int Ai[5] = {0, 4, 1, 0, 1}; - int Ap[5] = {0, 2, 3, 4, 5}; - memcpy(A->x, Ax, 5 * sizeof(double)); - memcpy(A->i, Ai, 5 * sizeof(int)); - memcpy(A->p, Ap, 5 * sizeof(int)); - - CSC_Matrix *C = csr_to_csc(A); - - double Cx_correct[5] = {1.0, 2.0, 3.0, 4.0, 1.0}; - int Ci_correct[5] = {0, 2, 1, 3, 0}; - int Cp_correct[6] = {0, 2, 4, 4, 4, 5}; - - mu_assert("C vals incorrect", cmp_double_array(C->x, Cx_correct, 5)); - mu_assert("C rows incorrect", cmp_int_array(C->i, Ci_correct, 5)); - mu_assert("C cols incorrect", cmp_int_array(C->p, Cp_correct, 6)); - - free_csr_matrix(A); - free_csc_matrix(C); - - return 0; -} - -const char *test_csr_to_csc2(void) -{ - CSR_Matrix *A = new_csr_matrix(20, 30, 120); - double Ax[120] = {9, 6, 5, 9, 7, 3, 8, 2, 6, 1, 3, 9, 2, 8, 9, 1, 4, 9, 2, 1, - 3, 4, 2, 8, 6, 2, 9, 7, 3, 8, 3, 7, 9, 2, 2, 2, 5, 5, 3, 5, - 1, 6, 7, 2, 7, 3, 3, 7, 3, 5, 4, 7, 7, 3, 6, 3, 6, 1, 8, 8, - 3, 2, 2, 3, 4, 5, 5, 5, 8, 3, 5, 3, 7, 5, 1, 4, 9, 6, 6, 7, - 4, 6, 8, 2, 7, 3, 5, 3, 3, 4, 7, 3, 6, 4, 2, 1, 1, 5, 5, 8, - 1, 9, 5, 2, 3, 8, 5, 8, 4, 5, 5, 6, 9, 6, 4, 4, 1, 8, 9, 8}; - int Ai[120] = {1, 2, 3, 19, 21, 22, 9, 10, 19, 20, 25, 0, 6, 8, 9, - 12, 15, 19, 20, 21, 26, 2, 5, 6, 8, 12, 14, 16, 19, 27, - 8, 11, 13, 15, 25, 26, 27, 10, 12, 19, 22, 23, 24, 25, 28, - 1, 11, 12, 15, 18, 24, 13, 22, 2, 5, 6, 9, 18, 24, 3, - 6, 8, 22, 20, 27, 7, 9, 17, 26, 29, 0, 1, 11, 13, 15, - 16, 18, 23, 24, 4, 5, 8, 9, 16, 20, 23, 4, 6, 14, 15, - 24, 8, 9, 11, 12, 20, 22, 29, 2, 5, 12, 14, 15, 19, 21, - 10, 19, 27, 1, 5, 6, 9, 11, 15, 21, 26, 3, 15, 26, 27}; - int Ap[21] = {0, 6, 11, 21, 30, 37, 45, 51, 53, 59, 63, - 65, 70, 79, 86, 91, 98, 105, 108, 116, 120}; - memcpy(A->x, Ax, 120 * sizeof(double)); - memcpy(A->i, Ai, 120 * sizeof(int)); - memcpy(A->p, Ap, 21 * sizeof(int)); - - CSC_Matrix *C = csr_to_csc(A); - - double Cx_correct[120] = { - 9, 5, 9, 3, 3, 4, 6, 4, 3, 5, 5, 8, 1, 7, 5, 2, 6, 4, 8, 5, 2, 8, 3, 3, - 3, 5, 5, 8, 6, 3, 2, 6, 3, 8, 9, 6, 5, 8, 6, 6, 2, 5, 8, 7, 3, 7, 4, 9, - 1, 2, 3, 7, 2, 1, 9, 7, 5, 9, 3, 9, 4, 2, 3, 1, 4, 5, 6, 8, 7, 4, 2, 5, - 5, 1, 9, 9, 6, 9, 3, 5, 2, 5, 1, 2, 3, 7, 1, 7, 1, 3, 4, 3, 1, 7, 2, 1, - 6, 6, 3, 7, 4, 8, 6, 7, 3, 2, 2, 3, 2, 8, 4, 9, 8, 5, 4, 8, 8, 7, 3, 5}; - int Ci_correct[120] = { - 2, 12, 0, 6, 12, 18, 0, 3, 8, 16, 0, 9, 19, 13, 14, 3, 8, 13, - 16, 18, 2, 3, 8, 9, 14, 18, 11, 2, 3, 4, 9, 13, 15, 1, 2, 8, - 11, 13, 15, 18, 1, 5, 17, 4, 6, 12, 15, 18, 2, 3, 5, 6, 15, 16, - 4, 7, 12, 3, 14, 16, 2, 4, 6, 12, 14, 16, 18, 19, 3, 12, 13, 11, - 6, 8, 12, 0, 1, 2, 3, 5, 16, 17, 1, 2, 10, 13, 15, 0, 2, 16, - 18, 0, 5, 7, 9, 15, 5, 12, 13, 5, 6, 8, 12, 14, 1, 4, 5, 2, - 4, 11, 18, 19, 3, 4, 10, 17, 19, 5, 11, 15}; - int Cp_correct[31] = {0, 2, 6, 10, 13, 15, 20, 26, 27, 33, 40, - 43, 48, 54, 57, 60, 68, 71, 72, 75, 82, 87, - 91, 96, 99, 104, 107, 112, 117, 118, 120}; - - mu_assert("C vals incorrect", cmp_double_array(C->x, Cx_correct, 120)); - mu_assert("C rows incorrect", cmp_int_array(C->i, Ci_correct, 120)); - mu_assert("C cols incorrect", cmp_int_array(C->p, Cp_correct, 31)); - - free_csr_matrix(A); - free_csc_matrix(C); - - return 0; -} - /* Test ATA_alloc with a simple 3x3 example * A is 4x3 (4 rows, 3 columns): * [x 0 x] diff --git a/tests/utils/test_csr_csc_conversion.h b/tests/utils/test_csr_csc_conversion.h index 8c0ba108..efbdc9e1 100644 --- a/tests/utils/test_csr_csc_conversion.h +++ b/tests/utils/test_csr_csc_conversion.h @@ -29,7 +29,7 @@ const char *test_csr_to_csc_split(void) int *iwork = (int *) malloc(A->n * sizeof(int)); /* First, fill sparsity pattern */ - CSC_Matrix *C = csr_to_csc_fill_sparsity(A, iwork); + CSC_Matrix *C = csr_to_csc_alloc(A, iwork); /* Check sparsity pattern */ int Cp_correct[6] = {0, 1, 2, 3, 4, 5}; @@ -74,7 +74,7 @@ const char *test_csc_to_csr_sparsity(void) int *iwork = (int *) malloc(A->m * sizeof(int)); /* Fill sparsity pattern */ - CSR_Matrix *C = csc_to_csr_fill_sparsity(A, iwork); + CSR_Matrix *C = csc_to_csr_alloc(A, iwork); /* Expected CSR format: * Row 0: [1.0 at col 0, 2.0 at col 4] @@ -113,7 +113,7 @@ const char *test_csc_to_csr_values(void) int *iwork = (int *) malloc(A->m * sizeof(int)); /* Fill sparsity pattern */ - CSR_Matrix *C = csc_to_csr_fill_sparsity(A, iwork); + CSR_Matrix *C = csc_to_csr_alloc(A, iwork); /* Fill values */ csc_to_csr_fill_values(A, C, iwork); @@ -148,12 +148,12 @@ const char *test_csr_csc_csr_roundtrip(void) /* Convert CSR to CSC */ int *iwork_csc = (int *) malloc(A->n * sizeof(int)); - CSC_Matrix *B = csr_to_csc_fill_sparsity(A, iwork_csc); + CSC_Matrix *B = csr_to_csc_alloc(A, iwork_csc); csr_to_csc_fill_values(A, B, iwork_csc); /* Convert CSC back to CSR */ int *iwork_csr = (int *) malloc(B->m * sizeof(int)); - CSR_Matrix *C = csc_to_csr_fill_sparsity(B, iwork_csr); + CSR_Matrix *C = csc_to_csr_alloc(B, iwork_csr); csc_to_csr_fill_values(B, C, iwork_csr); /* C should match A */ diff --git a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h index 6187f1d3..d0dbe4a6 100644 --- a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h +++ b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h @@ -3,6 +3,7 @@ #include "elementwise_full_dom.h" #include "minunit.h" #include "numerical_diff.h" +#include "other.h" #include "test_helpers.h" const char *test_wsum_hess_exp_sum(void) @@ -198,3 +199,88 @@ const char *test_wsum_hess_multiply_deep_composite(void) free_csr_matrix(B); return 0; } + +const char *test_wsum_hess_quad_form_Ax(void) +{ + double u_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double w = 1.0; + + CSR_Matrix *A = new_csr_random(3, 4, 1.0); + + /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */ + CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; + int Qi[5] = {0, 1, 0, 1, 2}; + int Qp[4] = {0, 2, 4, 5}; + memcpy(Q->x, Qx, 5 * sizeof(double)); + memcpy(Q->i, Qi, 5 * sizeof(int)); + memcpy(Q->p, Qp, 4 * sizeof(int)); + + expr *x = new_variable(4, 1, 1, 6); + expr *Ax = new_left_matmul(x, A); + expr *node = new_quad_form(Ax, Q); + + mu_assert("check_wsum_hess failed", + check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + free_csr_matrix(A); + free_csr_matrix(Q); + return 0; +} + +const char *test_wsum_hess_quad_form_sin_Ax(void) +{ + double u_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double w = 2.0; + + CSR_Matrix *A = new_csr_random(3, 4, 1.0); + + /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */ + CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; + int Qi[5] = {0, 1, 0, 1, 2}; + int Qp[4] = {0, 2, 4, 5}; + memcpy(Q->x, Qx, 5 * sizeof(double)); + memcpy(Q->i, Qi, 5 * sizeof(int)); + memcpy(Q->p, Qp, 4 * sizeof(int)); + + expr *x = new_variable(4, 1, 1, 6); + expr *Ax = new_left_matmul(x, A); + expr *sin_Ax = new_sin(Ax); + expr *node = new_quad_form(sin_Ax, Q); + + mu_assert("check_wsum_hess failed", + check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + free_csr_matrix(A); + free_csr_matrix(Q); + return 0; +} + +const char *test_wsum_hess_quad_form_exp(void) +{ + double u_vals[3] = {0.5, 1.0, 1.5}; + double w = 3.0; + + /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */ + CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; + int Qi[5] = {0, 1, 0, 1, 2}; + int Qp[4] = {0, 2, 4, 5}; + memcpy(Q->x, Qx, 5 * sizeof(double)); + memcpy(Q->i, Qi, 5 * sizeof(int)); + memcpy(Q->p, Qp, 4 * sizeof(int)); + + expr *x = new_variable(3, 1, 0, 3); + expr *exp_x = new_exp(x); + expr *node = new_quad_form(exp_x, Q); + + mu_assert("check_wsum_hess failed", + check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(node); + free_csr_matrix(Q); + return 0; +}