Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 258 additions & 0 deletions scripts/builtin/hmm.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This script implements the hidden markov model method
# INPUT:
# --------------------------------------------------------------------------------------------
# X Output of hmm of last n timestep
# OUTPUT:
# --------------------------------------------------------------------------------------------
# ip Start probabilities
# A Transition probabilities
# B Emission probabilities
# --------------------------------------------------------------------------------------------

m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] A, Matrix[Double] B)
{

#X should have the size of 1 * ncols
#should be transposed for the unique function
unique_X = matrix(X, rows=ncol(X), cols=1)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to transpose call t(X)

nr_outputs = unique_vals(unique_X)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have a builtin function to calculate the number of unique values:
countDistinct()

you can give it an direction if you want to know the number of distinct in columns, rows or entirety.


/*
Since nr of states if unknown, fit a hmm model for every total number of states
from 1 to some max_states or 1 to log(n_timesteps), depending on whichever is greater.

Also the break condition is: If the likelihood decreases with increase in number of total states,
break and take the paremeters of the last iteration as the optimal one.
*/

max_states = 6
T = ncol(X)

if (max_states < log(T)){
max_states = ceil(log(T))
}

search = TRUE
nr_states = 2
seed = 42

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make seed an argument initially


while (search) {
[A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, nr_outputs, 20, seed)
seed = seed+1
if (nr_states == 2) {
prev_ll = -1
}
if (curr_ll < prev_ll) {
search = FALSE
}
else {
A = A_new
B = B_new
ip = ip_new
}
prev_ll = curr_ll
nr_states = nr_states+1

if (nr_states > max_states) {
search = FALSE
}
}
}

col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
m_norm = m / rowSums(m)
}

unique_vals = function (Matrix[Double] X) return (Integer l) {
# group and count duplicates
I = aggregate (target = X, groups = X[,1], fn = "count")
# select groups
res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select = (I != 0))
l = length(res)
}

baum_welch = function( Matrix[Double] X, Integer nr_states, Integer nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip, Double likelihood, Integer seed)
{
#initialize state transition and emmission matrices uniformly
A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + rand(rows=nr_states, cols=nr_states, seed=seed))
seed = seed+1
B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + rand(rows=nr_states, cols=nr_outputs, seed=seed))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Values contained in the matrix is equivalent in each cell for both calls.
perhaps the value to put into the matrix can be calculated once and then assigned into a matrix?
And even better if this is something we can assign into the arguments of the rand call, then we would reduce the current calls from 3 matrices materialized to only 1 per line of (98, 100, and 102)

seed = seed+1
ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, cols=1, seed=seed)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, each call to random have to have a unique seed. A suggestion for this is to make each rand call with seed += 1. (+= is not supported so you have to assign back the seed with seed = seed + 1, between the lines.)

seed = seed+1

likelihood = 0
T = ncol(X)

for (i in 1:IT) {
alpha = forward(X, A, B, ip)
beta = backward(X, A, B)

gamma = calculate_gamma(alpha, beta)
eta = calculate_eta(X, alpha, beta, A, B)

ip = gamma[, 1]
A = calculate_A(eta, gamma, nr_states)
B = calculate_B(gamma, X, nr_states, nr_outputs)

#likelhood using forward method
likelihood = sum(gamma[, T] * alpha[, T])
}
}

calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer nr_states) return (Matrix[Double] A) {
A = rand(rows=nr_states, cols=nr_states)
T = ncol(eta)

parfor (i in 1:nr_states) {
parfor (j in 1:nr_states) {
eta_id = (i-1) * nr_states + j
num_ij = sum(eta[eta_id, 1:T-1])

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1:T-1 takes all columns except the last one. Is this the intended behavior or should it take the entire row?

den_ij = sum(gamma[i,1:T-1])
A[i,j] = num_ij/den_ij
}
}
}

calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer nr_states, Integer nr_outputs) return (Matrix[Double] B) {
B = rand(rows=nr_states, cols=nr_outputs)

parfor (i in 1:nr_states){
parfor (j in 1:nr_outputs) {
B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

materialize gamma[i, ] in the outer parfor.

}
}
}

/*
To work with 3d matrices, the values in first two dimensions of eta matrix
must be rolled the first dimension. The following indexing function maps the
index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
*/
index = function(Integer rolled_i, Integer nr_states) return(Double i, Double j) {
i = ceil(rolled_i / nr_states) #index starts at 1
j = rolled_i - (nr_states * (i-1)) #i-1) *
}

calculate_eta = function (Matrix[Double] X, Matrix[Double] alpha, Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] eta)
{
nr_states = nrow(alpha)
T = ncol(alpha)

/*
eta a (nr_states * nr_states) * T matrix with cell (i, t) being probability of
the state being at i at timestep j given the observed output
*/

tot_transitions = nr_states * nr_states
eta = rand(rows=tot_transitions, cols=T-1)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need a seed here.


/*
The transitions will be indiced as such:transition 1-N will represent
transition from state 1 to 1, 2 upto state N. transition N+1-2N will represent
transitions from 2 to 1, 2 upto state N
*/

parfor (trans_id in 1:tot_transitions) {
[i, j] = index(trans_id, nr_states)
for (t in 1:(T-1)) {
#indices for alpha and beta
ot1 = output_t(X, t+1)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove the method output_t and call the extraction here. It should automatically know it is a scalar.
Do you cast to int because you want to round? if so then call round()

num_ij = alpha[i, t] * A[i, j] * beta[j, t+1] * B[j, ot1]

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A[i,j] and B[j, ot1] in outer forloop

den = matrix(0, rows=nr_states, cols=1)

parfor (k in 1:nr_states) {
den[k, 1] = sum(alpha[k, t] * t(A[k, ]) * beta[, t+1] * B[,ot1])
}
s = num_ij / sum(den)
eta[trans_id, t] = as.scalar(s[1,1])
}
}
}

calculate_gamma = function (Matrix[Double] alpha, Matrix[Double] beta) return (Matrix[Double] gamma)
{
/*
gamma a nr_state * T matrix with cell (i, t) being probability of
the state being at i at timestep j given the observed output
*/
nr_states = nrow(alpha)
T = ncol(alpha)

gamma = rand(rows=nr_states, cols=T)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This rand call seems to be done just to materialize a matrix?
If so we could consider changing it to a matrix(rows...) call.
But it might not be the best solution.


parfor (i in 1:nrow(alpha)) {
parfor (t in 1:ncol(alpha)) {
num_it = alpha[i, t] * beta[i, t]
den_it = sum(alpha[,t] * beta[,t])
gamma[i, t] = num_it / den_it
}
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these parfors can be rewritten as:

num_it_M = alpha * beta
den_it_M =  rowsum(num_it_M)
gamma = num_it_M / den_it_M

This is because we support direct matrix and vector operations.
And i think this nicely maps to it. (But i might have an error here.)

}

output_t = function (Matrix[Double] X, Integer t) return (Integer o) {
o = as.integer(as.scalar(X[1, t]))
}

forward = function (Matrix[Double] X, Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip) return (Matrix[Double] alpha)
{
/*
alpha a matrix of size nr_states * T with a cell i,t being probability of
the state being at state i at timestep j and the outputs till timestep j
*/

T = ncol(X)
nr_states = nrow(A)
alpha = matrix(0, rows=nr_states, cols=T)
o_1 = output_t(X, 1)
alpha[ ,1] = ip * B[ ,o_1]

for (t in 2:T) {
ot = output_t(X, t)
for (i in 1:nr_states) {
alpha[i, t] = B[i, ot] * sum(alpha[,t-1]* A[ ,i])

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move alpha[,t-1] to outer loop

}
}
}

backward = function (Matrix[Double] X, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] beta)
{
/*
alpha a matrix of size nr_states * T with a cell i,t being probability of
the model producing outputs (o_t+1,..., o_T) given that the model is
at state i at time t
*/

T = ncol(X)
nr_states = nrow(A)
beta = matrix(1/T, rows=nr_states, cols=T)
beta[,T] = matrix(1/T, rows=nr_states, cols=1)

for (t in (T-1):1) {
ot = output_t(X, t)
for (i in 1:nr_states) {
beta[i, t] = sum(beta[, t+1] * t(A[i, ]) * B[ , ot])

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move beta[,t+1] to outer loop if possible

}
}
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

new lines in end of files.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not understand this. Could you please clarify?

@Baunsgaard Baunsgaard Jul 23, 2023

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

simple, GitHub indicate with a big red mark that there is no newline. The code does work, but we try to make it consistent across files.

image

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I add or remove a newline?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add

76 changes: 76 additions & 0 deletions scripts/builtin/hmmPredict.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This script implements the hidden markov model method
# INPUT:
# --------------------------------------------------------------------------------------------
# X Output of hmm of last n timestep
# k Number of timesteps to predict the output for
# OUTPUT:
# --------------------------------------------------------------------------------------------
# outputs Output of hmm for k timesteps
# --------------------------------------------------------------------------------------------

m_hmmPredict = function (Matrix[Double] X, Integer k) return (Matrix[Double] outputs)
{
/*
Only discrete model implemented, need to discretize the cotinous outputs
to work with the continous model
*/
[ip, A, B] = hmm(X)
/*
Also only discrete model implemented, need to discretize the cotinous outputs
to work with the continous model
*/
seed = 42
init_state = choose_value(ip, seed)

states = matrix(0, rows=k, cols=1)

states[1, 1] = init_state
s = init_state
for (i in 2:k) {
seed=seed +1
next_state = choose_value(t(A[s, ]), seed)
states[i, 1] = next_state
s = next_state
}

outputs = matrix(0, rows=nrow(states), cols=1)
for (i in 1:nrow(states)) {
seed = seed+1
s = as.scalar(states[i, 1])
outputs[i, 1] = choose_value(t(B[s, ]), seed)
}
}

choose_value = function (Matrix[Double] prob_dist, Integer seed) return (Double val)
{
# for selecting output or states given probability distribution

# choosing using cumulative distribution
cum_prob = cumsum(prob_dist)

R = rand(rows=1, cols=1, min=0, max=1, seed=seed)

# adding 1 to colsum because indexing starts from 1
val = as.scalar(colSums(cum_prob <= R) + 1)
}
2 changes: 2 additions & 0 deletions src/main/java/org/apache/sysds/common/Builtins.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ public enum Builtins {
GNMF("gnmf", true),
GRID_SEARCH("gridSearch", true),
TOPK_CLEANING("topk_cleaning", true),
HMM("hmm", true),
HMMPREDICT("hmmPredict", true),
HOSPITAL_RESIDENCY_MATCH("hospitalResidencyMatch", true),
HYPERBAND("hyperband", true),
IFELSE("ifelse", false),
Expand Down
Loading