RMTPP documentation

This neural network model is based on the paper "Recurrent Marked Temporal Point Processes: Embedding Event History to Vector" by Du et al. In particular, the implementation is a modified version of the code in the repository https://github.com/woshiyyya/ERPP-RMTPP.git.
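
The Net class documented below can be instantiated directly once a configuration object is available. The following is a minimal usage sketch; the SimpleNamespace and all values in it are hypothetical stand-ins for the server's real configuration object, whose attribute names match the ones read by the code below.

from types import SimpleNamespace
from server.RMTPP_torch.model import Net  # assuming the package layout shown below

config = SimpleNamespace(
    number_classes=10,  # number of distinct event markers (hypothetical)
    emb_dim=32,         # marker embedding dimension
    hid_dim=64,         # LSTM hidden size
    mlp_dim=64,         # size of the MLP layer
    dropout=0.1,
    lr=1e-3,            # learning rate for Adam
    alpha=0.05,         # weight of the time loss in the total loss
    cuda=False,
)
lossweight = [1.0] * config.number_classes  # uniform class weights for the event loss
model = Net(config, lossweight)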

Net

Bases: Module

Source code in server/RMTPP_torch/model.py
class Net(nn.Module):
    def __init__(self, config, lossweight):
        super(Net, self).__init__()
        self.config = config 
        self.n_class = config.number_classes
        self.embedding = nn.Embedding(num_embeddings=config.number_classes, embedding_dim=config.emb_dim)
        self.emb_drop = nn.Dropout(p=config.dropout)
        # dropout randomly zeroes some entries; it is used for regularization
        self.lstm = nn.LSTM(input_size=config.emb_dim + 1,         #we add one to the input because we merge the embedding vector with the time input (float)
                            hidden_size=config.hid_dim,
                            batch_first=True,
                            bidirectional=False)  
        self.mlp = nn.Linear(in_features=config.hid_dim, out_features=config.mlp_dim)
        self.mlp_drop = nn.Dropout(p=config.dropout)

        self.event_linear = nn.Linear(in_features=config.mlp_dim, out_features=config.number_classes) #here we generate the output logits for the label
        self.time_linear = nn.Linear(in_features=config.mlp_dim, out_features=1) #here we calc the time prediction
        self.set_criterion(lossweight) 
        self.lossweight = lossweight
        self.optimizer = Adam(self.parameters(), lr=self.config.lr) #the author uses BertAdam, but we will just use Adam 

    def set_optimizer(self, total_step):
        self.optimizer = Adam(self.parameters(), lr=self.config.lr)


    def set_criterion(self, weight):
        #: cross entropy for the markers/label logits
        self.event_criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(weight))

        if self.config.cuda: 
            self.intensity_w = nn.Parameter(torch.tensor(0.1, dtype=torch.float, device='cuda'))
            self.intensity_b = nn.Parameter(torch.tensor(0.1, dtype=torch.float, device='cuda'))
            self.time_criterion = self.RMTPPLoss
        else: 
            self.intensity_w = nn.Parameter(torch.tensor(0.1, dtype=torch.float, device= 'cpu'))
            self.intensity_b = nn.Parameter(torch.tensor(0.1, dtype=torch.float, device = 'cpu'))
            self.time_criterion = self.RMTPPLoss


    def RMTPPLoss(self, pred, gold):
        """
        Calculate the loss for the time prediction.
        """
        loss = torch.mean(pred + self.intensity_w * gold + self.intensity_b +
                          (torch.exp(pred + self.intensity_b) -
                           torch.exp(pred + self.intensity_w * gold + self.intensity_b)) / self.intensity_w) #: loss function for the time prediction (see paper)
        return -1 * loss



    def forward(self, input_time, input_events):
        event_embedding = self.embedding(input_events)
        event_embedding = self.emb_drop(event_embedding)

        # concatenate the embedding vector with the time input (one extra feature dimension)
        lstm_input = torch.cat((event_embedding, input_time.unsqueeze(-1)), dim=-1)
        hidden_state, _ = self.lstm(lstm_input) 

        # hidden_state = torch.cat((hidden_state, input_time.unsqueeze(-1)), dim=-1)  # commented out in the original implementation
        mlp_output = torch.tanh(self.mlp(hidden_state[:, -1, :]))  # multi-layer perceptron output
        mlp_output = self.mlp_drop(mlp_output) 
        # the MLP output is passed through two separate linear heads:
        event_logits = self.event_linear(mlp_output)  # logits for the event marker
        time_logits = self.time_linear(mlp_output)  # time prediction
        return time_logits, event_logits  # get the predictions 

    def dispatch(self, tensors):
        if self.config.cuda:
            for i in range(len(tensors)):

                tensors[i] = tensors[i].cuda().contiguous()
        else:
            for i in range(len(tensors)):
                tensors[i] = tensors[i].contiguous()
        return tensors


    def train_batch(self, batch):
        time_tensor, event_tensor = batch

        # remove the last element of each sequence (the prediction target) from the training input via slicing
        time_input, time_target = self.dispatch([time_tensor[:, :-1], time_tensor[:, -1]])
        event_input, event_target = self.dispatch([event_tensor[:, :-1], event_tensor[:, -1]])
        time_logits, event_logits = self.forward(time_input, event_input)
        #calc loss
        loss1 = self.time_criterion(time_logits.view(-1), time_target.view(-1))
        loss2 = self.event_criterion(event_logits.view(-1, self.n_class), event_target.view(-1))
        loss = self.config.alpha * loss1 + loss2  #total loss formula 
        loss.backward()  # backpropagation through time

        self.optimizer.step()
        self.optimizer.zero_grad() #reset grads
        return loss1.item(), loss2.item(), loss.item()


    def predict(self, batch, pm_active = False):
        """
        Make a prediction.

        Args:
            batch (tuple): A batch containing one or more inputs for doing predictions.
            pm_active (bool): If True, returns only the most likely prediction.

        Returns:
            tuple or int: If pm_active is True, returns a tuple containing the index (encoded marker) of the event that has the highest probability, the maximum probability, and the last time prediction. 
                  If pm_active is False, returns two lists. The first list contains the timestamps of the predictions, and the second list contains the index (encoded marker) of the event that has the highest probability.
        """
        time_tensor, event_tensor = batch
        #make sure to cut out the last event/timestamp from each sequence: 
        time_input, time_target = self.dispatch([time_tensor[:, :-1], time_tensor[:, -1]])
        event_input, event_target = self.dispatch([event_tensor[:, :-1], event_tensor[:, -1]])
        time_logits, event_logits = self.forward(time_input, event_input)
        event_pred = event_logits.detach().cpu().numpy()
        event_pred = np.argmax(event_pred, axis=-1)  # for each sequence, find the index that maximizes the prediction
        if pm_active: 
            #: in case we just need the most likely prediction
            lst = event_logits.detach().cpu().numpy().tolist()
            last = softmax(lst[-1]).tolist()  # probabilities for the last sequence in the batch
            max_prob = max(last)
            event_index = last.index(max_prob) #compute argmax 
            time_pred = time_logits.detach().cpu().numpy().tolist()  #: get the last time prediction
            return event_index, max_prob,time_pred[-1][-1]


        time_pred = time_logits.detach().cpu().numpy()
        return time_pred, event_pred


    def predict_sorted(self, batch): 
        """
        Make a prediction.

        Args:
            batch (tuple): A batch containing one or more inputs for doing predictions.

        Returns:
            tuple: A tuple containing two lists. The first list contains the timestamps of the predictions, and the second list contains tuples of the form `(probability, event_index)`. This second list is sorted in descending order, with the event with the highest probability at the beginning. The `event_index` corresponds to the encoding of a marker.
        """
        time_tensor, event_tensor = batch
        #make sure to cut out the last event/timestamp from each sequence: 
        time_input, time_target = self.dispatch([time_tensor[:, :-1], time_tensor[:, -1]])
        event_input, event_target = self.dispatch([event_tensor[:, :-1], event_tensor[:, -1]])
        time_logits, event_logits = self.forward(time_input, event_input)

        event_pred = event_logits.detach().cpu().numpy().tolist()
        event_pred_with_indices = []
        for index, logit_list in enumerate(event_pred):
            index_list= []
            for event_index, prediction in enumerate(softmax(logit_list)):
                index_list.append((prediction, event_index))
            index_list.sort(reverse=True)
            event_pred_with_indices.append(index_list)
        time_pred = time_logits.detach().cpu().tolist()
        return time_pred, event_pred_with_indices

RMTPPLoss(pred, gold)

Calculate the loss for the time prediction.

Source code in server/RMTPP_torch/model.py
def RMTPPLoss(self, pred, gold):
    """
    Calculate the loss for the time prediction.
    """
    loss = torch.mean(pred + self.intensity_w * gold + self.intensity_b +
                      (torch.exp(pred + self.intensity_b) -
                       torch.exp(pred + self.intensity_w * gold + self.intensity_b)) / self.intensity_w) #: loss function for the time prediction (see paper)
    return -1 * loss
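
Written out, the quantity the method averages is the log-likelihood of the observed inter-event time under the RMTPP intensity. With pred the network's time output, d = gold the observed time gap, and w, b the learnable intensity parameters, the code computes

\log f(d) = \mathrm{pred} + w\,d + b + \frac{e^{\mathrm{pred} + b} - e^{\mathrm{pred} + w d + b}}{w}

and returns its negative mean over the batch, so that minimizing the loss maximizes the likelihood of the observed times.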

predict(batch, pm_active=False)

Make a prediction.

Parameters:

    batch (tuple, required): A batch containing one or more inputs for doing predictions.
    pm_active (bool, default False): If True, returns only the most likely prediction.

Returns:

    tuple or int: If pm_active is True, returns a tuple containing the index (encoded marker) of the event that has the highest probability, the maximum probability, and the last time prediction. If pm_active is False, returns two lists: the first contains the timestamps of the predictions, and the second contains the index (encoded marker) of the event that has the highest probability.

Source code in server/RMTPP_torch/model.py
def predict(self, batch, pm_active = False):
    """
    Make a prediction.

    Args:
        batch (tuple): A batch containing one or more inputs for doing predictions.
        pm_active (bool): If True, returns only the most likely prediction.

    Returns:
        tuple or int: If pm_active is True, returns a tuple containing the index (encoded marker) of the event that has the highest probability, the maximum probability, and the last time prediction. 
              If pm_active is False, returns two lists. The first list contains the timestamps of the predictions, and the second list contains the index (encoded marker) of the event that has the highest probability.
    """
    time_tensor, event_tensor = batch
    #make sure to cut out the last event/timestamp from each sequence: 
    time_input, time_target = self.dispatch([time_tensor[:, :-1], time_tensor[:, -1]])
    event_input, event_target = self.dispatch([event_tensor[:, :-1], event_tensor[:, -1]])
    time_logits, event_logits = self.forward(time_input, event_input)
    event_pred = event_logits.detach().cpu().numpy()
    event_pred = np.argmax(event_pred, axis=-1)  # for each sequence, find the index that maximizes the prediction
    if pm_active: 
        #: in case we just need the most likely prediction
        lst = event_logits.detach().cpu().numpy().tolist()
        last = softmax(lst[-1]).tolist()  # probabilities for the last sequence in the batch
        max_prob = max(last)
        event_index = last.index(max_prob) #compute argmax 
        time_pred = time_logits.detach().cpu().numpy().tolist()  #: get the last time prediction
        return event_index, max_prob,time_pred[-1][-1]


    time_pred = time_logits.detach().cpu().numpy()
    return time_pred, event_pred
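
A sketch of calling predict on a toy batch, reusing the config and model from the instantiation sketch at the top of this page. The batch layout mirrors what ATMDataset.to_features (documented below) produces; the shapes are illustrative assumptions.

import torch

seq_len, batch_size = 10, 4
time_tensor = torch.rand(batch_size, seq_len)  # inter-event time gaps
event_tensor = torch.randint(0, config.number_classes, (batch_size, seq_len))  # encoded markers

time_pred, event_pred = model.predict((time_tensor, event_tensor))
# time_pred  has shape (batch_size, 1): one predicted time gap per sequence
# event_pred has shape (batch_size,):   the most likely marker index per sequence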

predict_sorted(batch)

Make a prediction.

Parameters:

    batch (tuple, required): A batch containing one or more inputs for doing predictions.

Returns:

    tuple: A tuple containing two lists. The first list contains the timestamps of the predictions, and the second list contains tuples of the form (probability, event_index), sorted in descending order so the event with the highest probability comes first. The event_index corresponds to the encoding of a marker.

Source code in server/RMTPP_torch/model.py
def predict_sorted(self, batch): 
    """
    Make a prediction.

    Args:
        batch (tuple): A batch containing one or more inputs for doing predictions.

    Returns:
        tuple: A tuple containing two lists. The first list contains the timestamps of the predictions, and the second list contains tuples of the form `(probability, event_index)`. This second list is sorted in descending order, with the event with the highest probability at the beginning. The `event_index` corresponds to the encoding of a marker.
    """
    time_tensor, event_tensor = batch
    #make sure to cut out the last event/timestamp from each sequence: 
    time_input, time_target = self.dispatch([time_tensor[:, :-1], time_tensor[:, -1]])
    event_input, event_target = self.dispatch([event_tensor[:, :-1], event_tensor[:, -1]])
    time_logits, event_logits = self.forward(time_input, event_input)

    event_pred = event_logits.detach().cpu().numpy().tolist()
    event_pred_with_indices = []
    for index, logit_list in enumerate(event_pred):
        index_list= []
        for event_index, prediction in enumerate(softmax(logit_list)):
            index_list.append((prediction, event_index))
        index_list.sort(reverse=True)
        event_pred_with_indices.append(index_list)
    time_pred = time_logits.detach().cpu().tolist()
    return time_pred, event_pred_with_indices

This module computes the input for the RNN. It assumes that the input event log is in the right format, i.e. rows are sorted by case ID and timestamp, and the columns are properly encoded.

It computes time differences and uses a sliding window.
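
For illustration, a window of length seq_len = 3 sliding over a single case turns the raw columns into overlapping fixed-length subsequences (hypothetical toy values):

# one case with four events
times  = [0.0, 1.5, 2.0, 4.0]
events = [2, 0, 1, 1]

# windows of length 3 produced by the sliding pass:
#   times [0.0, 1.5, 2.0]   events [2, 0, 1]
#   times [1.5, 2.0, 4.0]   events [0, 1, 1]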

ATMDataset

Helper class for the neural network that is in charge of applying the sliding window algorithm over the sequences and computing the time differences in the timestamp column of the data.

It can be seen as a wrapper that performs some further preprocessing that is very specific to the NN.

Source code in server/RMTPP_torch/util.py
class ATMDataset:
    """
    helper class for the neural network that
    is in charge of doing the sliding window algorithm
    over the sequences and get the time differences
    in the timestamp column of the data. 

    it can be seen as a wrapper that does some further 
    preprocessing that is very especific to the NN.        
    """
    def __init__(self, config, data, case_id, timestamp_key, event_key, in_recursive_call=False, *args):
        self.id = list(data[case_id])
        self.time = list(data[timestamp_key])
        self.event = list(data[event_key])

        self.config = config
        self.seq_len = config.seq_len
        self.in_recursive_call = in_recursive_call #variable used for multiple predictions
        if not self.in_recursive_call:
            self.time_seqs, self.event_seqs = self.generate_sequence()
            self.statistic()
        else: 
            self.time_seqs, self.event_seqs = self.sliding_window()
            self.first_time_stamp = self.time[0]



    def sliding_window(self):
        event_windows = []
        time_windows = []
        event_window = deque(self.event[:self.seq_len])
        time_window = deque(self.time[:self.seq_len])
        event_windows.append(list(event_window))
        time_windows.append(list(time_window))
        end_idx = self.seq_len
        for i in range(end_idx, len(self.time)):
            event_window.popleft()
            event_window.append(self.event[end_idx])
            time_window.popleft()
            time_window.append(self.time[end_idx])
            time_windows.append(list(time_window))
            event_windows.append(list(event_window))
        return time_windows, event_windows

    def generate_sequence(self):
        """
        Use the sliding window algorithm so that the sequences
        fit into the NN (this way we obtain the proper tensor dimensions).
        """
        MAX_INTERVAL_VARIANCE = 1
        pbar = tqdm(total=len(self.id) - self.seq_len + 1) #tqdm is the progress bar
        time_seqs = []
        event_seqs = []
        cur_end = self.seq_len - 1
        #: this is a sliding window algorithm to cut each input sequence into sub sequences of the same length
        while cur_end < len(self.id):
            pbar.update(1)
            cur_start = cur_end - self.seq_len + 1
            if self.id[cur_start] != self.id[cur_end]:
                cur_end += 1
                continue

            subseq = self.time[cur_start:cur_end + 1]
            #print(subseq)
            # if max(subseq) - min(subseq) > MAX_INTERVAL_VARIANCE:
            #     if self.subset == "train":
            #         cur_end += 1
            #         continue
            time_seqs.append(subseq)
            event_seqs.append(self.event[cur_start:cur_end + 1])
            cur_end += 1
        return time_seqs, event_seqs

    def __getitem__(self, item):
        return self.time_seqs[item], self.event_seqs[item]

    def __len__(self):
        return len(self.time_seqs)

    @staticmethod
    def to_features(batch):
        """
        Returns:
            Two tensors: one containing the time differences between adjacent timestamps
            and the other one containing the events.
        """
        times, events = [], []
        for time, event in batch:
            time = np.array([time[0]] + time)  # prepend the first timestamp so the first diff is zero

            time = np.diff(time)
            times.append(time)
            events.append(event)

        #return torch.FloatTensor(times), torch.LongTensor(events)
        return torch.FloatTensor(np.asarray(times)), torch.LongTensor(np.asarray(events))

    def statistic(self):
        print("TOTAL SEQs:", len(self.time_seqs))
        # intervals = np.diff(np.array(self.time))
        # for thr in [0.001, 0.01, 0.1, 1, 10, 100]:
        #     print(f"<{thr} = {np.mean(intervals < thr)}")

    def importance_weight(self, count):
        """
        Compute per-class weights for CrossEntropyLoss (inverse class frequency).
        """
        weight = [len(self.event) / count[k] for k in sorted(count.keys())]
        return weight
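
A sketch of how the dataset is typically wired to a PyTorch DataLoader, with to_features as the collate function, reusing the config and model from the sketches above. The DataFrame df, the column names, and the batch size are placeholders, not fixed by this module:

from torch.utils.data import DataLoader

dataset = ATMDataset(config, df, case_id="case_id",
                     timestamp_key="timestamp", event_key="event")
loader = DataLoader(dataset, batch_size=32, shuffle=True,
                    collate_fn=ATMDataset.to_features)

for batch in loader:  # batch is (time_tensor, event_tensor)
    loss_time, loss_event, loss_total = model.train_batch(batch)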

generate_sequence()

Use the sliding window algorithm so that the sequences fit into the NN (this way we obtain the proper tensor dimensions).

Source code in server/RMTPP_torch/util.py
def generate_sequence(self):
    """
    Use the sliding window algorithm so that the sequences
    fit into the NN (this way we obtain the proper tensor dimensions).
    """
    MAX_INTERVAL_VARIANCE = 1
    pbar = tqdm(total=len(self.id) - self.seq_len + 1) #tqdm is the progress bar
    time_seqs = []
    event_seqs = []
    cur_end = self.seq_len - 1
    #: this is a sliding window algorithm to cut each input sequence into sub sequences of the same length
    while cur_end < len(self.id):
        pbar.update(1)
        cur_start = cur_end - self.seq_len + 1
        if self.id[cur_start] != self.id[cur_end]:
            cur_end += 1
            continue

        subseq = self.time[cur_start:cur_end + 1]
        #print(subseq)
        # if max(subseq) - min(subseq) > MAX_INTERVAL_VARIANCE:
        #     if self.subset == "train":
        #         cur_end += 1
        #         continue
        time_seqs.append(subseq)
        event_seqs.append(self.event[cur_start:cur_end + 1])
        cur_end += 1
    return time_seqs, event_seqs

importance_weight(count)

Compute per-class weights for CrossEntropyLoss (inverse class frequency).

Source code in server/RMTPP_torch/util.py
def importance_weight(self, count):
    """
    Compute per-class weights for CrossEntropyLoss (inverse class frequency).
    """
    weight = [len(self.event) / count[k] for k in sorted(count.keys())]
    return weight
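
For example, with 50 events in total and a class count of {0: 10, 1: 40} (hypothetical numbers), the weights come out inversely proportional to class frequency:

count = {0: 10, 1: 40}  # events observed per encoded marker
# with len(self.event) == 50:
# weight = [50 / 10, 50 / 40] == [5.0, 1.25]
# the rarer class 0 gets the larger weight in the cross-entropy loss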

to_features(batch) staticmethod

Returns:

    Two tensors: one containing the time differences between adjacent timestamps, and the other containing the events.

Source code in server/RMTPP_torch/util.py
@staticmethod
def to_features(batch):
    """
    Returns:
        Two tensors: one containing the time differences between adjacent timestamps
        and the other one containing the events.
    """
    times, events = [], []
    for time, event in batch:
        time = np.array([time[0]] + time)  # prepend the first timestamp so the first diff is zero

        time = np.diff(time)
        times.append(time)
        events.append(event)

    #return torch.FloatTensor(times), torch.LongTensor(events)
    return torch.FloatTensor(np.asarray(times)), torch.LongTensor(np.asarray(events))
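
Because the first timestamp of each window is prepended before np.diff, the result keeps the window length and its first gap is always zero. A toy example:

import numpy as np

time = [3.0, 4.5, 5.0]               # absolute timestamps of one window
np.diff(np.array([time[0]] + time))  # -> array([0. , 1.5, 0.5])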

clf_metric(pred, gold, n_class)

Compute test metrics: macro-averaged precision, recall, and F1 score.

Source code in server/RMTPP_torch/util.py
def clf_metric(pred, gold, n_class):
    """
    compute test metrics.
    :return:
    - recall 
    - precision
    - f1 score
    """
    gold_count = Counter(gold)
    pred_count = Counter(pred)
    prec = recall = 0
    pcnt = rcnt = 0
    for i in range(n_class):
        #print(np.logical_and(pred == gold, pred == i))
        match_count = np.logical_and(pred == gold, pred == i).sum()  # true positives for class i
        if pred_count[i] != 0:
            prec += match_count / pred_count[i]  # per-class precision: TP / #predicted as i
            pcnt += 1
        if gold_count[i] != 0:
            recall += match_count / gold_count[i]  # per-class recall: TP / #actually i
            rcnt += 1
    prec /= pcnt
    recall /= rcnt
    print(f"pcnt={pcnt}, rcnt={rcnt}")
    print("__"*15)
    print(prec)
    print(recall)
    print(type(prec))
    print(type(recall))
    f1 = 2 * prec * recall / (prec + recall)
    return prec, recall, f1
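
A small sanity check with toy arrays; pred and gold must be NumPy arrays so that the element-wise comparisons inside the function work:

import numpy as np

pred = np.array([0, 1, 1, 2])  # predicted class per sample
gold = np.array([0, 1, 2, 2])  # ground-truth class per sample
prec, recall, f1 = clf_metric(pred, gold, n_class=3)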