Multioutput prediction using LSTM encoder-decoder with Attention

(I am working in a Jupyter notebook with Python 3.6.12, running TensorFlow 2.4.0.) I have a dataset that consists of 5 input features and 3 output features (which need to be predicted). My features are string representations of binary integers and look as follows:

Input (training) features:

        A      B      C      D       E
57    00101  01000  01001  01000   00110
203   00111  01001  01000  01000   00110
559   00010  01001  01001  01000   00110
247   00101  01001  01001  01000   00110
1111  00111  01001  01000  01000   00110
...     ...    ...    ...    ...     ...
167   10000  00101  01000  10000   00110
908   00100  01000  01001  01000   00111
166   00010  01001  01001  01000   00110
1106  01001  00101  01000  10000   00110
996   00111  01001  01000  01000   00110

[930 rows x 5 columns]

Output (training) features:

        O1     O2         O3
57    10000  00101      00100
203   10000  00100      00100
559   10000  00101      00011
247   10000  00110      00110
1111  10000  01000      00110
...     ...    ...        ...
167   10000  00110      00111
908   00011  00010      00001
166   10000  00101      00011
1106  00010  00011      00001
996   10000  00100      00101

[930 rows x 3 columns]

Then I converted my data into an array to be used in the LSTM, which looks as follows (only the input data is shown below as an example):

[['00101' '01000' '01001' '01000' '00110']
 ['00111' '01001' '01000' '01000' '00110']
 ['00010' '01001' '01001' '01000' '00110']
 ...
 ['00010' '01001' '01001' '01000' '00110']
 ['01001' '00101' '01000' '10000' '00110']
 ['00111' '01001' '01000' '01000' '00110']]

My code looks like this:

import numpy as np
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model

# Then I reshaped my input: split each 5-character string into 5 integer
# digits, giving one (5, 5) matrix per sample
x_tr_re = np.array([int(k) for s in x_tr.flatten() for k in s]).reshape(-1, 5, 5)
x_te_re = np.array([int(a) for c in x_te.flatten() for a in c]).reshape(-1, 5, 5)
print(x_te_re.shape, x_tr_re.shape, y_tr1.shape)
# (405, 5, 5) (930, 5, 5) (930, 3)

# My model:
input_1 = Input(shape=(x_tr_re.shape[1], x_tr_re.shape[2]), name='input_1')
lstm1   = LSTM(50, name='lstm1')(input_1)
output1 = Dense(3, activation='softmax', name='out1')(lstm1)
model   = Model(inputs=input_1, outputs=output1)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['MAE'])
model.fit(x_te_re, y_te1, epochs=1, batch_size=10)

I get the following error:

---------------------------------------------------------------------------
UnimplementedError                        Traceback (most recent call last)
<ipython-input-9-c7d71b522ac7> in <module>
      4 model   = Model(inputs=input_1, outputs=output1)
      5 model.compile(optimizer = 'adam', loss = 'mean_squared_error',metrics = ['MAE'])
----> 6 model.fit( x_te_re, y_te, epochs = 1, batch_size = 10)

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1098                 _r=1):
   1099               callbacks.on_train_batch_begin(step)
-> 1100               tmp_logs = self.train_function(iterator)
   1101               if data_handler.should_sync:
   1102                 context.async_wait()

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
    826     tracing_count = self.experimental_get_tracing_count()
    827     with trace.Trace(self._name) as tm:
--> 828       result = self._call(*args, **kwds)
    829       compiler = xla if self._experimental_compile else nonXla
    830       new_tracing_count = self.experimental_get_tracing_count()

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
    886         # Lifting succeeded, so variables are initialized and we can run the
    887         # stateless function.
--> 888         return self._stateless_fn(*args, **kwds)
    889     else:
    890       _, _, _, filtered_flat_args = \

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
   2941        filtered_flat_args) = self._maybe_define_function(args, kwargs)
   2942     return graph_function._call_flat(
-> 2943         filtered_flat_args, captured_inputs=graph_function.captured_inputs)  # pylint: disable=protected-access
   2944 
   2945   @property

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1917       # No tape is watching; skip to running the function.
   1918       return self._build_call_outputs(self._inference_function.call(
-> 1919           ctx, args, cancellation_manager=cancellation_manager))
   1920     forward_backward = self._select_forward_and_backward_functions(
   1921         args,

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
    558               inputs=args,
    559               attrs=attrs,
--> 560               ctx=ctx)
    561         else:
    562           outputs = execute.execute_with_cancellation(

~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     58     ctx.ensure_initialized()
     59     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60                                         inputs, attrs, num_outputs)
     61   except core._NotOkStatusException as e:
     62     if name is not None:

UnimplementedError:  Cast string to float is not supported
     [[node mean_squared_error/Cast (defined at <ipython-input-9-c7d71b522ac7>:6) ]] [Op:__inference_train_function_6931]

Function call stack:
train_function

Help rectifying my code would be highly appreciated. Also, could anyone help me add an attention layer to this model? I want to see which model performs better, with attention or without.

Topic attention-mechanism sequence-to-sequence lstm autoencoder python

Category Data Science


I guess you should try one-hot encoding of your features (but, as explained in the comments, it's a wild guess without knowing what they represent):

import pandas as pd

values = [['10101', '01000', '01001', '01000', '00110'],
          ['00111', '01001', '01000', '01000', '00110'],
          ['00010', '01001', '01001', '01000', '00110']]
df = pd.DataFrame(values, columns=['A', 'B', 'C', 'D', 'E'])

# Split each 5-character string into 5 single-digit columns (A1..A5, etc.)
for i in range(0, 5):
    df['A' + str(i+1)] = df['A'].str[i]
    df['B' + str(i+1)] = df['B'].str[i]
    df['C' + str(i+1)] = df['C'].str[i]
    df['D' + str(i+1)] = df['D'].str[i]
    df['E' + str(i+1)] = df['E'].str[i]

# Drop the original string columns, keeping only the expanded digits
df = df.drop(['A', 'B', 'C', 'D', 'E'], axis=1)
df

And here is the result:

idx A1  B1  C1  D1  E1  A2  B2  C2  D2  E2  ... A4  B4  C4  D4  E4  A5  B5  C5  D5  E5
0   1   0   0   0   0   0   1   1   1   0   ... 0   0   0   0   1   1   0   1   0   0
1   0   0   0   0   0   0   1   1   1   0   ... 1   0   0   0   1   1   1   0   0   0
2   0   0   0   0   0   0   1   1   1   0   ... 1   0   0   0   1   0   1   1   0   0

Finally, you'll have to adjust the shape of the inputs accordingly and do the same encoding for the outputs, as sketched below.
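
Encoding the targets the same way is what actually resolves the "Cast string to float is not supported" error, since the loss then operates on numbers instead of strings. Here is a minimal sketch, assuming y_tr1 is a NumPy array of 5-character binary strings like x_tr; the 15-unit sigmoid output and binary cross-entropy loss are my assumptions for per-bit targets, not choices from the original post:

import numpy as np
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model

# Targets: split each 5-character string into digits and flatten the
# 3 outputs into one 15-bit vector per sample -> shape (930, 15)
y_tr_re = np.array([int(d) for s in y_tr1.flatten() for d in s]).reshape(-1, 15)

input_1 = Input(shape=(5, 5), name='input_1')
lstm1   = LSTM(50, name='lstm1')(input_1)
# One sigmoid unit per output bit, trained with binary cross-entropy,
# instead of 3 softmax units trained with MSE against string targets
output1 = Dense(15, activation='sigmoid', name='out1')(lstm1)
model   = Model(inputs=input_1, outputs=output1)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_tr_re, y_tr_re, epochs=1, batch_size=10)  # fit on the training data

Note that the original code called model.fit on the test arrays; training should use x_tr_re and the encoded training targets.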

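As for the attention part of the question: TensorFlow 2.4 ships a built-in tf.keras.layers.Attention layer (Luong-style dot-product attention). Below is a minimal sketch applying it as self-attention over the LSTM's per-timestep outputs, reusing the numeric (5, 5) inputs and 15-bit targets assumed above; note this is plain attention over a single encoder, not a full encoder-decoder:

from tensorflow.keras.layers import (Input, LSTM, Dense, Attention,
                                     GlobalAveragePooling1D)
from tensorflow.keras.models import Model

inp = Input(shape=(5, 5), name='input_1')
# return_sequences=True keeps one hidden state per timestep: (batch, 5, 50)
seq = LSTM(50, return_sequences=True, name='lstm1')(inp)
# Dot-product self-attention: query and value are both the LSTM outputs
ctx = Attention(name='attn')([seq, seq])
# Average over the timestep axis to get one context vector: (batch, 50)
pooled = GlobalAveragePooling1D(name='pool')(ctx)
out = Dense(15, activation='sigmoid', name='out1')(pooled)

attn_model = Model(inputs=inp, outputs=out)
attn_model.compile(optimizer='adam', loss='binary_crossentropy',
                   metrics=['accuracy'])

Train both models on the same encoded data and compare their validation metrics to see whether attention helps on this dataset.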