Source code for shorttext.generators.seq2seq.s2skeras


from typing import Literal, Self
from os import PathLike

import numpy as np
import numpy.typing as npt
import orjson
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from deprecation import deprecated

from ...utils.compactmodel_io import CompactIOMachine
from ...utils.classification_exceptions import ModelNotTrainedException

# Reference: https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

kerasseq2seq_suffices = ['.weights.h5', '.json', '_s2s_hyperparam.json', '_encoder.weights.h5', '_encoder.json', '_decoder.h5', '_decoder.weights.json']



[docs]
class Seq2SeqWithKeras(CompactIOMachine):
    """Sequence-to-sequence (seq2seq) model using Keras.

    Implements encoder-decoder architecture for sequence generation tasks.

    Reference:
        Ilya Sutskever, James Martens, Geoffrey Hinton, "Generating Text with Recurrent Neural Networks,"
        ICML (2011). https://www.cs.utoronto.ca/~ilya/pubs/2011/LANG-RNN.pdf

        Ilya Sutskever, Oriol Vinyals, Quoc V. Le, "Sequence to Sequence Learning with Neural Networks,"
        arXiv:1409.3215 (2014). https://arxiv.org/abs/1409.3215

        Francois Chollet, "A ten-minute introduction to sequence-to-sequence learning in Keras,"
        The Keras Blog. https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

        Aurelien Geron, Hands-On Machine Learning with Scikit-Learn and TensorFlow (Sebastopol, CA: O'Reilly Media, 2017).
    """

[docs]
    def __init__(self, vecsize: int, latent_dim: int):
        """Initialize the model.

        Args:
            vecsize: Vector size of the sequence.
            latent_dim: Latent dimension in the RNN cell.
        """
        super().__init__(
            {'classifier': 'kerasseq2seq'},
            'kerasseq2seq',
            kerasseq2seq_suffices
        )
        self.vecsize = vecsize
        self.latent_dim = latent_dim
        self.compiled = False
        self.trained = False



[docs]
    def prepare_model(self) -> None:
        """Prepare the Keras model."""
        # Define an input sequence and process it.
        encoder_inputs = Input(shape=(None, self.vecsize))
        encoder = LSTM(self.latent_dim, return_state=True)
        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
        # We discard `encoder_outputs` and only keep the states.
        encoder_states = [state_h, state_c]

        # Set up the decoder, using `encoder_states` as initial state.
        decoder_inputs = Input(shape=(None, self.vecsize))
        # We set up our decoder to return full output sequences,
        # and to return internal states as well. We don't use the
        # return states in the training model, but we will use them in inference.
        decoder_lstm = LSTM(self.latent_dim, return_sequences=True, return_state=True)
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=encoder_states)
        decoder_dense = Dense(self.vecsize, activation='softmax')
        decoder_outputs = decoder_dense(decoder_outputs)

        # Define the model that will turn
        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
        model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

        # Define sampling models
        encoder_model = Model(encoder_inputs, encoder_states)

        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(
            decoder_inputs, initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                              [decoder_outputs] + decoder_states)

        self.model = model
        self.encoder_model = encoder_model
        self.decoder_model = decoder_model



[docs]
    def compile(
            self,
            optimizer: Literal["sgd", "rmsprop", "adagrad", "adadelta", "adam", "adamax", "nadam"] = 'rmsprop',
            loss: str = 'categorical_crossentropy'
    ) -> None:
        """Compile the Keras model.

        Args:
            optimizer: Optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. Default: rmsprop.
            loss: Loss function from tensorflow.keras. Default: 'categorical_crossentropy'.
        """
        self.model.compile(optimizer=optimizer, loss=loss)
        self.compiled = True



[docs]
    def fit(
            self,
            encoder_input: npt.NDArray[np.float64],
            decoder_input: npt.NDArray[np.float64],
            decoder_output: npt.NDArray[np.float64],
            batch_size: int = 64,
            epochs: int = 100
    ) -> None:
        """Fit the seq2seq model.

        Args:
            encoder_input: Encoder input, a rank-3 tensor.
            decoder_input: Decoder input, a rank-3 tensor.
            decoder_output: Decoder output, a rank-3 tensor.
            batch_size: Batch size. Default: 64.
            epochs: Number of epochs. Default: 100.
        """
        self.model.fit([encoder_input, decoder_input], decoder_output,
                       batch_size=batch_size,
                       epochs=epochs)
        self.trained = True



[docs]
    def savemodel(self, prefix: str, final: bool=False) -> None:
        """Save the trained model to files.

        For compact save, use save_compact_model instead.

        Args:
            prefix: Prefix of the file path.
            final: Whether the model is final (cannot be further trained). Default: False.

        Raises:
            ModelNotTrainedException: If no trained model exists.
        """
        if not self.trained:
            raise ModelNotTrainedException()

        # save hyperparameters
        open(prefix + '_s2s_hyperparam.json', 'wb').write(
            orjson.dumps({'vecsize': self.vecsize, 'latent_dim': self.latent_dim})
        )

        # save whole model
        if final:
            self.model.save_weights(prefix+'.weights.h5')
        else:
            self.model.save(prefix+'.weights.h5')
        open(prefix+'.json', 'w').write(self.model.to_json())

        # save encoder and decoder
        if final:
            self.encoder_model.save_weights(prefix+'_encoder.weights.h5')
            self.decoder_model.save_weights(prefix + '_decoder.weights.h5')
        else:
            self.encoder_model.save(prefix + '_encoder.weights.h5')
            self.decoder_model.save(prefix+'_decoder.weights.h5')
        open(prefix+'_encoder.json', 'w').write(self.encoder_model.to_json())
        open(prefix+'_decoder.json', 'w').write(self.decoder_model.to_json())



[docs]
    def loadmodel(self, prefix: str) -> None:
        """Load a trained model from files.

        For compact load, use load_compact_model instead.

        Args:
            prefix: Prefix of the file path.
        """
        hyperparameters = orjson.loads(open(prefix+'_s2s_hyperparam.json', 'rb').read())
        self.vecsize, self.latent_dim = hyperparameters['vecsize'], hyperparameters['latent_dim']
        self.model = load_model(prefix+'.weights.h5')
        self.encoder_model = load_model(prefix+'_encoder.weights.h5')
        self.decoder_model = load_model(prefix+'_decoder.weights.h5')
        self.trained = True



[docs]
    @classmethod
    def from_pretrained(
            cls,
            path: str | PathLike,
            compact: bool = True
    ) -> Self:
        """Load a trained Seq2SeqWithKeras model from file.

        Args:
            path: Path of the model file.
            compact: Whether to load a compact model. Default: True.

        Returns:
            Seq2SeqWithKeras instance for sequence-to-sequence inference.
        """
        generator = Seq2SeqWithKeras(0, 0)
        if compact:
            generator.load_compact_model(path)
        else:
            generator.loadmodel(path)
        generator.compiled = True
        return generator





[docs]
@deprecated(deprecated_in="4.0.1", removed_in="5.0.0")
def load_seq2seq_model(path: str | PathLike, compact: bool=True) -> Seq2SeqWithKeras:
    """Load a trained Seq2SeqWithKeras model from file.

    Args:
        path: Path of the model file.
        compact: Whether to load a compact model. Default: True.

    Returns:
        Seq2SeqWithKeras instance for sequence-to-sequence inference.
    """
    return Seq2SeqWithKeras.from_pretrained(path, compact=compact)




[docs]
@deprecated(deprecated_in="4.0.0", removed_in="4.1.0")
def loadSeq2SeqWithKeras(path: str | PathLike, compact: bool=True) -> Seq2SeqWithKeras:
    """
    Deprecated. Call load_seq2seq_model instead.
    """
    return Seq2SeqWithKeras.from_pretrained(path, compact=compact)