Source code for shorttext.generators.seq2seq.s2skeras


from typing import Literal, Self
from os import PathLike

import numpy as np
import numpy.typing as npt
import orjson
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from deprecation import deprecated

from ...utils.compactmodel_io import CompactIOMachine
from ...utils.classification_exceptions import ModelNotTrainedException

# Reference: https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

# File-name suffixes (appended to the model prefix) that make up a saved model.
# They must match, one for one, the files written by
# Seq2SeqWithKeras.savemodel: the decoder files are '_decoder.weights.h5' and
# '_decoder.json'.
kerasseq2seq_suffices = [
    '.weights.h5',
    '.json',
    '_s2s_hyperparam.json',
    '_encoder.weights.h5',
    '_encoder.json',
    '_decoder.weights.h5',
    '_decoder.json',
]


class Seq2SeqWithKeras(CompactIOMachine):
    """Sequence-to-sequence (seq2seq) model using Keras.

    Implements encoder-decoder architecture for sequence generation tasks.

    Reference:

    Ilya Sutskever, James Martens, Geoffrey Hinton, "Generating Text with
    Recurrent Neural Networks," ICML (2011).
    https://www.cs.utoronto.ca/~ilya/pubs/2011/LANG-RNN.pdf

    Ilya Sutskever, Oriol Vinyals, Quoc V. Le, "Sequence to Sequence Learning
    with Neural Networks," arXiv:1409.3215 (2014).
    https://arxiv.org/abs/1409.3215

    Francois Chollet, "A ten-minute introduction to sequence-to-sequence
    learning in Keras," The Keras Blog.
    https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

    Aurelien Geron, Hands-On Machine Learning with Scikit-Learn and TensorFlow
    (Sebastopol, CA: O'Reilly Media, 2017).
    """

    def __init__(self, vecsize: int, latent_dim: int):
        """Initialize the model.

        Args:
            vecsize: Vector size of the sequence.
            latent_dim: Latent dimension in the RNN cell.
        """
        super().__init__(
            {'classifier': 'kerasseq2seq'},
            'kerasseq2seq',
            kerasseq2seq_suffices
        )
        self.vecsize = vecsize
        self.latent_dim = latent_dim
        self.compiled = False
        self.trained = False

    def prepare_model(self) -> None:
        """Prepare the Keras model.

        Builds three models that share the same layers and stores them on the
        instance: ``self.model`` (training), ``self.encoder_model`` and
        ``self.decoder_model`` (sampling / inference).
        """
        # Define an input sequence and process it.
        encoder_inputs = Input(shape=(None, self.vecsize))
        encoder = LSTM(self.latent_dim, return_state=True)
        # We discard the encoder outputs and only keep the states.
        _, state_h, state_c = encoder(encoder_inputs)
        encoder_states = [state_h, state_c]

        # Set up the decoder, using `encoder_states` as initial state.
        decoder_inputs = Input(shape=(None, self.vecsize))
        # We set up our decoder to return full output sequences,
        # and to return internal states as well. We don't use the
        # return states in the training model, but we will use them in inference.
        decoder_lstm = LSTM(self.latent_dim, return_sequences=True, return_state=True)
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=encoder_states)
        decoder_dense = Dense(self.vecsize, activation='softmax')
        decoder_outputs = decoder_dense(decoder_outputs)

        # Define the model that will turn
        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
        model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

        # Define sampling models
        encoder_model = Model(encoder_inputs, encoder_states)

        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(
            decoder_inputs, initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                              [decoder_outputs] + decoder_states)

        self.model = model
        self.encoder_model = encoder_model
        self.decoder_model = decoder_model

    def compile(
            self,
            optimizer: Literal["sgd", "rmsprop", "adagrad", "adadelta", "adam", "adamax", "nadam"] = 'rmsprop',
            loss: str = 'categorical_crossentropy'
    ) -> None:
        """Compile the Keras model.

        Args:
            optimizer: Optimizer for gradient descent. Options: sgd, rmsprop,
                adagrad, adadelta, adam, adamax, nadam. Default: rmsprop.
            loss: Loss function from tensorflow.keras.
                Default: 'categorical_crossentropy'.
        """
        self.model.compile(optimizer=optimizer, loss=loss)
        self.compiled = True

    def fit(
            self,
            encoder_input: npt.NDArray[np.float64],
            decoder_input: npt.NDArray[np.float64],
            decoder_output: npt.NDArray[np.float64],
            batch_size: int = 64,
            epochs: int = 100
    ) -> None:
        """Fit the seq2seq model.

        Args:
            encoder_input: Encoder input, a rank-3 tensor.
            decoder_input: Decoder input, a rank-3 tensor.
            decoder_output: Decoder output, a rank-3 tensor.
            batch_size: Batch size. Default: 64.
            epochs: Number of epochs. Default: 100.
        """
        self.model.fit([encoder_input, decoder_input], decoder_output,
                       batch_size=batch_size,
                       epochs=epochs)
        self.trained = True

    def savemodel(self, prefix: str, final: bool=False) -> None:
        """Save the trained model to files.

        For compact save, use save_compact_model instead.

        Seven files are written, each named ``prefix`` plus a suffix:
        ``_s2s_hyperparam.json``, ``.weights.h5``, ``.json``,
        ``_encoder.weights.h5``, ``_encoder.json``, ``_decoder.weights.h5``
        and ``_decoder.json``.

        Args:
            prefix: Prefix of the file path.
            final: Whether the model is final (cannot be further trained).
                When true, only the weights are written (``save_weights``);
                otherwise the full models are written (``save``).
                Default: False.

        Raises:
            ModelNotTrainedException: If no trained model exists.
        """
        # NOTE(review): loadmodel reads the '.weights.h5' files with
        # load_model; confirm that files written with final=True (weights
        # only) can be reloaded that way.
        if not self.trained:
            raise ModelNotTrainedException()

        # save hyperparameters (orjson.dumps returns bytes, hence 'wb')
        with open(prefix + '_s2s_hyperparam.json', 'wb') as hyperparam_file:
            hyperparam_file.write(
                orjson.dumps({'vecsize': self.vecsize, 'latent_dim': self.latent_dim})
            )

        # save whole model
        if final:
            self.model.save_weights(prefix+'.weights.h5')
        else:
            self.model.save(prefix+'.weights.h5')
        with open(prefix+'.json', 'w') as model_json_file:
            model_json_file.write(self.model.to_json())

        # save encoder and decoder
        if final:
            self.encoder_model.save_weights(prefix+'_encoder.weights.h5')
            self.decoder_model.save_weights(prefix + '_decoder.weights.h5')
        else:
            self.encoder_model.save(prefix + '_encoder.weights.h5')
            self.decoder_model.save(prefix+'_decoder.weights.h5')
        with open(prefix+'_encoder.json', 'w') as encoder_json_file:
            encoder_json_file.write(self.encoder_model.to_json())
        with open(prefix+'_decoder.json', 'w') as decoder_json_file:
            decoder_json_file.write(self.decoder_model.to_json())

    def loadmodel(self, prefix: str) -> None:
        """Load a trained model from files.

        For compact load, use load_compact_model instead.

        Args:
            prefix: Prefix of the file path. A path-like object is accepted
                as well and converted with ``os.fspath``.
        """
        from os import fspath

        # from_pretrained may hand over a PathLike; the suffixes below are
        # appended by string concatenation, so normalise first.
        prefix = fspath(prefix)

        with open(prefix+'_s2s_hyperparam.json', 'rb') as hyperparam_file:
            hyperparameters = orjson.loads(hyperparam_file.read())
        self.vecsize, self.latent_dim = hyperparameters['vecsize'], hyperparameters['latent_dim']
        self.model = load_model(prefix+'.weights.h5')
        self.encoder_model = load_model(prefix+'_encoder.weights.h5')
        self.decoder_model = load_model(prefix+'_decoder.weights.h5')
        self.trained = True

    @classmethod
    def from_pretrained(
            cls,
            path: str | PathLike,
            compact: bool = True
    ) -> Self:
        """Load a trained Seq2SeqWithKeras model from file.

        Args:
            path: Path of the model file.
            compact: Whether to load a compact model. Default: True.

        Returns:
            Instance of the class this method is called on, for
            sequence-to-sequence inference.
        """
        # The placeholder sizes (0, 0) are passed only to construct the
        # object; loadmodel overwrites vecsize and latent_dim from the saved
        # hyperparameters. Instantiating through `cls` keeps the declared
        # `Self` return type true for subclasses.
        generator = cls(0, 0)
        if compact:
            generator.load_compact_model(path)
        else:
            generator.loadmodel(path)
        generator.compiled = True
        return generator
@deprecated(
    deprecated_in="4.0.1",
    removed_in="5.0.0",
    details="Use Seq2SeqWithKeras.from_pretrained instead."
)
def load_seq2seq_model(path: str | PathLike, compact: bool=True) -> Seq2SeqWithKeras:
    """Load a trained Seq2SeqWithKeras model from file.

    Deprecated. Call Seq2SeqWithKeras.from_pretrained instead; this function
    only forwards its arguments to it.

    Args:
        path: Path of the model file.
        compact: Whether to load a compact model. Default: True.

    Returns:
        Seq2SeqWithKeras instance for sequence-to-sequence inference.
    """
    return Seq2SeqWithKeras.from_pretrained(path, compact=compact)
@deprecated(
    deprecated_in="4.0.0",
    removed_in="4.1.0",
    details="Use Seq2SeqWithKeras.from_pretrained instead."
)
def loadSeq2SeqWithKeras(path: str | PathLike, compact: bool=True) -> Seq2SeqWithKeras:
    """Load a trained Seq2SeqWithKeras model from file.

    Deprecated. Call Seq2SeqWithKeras.from_pretrained instead; this function
    only forwards its arguments to it.

    Args:
        path: Path of the model file.
        compact: Whether to load a compact model. Default: True.

    Returns:
        Seq2SeqWithKeras instance for sequence-to-sequence inference.
    """
    return Seq2SeqWithKeras.from_pretrained(path, compact=compact)