.. raw:: html

.. code:: python

    from mxnet import np, npx
    from mxnet.gluon import nn, rnn
    from d2l import mxnet as d2l

    npx.set_np()

    batch_size, num_steps = 32, 35
    train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

.. raw:: html

.. raw:: html

.. code:: python

    import torch
    from torch import nn
    from torch.nn import functional as F
    from d2l import torch as d2l

    batch_size, num_steps = 32, 35
    train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

.. raw:: html

.. raw:: html

.. code:: python

    import tensorflow as tf
    from d2l import tensorflow as d2l

    batch_size, num_steps = 32, 35
    train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

.. raw:: html

.. raw:: html

mxnet pytorch tensorflow

.. raw:: html

.. code:: python

    num_hiddens = 256
    rnn_layer = rnn.RNN(num_hiddens)
    rnn_layer.initialize()

Việc khởi tạo trạng thái ẩn rất đơn giản: chúng ta gọi hàm thành viên ``begin_state``. Hàm này trả về một danh sách (``state``) chứa trạng thái ẩn ban đầu cho mỗi ví dụ trong minibatch, có hình dạng là (số lớp ẩn, kích thước lô, số đơn vị ẩn). Đối với một số mô hình được giới thiệu sau (ví dụ: bộ nhớ ngắn hạn dài, LSTM), danh sách này còn chứa thêm các thông tin khác.

.. code:: python

    state = rnn_layer.begin_state(batch_size=batch_size)
    len(state), state[0].shape

.. parsed-literal::
    :class: output

    (1, (1, 32, 256))

.. raw:: html

.. raw:: html

.. code:: python

    num_hiddens = 256
    rnn_layer = nn.RNN(len(vocab), num_hiddens)

Chúng ta sử dụng một tensor có hình dạng (số lớp ẩn, kích thước lô, số đơn vị ẩn) để khởi tạo trạng thái ẩn.

.. code:: python

    state = torch.zeros((1, batch_size, num_hiddens))
    state.shape

.. parsed-literal::
    :class: output

    torch.Size([1, 32, 256])

.. raw:: html

.. raw:: html

.. code:: python

    num_hiddens = 256
    rnn_cell = tf.keras.layers.SimpleRNNCell(num_hiddens,
        kernel_initializer='glorot_uniform')
    rnn_layer = tf.keras.layers.RNN(rnn_cell, time_major=True,
        return_sequences=True, return_state=True)

    state = rnn_cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
    state.shape

.. parsed-literal::
    :class: output

    TensorShape([32, 256])

.. raw:: html

.. raw:: html

mxnet pytorch tensorflow

.. raw:: html

Bên cạnh đó, trạng thái ẩn đã được cập nhật (``state_new``) do ``rnn_layer`` trả về là trạng thái ẩn ở bước thời gian *cuối cùng* của minibatch. Nó có thể được sử dụng để khởi tạo trạng thái ẩn cho minibatch tiếp theo trong cùng một epoch khi dùng phân vùng tuần tự. Đối với nhiều lớp ẩn, trạng thái ẩn của mỗi lớp sẽ được lưu trữ trong biến này (``state_new``). Đối với một số mô hình được giới thiệu sau (ví dụ: bộ nhớ ngắn hạn dài, LSTM), biến này còn chứa thêm các thông tin khác.

.. code:: python

    X = np.random.uniform(size=(num_steps, batch_size, len(vocab)))
    Y, state_new = rnn_layer(X, state)
    Y.shape, len(state_new), state_new[0].shape

.. parsed-literal::
    :class: output

    ((35, 32, 256), 1, (1, 32, 256))

.. raw:: html

.. raw:: html

.. code:: python

    X = torch.rand(size=(num_steps, batch_size, len(vocab)))
    Y, state_new = rnn_layer(X, state)
    Y.shape, state_new.shape

.. parsed-literal::
    :class: output

    (torch.Size([35, 32, 256]), torch.Size([1, 32, 256]))

.. raw:: html

.. raw:: html

.. code:: python

    X = tf.random.uniform((num_steps, batch_size, len(vocab)))
    Y, state_new = rnn_layer(X, state)
    Y.shape, len(state_new), state_new[0].shape

.. parsed-literal::
    :class: output

    (TensorShape([35, 32, 256]), 32, TensorShape([256]))

.. raw:: html

.. raw:: html

mxnet pytorch tensorflow

.. raw:: html

#@save
class RNNModel(nn.Block):
    """Language model that pairs a recurrent layer with a dense output layer.

    Args:
        rnn_layer: The recurrent layer to wrap; it must provide
            `begin_state` and be callable as `rnn_layer(X, state)`.
        vocab_size: Number of tokens; used as the one-hot depth and as the
            number of output units.
        **kwargs: Forwarded unchanged to the `nn.Block` constructor.
    """

    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = nn.Dense(vocab_size)

    def forward(self, inputs, state):
        """Return `(output, state)` for a batch of token indices."""
        # Transpose first, then one-hot encode the token indices.
        one_hot_inputs = npx.one_hot(inputs.T, self.vocab_size)
        hiddens, state = self.rnn(one_hot_inputs, state)
        # Flatten `hiddens` to (`num_steps` * `batch_size`, `num_hiddens`) so
        # that the dense layer yields
        # (`num_steps` * `batch_size`, `vocab_size`).
        flat_hiddens = hiddens.reshape(-1, hiddens.shape[-1])
        return self.dense(flat_hiddens), state

    def begin_state(self, *args, **kwargs):
        """Delegate creation of the initial state to the wrapped layer."""
        return self.rnn.begin_state(*args, **kwargs)

.. raw:: html

#@save
class RNNModel(nn.Module):
    """Language model that pairs a PyTorch recurrent layer with a linear head.

    Args:
        rnn_layer: Recurrent layer exposing `hidden_size`, `num_layers` and
            `bidirectional` (e.g. `nn.RNN`, `nn.GRU` or `nn.LSTM`).
        vocab_size: Number of tokens; used as the one-hot width and as the
            number of output units.
        **kwargs: Forwarded unchanged to the `nn.Module` constructor.
    """

    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        # A bidirectional RNN (to be introduced later) has two directions,
        # which doubles the width of the features fed to the output layer.
        self.num_directions = 2 if self.rnn.bidirectional else 1
        self.linear = nn.Linear(self.num_hiddens * self.num_directions,
                                self.vocab_size)

    def forward(self, inputs, state):
        """Return `(output, state)` for a batch of token indices."""
        one_hot = F.one_hot(inputs.T.long(), self.vocab_size)
        hiddens, state = self.rnn(one_hot.to(torch.float32), state)
        # Flatten `hiddens` to (`num_steps` * `batch_size`, `num_hiddens`) so
        # that the linear layer yields
        # (`num_steps` * `batch_size`, `vocab_size`).
        flat_hiddens = hiddens.reshape((-1, hiddens.shape[-1]))
        return self.linear(flat_hiddens), state

    def begin_state(self, device, batch_size=1):
        """Return an all-zero initial state placed on `device`."""
        shape = (self.num_directions * self.rnn.num_layers, batch_size,
                 self.num_hiddens)
        if isinstance(self.rnn, nn.LSTM):
            # `nn.LSTM` takes a tuple of hidden states
            return (torch.zeros(shape, device=device),
                    torch.zeros(shape, device=device))
        # Other layers such as `nn.GRU` take a single tensor as hidden state
        return torch.zeros(shape, device=device)

.. raw:: html

#@save
class RNNModel(tf.keras.layers.Layer):
    """Language model that pairs a Keras recurrent layer with a dense head.

    Args:
        rnn_layer: Recurrent layer to wrap; it is called as
            `rnn_layer(X, state)` and its `cell` supplies the initial state.
        vocab_size: Number of tokens; used as the one-hot depth and as the
            number of output units.
        **kwargs: Forwarded unchanged to the Keras `Layer` constructor.
    """

    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, state):
        """Return `(output, state)` for a batch of token indices."""
        one_hot_inputs = tf.one_hot(tf.transpose(inputs), self.vocab_size)
        # Later RNN cells such as `tf.keras.layers.LSTMCell` return more than
        # two values, so everything after the first output is kept as state.
        hiddens, *state = self.rnn(one_hot_inputs, state)
        flat_hiddens = tf.reshape(hiddens, (-1, hiddens.shape[-1]))
        return self.dense(flat_hiddens), state

    def begin_state(self, *args, **kwargs):
        """Delegate creation of the initial state to the wrapped RNN cell."""
        return self.rnn.cell.get_initial_state(*args, **kwargs)

.. raw:: html

mxnet pytorch tensorflow

.. raw:: html

.. code:: python

    device = d2l.try_gpu()
    net = RNNModel(rnn_layer, len(vocab))
    net.initialize(force_reinit=True, ctx=device)
    d2l.predict_ch8('time traveller', 10, net, vocab, device)

.. parsed-literal::
    :class: output

    'time travellervmoopwrrrr'

.. raw:: html

.. raw:: html

.. code:: python

    device = d2l.try_gpu()
    net = RNNModel(rnn_layer, vocab_size=len(vocab))
    net = net.to(device)
    d2l.predict_ch8('time traveller', 10, net, vocab, device)

.. parsed-literal::
    :class: output

    'time travellerxxxxxxxxxx'

.. raw:: html

.. raw:: html

.. code:: python

    device_name = d2l.try_gpu()._device_name
    strategy = tf.distribute.OneDeviceStrategy(device_name)
    with strategy.scope():
        net = RNNModel(rnn_layer, vocab_size=len(vocab))

    d2l.predict_ch8('time traveller', 10, net, vocab)

.. parsed-literal::
    :class: output

    'time travellerjlclggqtfr'

.. raw:: html

.. raw:: html

mxnet pytorch tensorflow

.. raw:: html

.. code:: python

    num_epochs, lr = 500, 1
    d2l.train_ch8(net, train_iter, vocab, lr, num_epochs, device)

.. parsed-literal::
    :class: output

    perplexity 1.2, 131898.2 tokens/sec on gpu(0)
    time traveller held the time dimension with a uniformvelocity fr
    traveller for so it well they couldmmst an the mather the p

.. figure:: output_rnn-concise_eff2f4_68_1.svg

.. raw:: html

.. raw:: html

.. code:: python

    num_epochs, lr = 500, 1
    d2l.train_ch8(net, train_iter, vocab, lr, num_epochs, device)

.. parsed-literal::
    :class: output

    perplexity 1.3, 286577.9 tokens/sec on cuda:0
    time traveller proceeded anyong the eread must uply helo bree th
    travelleryou can show black is white by argumed thave back

.. figure:: output_rnn-concise_eff2f4_71_1.svg

.. raw:: html

.. raw:: html

.. code:: python

    num_epochs, lr = 500, 1
    d2l.train_ch8(net, train_iter, vocab, lr, num_epochs, strategy)

.. parsed-literal::
    :class: output

    perplexity 1.3, 15412.8 tokens/sec on /GPU:0
    time traveller after the pauserequired for the proper assimilati
    traveller of co urealy abmeding wion anof thed than ansmith

.. figure:: output_rnn-concise_eff2f4_74_1.svg

.. raw:: html

.. raw:: html