|
|
import tensorflow as tf |
|
|
from tensorflow.keras.layers import Input, Embedding, LayerNormalization, MultiHeadAttention, Dense, Add, Dropout, Layer |
|
|
from tensorflow.keras.models import Model |
|
|
from tensorflow.keras.optimizers import Adam |
|
|
from tensorflow.keras.losses import SparseCategoricalCrossentropy |
|
|
import numpy as np |
|
|
|
|
|
class VoidChatModel(tf.keras.Model): |
|
|
def __init__(self, vocab_size, seq_len, num_layers=6, num_heads=8, emb_dim=512, mlp_dim=2048, dropout_rate=0.1): |
|
|
super(VoidChatModel, self).__init__() |
|
|
self.vocab_size = vocab_size |
|
|
self.seq_len = seq_len |
|
|
self.num_layers = num_layers |
|
|
self.num_heads = num_heads |
|
|
self.emb_dim = emb_dim |
|
|
self.mlp_dim = mlp_dim |
|
|
self.dropout_rate = dropout_rate |
|
|
|
|
|
|
|
|
self.embedding = Embedding(input_dim=vocab_size, output_dim=emb_dim) |
|
|
|
|
|
|
|
|
self.transformer_blocks = [TransformerBlock(num_heads, emb_dim, mlp_dim, dropout_rate) for _ in range(num_layers)] |
|
|
|
|
|
|
|
|
self.output_layer = Dense(vocab_size, activation='softmax') |
|
|
|
|
|
def call(self, input_ids, training=False): |
|
|
|
|
|
x = self.embedding(input_ids) |