DeepBrain1 / model.py
DeepBrain81's picture
Update model.py
8cbdb92 verified
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LayerNormalization, MultiHeadAttention, Dense, Add, Dropout, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import numpy as np
class VoidChatModel(tf.keras.Model):
def __init__(self, vocab_size, seq_len, num_layers=6, num_heads=8, emb_dim=512, mlp_dim=2048, dropout_rate=0.1):
super(VoidChatModel, self).__init__()
self.vocab_size = vocab_size
self.seq_len = seq_len
self.num_layers = num_layers
self.num_heads = num_heads
self.emb_dim = emb_dim
self.mlp_dim = mlp_dim
self.dropout_rate = dropout_rate
# Embedding layer
self.embedding = Embedding(input_dim=vocab_size, output_dim=emb_dim)
# Transformer layers
self.transformer_blocks = [TransformerBlock(num_heads, emb_dim, mlp_dim, dropout_rate) for _ in range(num_layers)]
# Output layer
self.output_layer = Dense(vocab_size, activation='softmax')
def call(self, input_ids, training=False):
# Embedding layer
x = self.embedding(input_ids)