| | --- |
| | license: mit |
| | language: |
| | - en |
| | library_name: transformers |
| | tags: |
| | - vision |
| | - speech |
| | - image-text-text |
| | - audio-text-text |
| | - Multi-Modal |
| | --- |
| | |
| | # Creation Process |
| | <img src="https://cdn-avatars.huggingface.co/v1/production/uploads/65d883893a52cd9bcd8ab7cf/tRsCJlHNZo1D02kBTmfy9.jpeg" width="300"/> |
| | |
| | ```python |
| | Vmodel = VisionEncoderDecoderModel.from_encoder_decoder_pretrained( |
| | "google/vit-base-patch16-224-in21k", "LeroyDyer/Mixtral_AI_Tiny" |
| | ) |
| | _Encoder_ImageProcessor = Vmodel.encoder |
| | _Decoder_ImageTokenizer = Vmodel.decoder |
| | _VisionEncoderDecoderModel = Vmodel |
| | # Attach the combined vision encoder-decoder (no pad tokens are configured in this snippet) |
| | LM_MODEL.VisionEncoderDecoder = _VisionEncoderDecoderModel |
| | # Add Sub Components |
| | LM_MODEL.Encoder_ImageProcessor = _Encoder_ImageProcessor |
| | LM_MODEL.Decoder_ImageTokenizer = _Decoder_ImageTokenizer |
| | LM_MODEL |
| | |
| | |
| | ``` |
| | |
| | # ADD AUDIO |
| | |
| | ```python |
| | |
| | |
| | |
| | print('Add Audio...') |
| | #Add Head |
| | # Combine pre-trained encoder and pre-trained decoder to form a Seq2Seq model |
| | _AudioFeatureExtractor = AutoFeatureExtractor.from_pretrained("openai/whisper-small") |
| | _AudioTokenizer = AutoTokenizer.from_pretrained("openai/whisper-small") |
| | _SpeechEncoderDecoder = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained("openai/whisper-small","openai/whisper-small") |
| | |
| | # Add decoder start and pad tokens |
| | _SpeechEncoderDecoder.config.decoder_start_token_id = _AudioTokenizer.cls_token_id |
| | _SpeechEncoderDecoder.config.pad_token_id = _AudioTokenizer.pad_token_id |
| | LM_MODEL.SpeechEncoderDecoder = _SpeechEncoderDecoder |
| | # Add Sub Components |
| | LM_MODEL.Decoder_AudioTokenizer = _AudioTokenizer |
| | LM_MODEL.Encoder_AudioFeatureExtractor = _AudioFeatureExtractor |
| | LM_MODEL |
| | |
| | ``` |
| | |
| | # SAVE |
| | ```python |
| | print('Final stages:...') |
| | print('Add tokenizer...') |
| | LM_MODEL.resize_token_embeddings(len(tokenizer)) |
| | LM_MODEL.tokenizer = tokenizer |
| | print('Save model...') |
| | LM_MODEL.to(torch.float16) |
| | LM_MODEL.save_pretrained("Mixtral_AI_MiniModalTron") |
| | print('Save tokenizer...') |
| | tokenizer.save_pretrained("Mixtral_AI_MiniModalTron") |
| | |
| | ``` |