evalstate
/

dfine-cppe5-sample-demo

Model card Files Files and versions

dfine-cppe5-sample-demo / config.json

evalstate's picture

evalstate HF Staff

Upload DFineForObjectDetection

d427f7b verified 10 days ago

history blame contribute delete

3.55 kB

	{
	"activation_dropout": 0.0,
	"activation_function": "silu",
	"anchor_image_size": null,
	"architectures": [
	"DFineForObjectDetection"
	],
	"attention_dropout": 0.0,
	"auxiliary_loss": true,
	"backbone": null,
	"backbone_config": {
	"depths": [
	3,
	4,
	6,
	3
	],
	"downsample_in_bottleneck": false,
	"downsample_in_first_stage": false,
	"dtype": "float32",
	"embedding_size": 32,
	"hidden_act": "relu",
	"hidden_sizes": [
	128,
	256,
	512,
	1024
	],
	"initializer_range": 0.02,
	"layer_type": "basic",
	"model_type": "hgnet_v2",
	"num_channels": 3,
	"out_features": [
	"stage2",
	"stage3",
	"stage4"
	],
	"out_indices": [
	2,
	3,
	4
	],
	"stage_downsample": [
	false,
	true,
	true,
	true
	],
	"stage_in_channels": [
	16,
	64,
	256,
	512
	],
	"stage_kernel_size": [
	3,
	3,
	5,
	5
	],
	"stage_light_block": [
	false,
	false,
	true,
	true
	],
	"stage_mid_channels": [
	16,
	32,
	64,
	128
	],
	"stage_names": [
	"stem",
	"stage1",
	"stage2",
	"stage3",
	"stage4"
	],
	"stage_num_blocks": [
	1,
	1,
	2,
	1
	],
	"stage_numb_of_layers": [
	3,
	3,
	3,
	3
	],
	"stage_out_channels": [
	64,
	256,
	512,
	1024
	],
	"stem_channels": [
	3,
	16,
	16
	],
	"use_learnable_affine_block": true
	},
	"batch_norm_eps": 1e-05,
	"box_noise_scale": 1.0,
	"d_model": 256,
	"decoder_activation_function": "relu",
	"decoder_attention_heads": 8,
	"decoder_ffn_dim": 1024,
	"decoder_in_channels": [
	256,
	256,
	256
	],
	"decoder_layers": 3,
	"decoder_method": "default",
	"decoder_n_points": [
	3,
	6,
	3
	],
	"decoder_offset_scale": 0.5,
	"depth_mult": 0.34,
	"dropout": 0.0,
	"dtype": "float32",
	"encode_proj_layers": [
	2
	],
	"encoder_activation_function": "gelu",
	"encoder_attention_heads": 8,
	"encoder_ffn_dim": 1024,
	"encoder_hidden_dim": 256,
	"encoder_in_channels": [
	256,
	512,
	1024
	],
	"encoder_layers": 1,
	"eos_coefficient": 0.0001,
	"eval_idx": -1,
	"eval_size": null,
	"feat_strides": [
	8,
	16,
	32
	],
	"focal_loss_alpha": 0.75,
	"focal_loss_gamma": 2.0,
	"freeze_backbone_batch_norms": true,
	"hidden_expansion": 0.5,
	"id2label": {
	"0": "0",
	"1": "1",
	"2": "2",
	"3": "3",
	"4": "4"
	},
	"initializer_bias_prior_prob": null,
	"initializer_range": 0.01,
	"is_encoder_decoder": true,
	"label2id": {
	"0": 0,
	"1": 1,
	"2": 2,
	"3": 3,
	"4": 4
	},
	"label_noise_ratio": 0.5,
	"layer_norm_eps": 1e-05,
	"layer_scale": 1,
	"learn_initial_query": false,
	"lqe_hidden_dim": 64,
	"lqe_layers": 2,
	"matcher_alpha": 0.25,
	"matcher_bbox_cost": 5.0,
	"matcher_class_cost": 2.0,
	"matcher_gamma": 2.0,
	"matcher_giou_cost": 2.0,
	"max_num_bins": 32,
	"model_type": "d_fine",
	"normalize_before": false,
	"num_denoising": 100,
	"num_feature_levels": 3,
	"num_queries": 300,
	"positional_encoding_temperature": 10000,
	"reg_scale": 4.0,
	"tie_word_embeddings": true,
	"top_prob_values": 4,
	"transformers_version": "5.3.0",
	"up": 0.5,
	"use_focal_loss": true,
	"weight_loss_bbox": 5.0,
	"weight_loss_ddf": 1.5,
	"weight_loss_fgl": 0.15,
	"weight_loss_giou": 2.0,
	"weight_loss_vfl": 1.0,
	"with_box_refine": true
	}