PeteBleackley committed on
Commit
d3237a5
·
verified ·
1 Parent(s): 7abf6f6

End of training

Browse files
Files changed (4) hide show
  1. DisamBertSingleSense.py +19 -10
  2. README.md +16 -21
  3. model.safetensors +1 -1
  4. training_args.bin +2 -2
DisamBertSingleSense.py CHANGED
@@ -41,7 +41,8 @@ class DisamBertSingleSense(PreTrainedModel):
41
  def __init__(self, config: PreTrainedConfig):
42
  super().__init__(config)
43
  if config.init_basemodel:
44
- self.BaseModel = AutoModel.from_pretrained(config.name_or_path, device_map="auto")
 
45
  self.config.vocab_size += 2
46
  self.BaseModel.resize_token_embeddings(self.config.vocab_size)
47
  else:
@@ -101,24 +102,28 @@ class DisamBertSingleSense(PreTrainedModel):
101
  with self.device:
102
  vectors = self.BaseModel(candidates, candidate_attention_masks).last_hidden_state[:, 0]
103
  chunks = [
104
- torch.squeeze(vectors[(candidate_mapping == sentence_index).nonzero()],
105
- dim=1)
106
  for sentence_index in torch.unique(candidate_mapping)
107
  ]
108
  maxlen = max(chunk.shape[0] for chunk in chunks)
109
  return torch.stack(
110
  [
111
- torch.cat([chunk, torch.zeros((maxlen - chunk.shape[0], self.config.hidden_size))])
 
 
112
  for chunk in chunks
113
  ]
114
  )
115
 
116
 
117
  class CandidateLabeller:
118
- def __init__(self, tokenizer: PreTrainedTokenizer,
119
- ontology: Generator[LexicalExample],
120
- device:torch.device,
121
- retain_candidates: bool = False):
 
 
 
122
  self.tokenizer = tokenizer
123
  self.device = device
124
  self.gloss_tokens = {
@@ -137,7 +142,11 @@ class CandidateLabeller:
137
  ]
138
  tokens = self.tokenizer.pad(encoded, padding=True, return_tensors="pt")
139
  candidate_tokens = self.tokenizer.pad(
140
- [self.gloss_tokens[concept] for example in batch for concept in example["candidates"]],
 
 
 
 
141
  padding=True,
142
  return_attention_mask=True,
143
  return_tensors="pt",
@@ -159,5 +168,5 @@ class CandidateLabeller:
159
  [example["candidates"].index(example["label"]) for example in batch]
160
  )
161
  if self.retain_candidates:
162
- result['candidates'] = [example['candidates'] for example in batch]
163
  return result
 
41
  def __init__(self, config: PreTrainedConfig):
42
  super().__init__(config)
43
  if config.init_basemodel:
44
+ self.BaseModel = AutoModel.from_pretrained(config.name_or_path,
45
+ device_map="auto")
46
  self.config.vocab_size += 2
47
  self.BaseModel.resize_token_embeddings(self.config.vocab_size)
48
  else:
 
102
  with self.device:
103
  vectors = self.BaseModel(candidates, candidate_attention_masks).last_hidden_state[:, 0]
104
  chunks = [
105
+ torch.squeeze(vectors[(candidate_mapping == sentence_index).nonzero()], dim=1)
 
106
  for sentence_index in torch.unique(candidate_mapping)
107
  ]
108
  maxlen = max(chunk.shape[0] for chunk in chunks)
109
  return torch.stack(
110
  [
111
+ torch.cat(
112
+ [chunk, torch.zeros((maxlen - chunk.shape[0], self.config.hidden_size))]
113
+ )
114
  for chunk in chunks
115
  ]
116
  )
117
 
118
 
119
  class CandidateLabeller:
120
+ def __init__(
121
+ self,
122
+ tokenizer: PreTrainedTokenizer,
123
+ ontology: Generator[LexicalExample],
124
+ device: torch.device,
125
+ retain_candidates: bool = False,
126
+ ):
127
  self.tokenizer = tokenizer
128
  self.device = device
129
  self.gloss_tokens = {
 
142
  ]
143
  tokens = self.tokenizer.pad(encoded, padding=True, return_tensors="pt")
144
  candidate_tokens = self.tokenizer.pad(
145
+ [
146
+ self.gloss_tokens[concept]
147
+ for example in batch
148
+ for concept in example["candidates"]
149
+ ],
150
  padding=True,
151
  return_attention_mask=True,
152
  return_tensors="pt",
 
168
  [example["candidates"].index(example["label"]) for example in batch]
169
  )
170
  if self.retain_candidates:
171
+ result["candidates"] = [example["candidates"] for example in batch]
172
  return result
README.md CHANGED
@@ -11,22 +11,22 @@ metrics:
11
  - recall
12
  - f1
13
  model-index:
14
- - name: DisambertSingleSense-base
15
  results: []
16
  ---
17
 
18
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
  should probably proofread and complete it, then remove this comment. -->
20
 
21
- # DisambertSingleSense-base
22
 
23
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the semcor dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 7.9132
26
- - Precision: 0.7725
27
- - Recall: 0.7594
28
- - F1: 0.7659
29
- - Matthews: 0.7589
30
 
31
  ## Model description
32
 
@@ -49,31 +49,26 @@ The following hyperparameters were used during training:
49
  - train_batch_size: 8
50
  - eval_batch_size: 8
51
  - seed: 42
52
- - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
53
  - lr_scheduler_type: inverse_sqrt
54
  - lr_scheduler_warmup_steps: 1000
55
- - num_epochs: 10
56
 
57
  ### Training results
58
 
59
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Matthews |
60
  |:-------------:|:-----:|:------:|:---------------:|:---------:|:------:|:------:|:--------:|
61
- | No log | 0 | 0 | 83.4924 | 0.4375 | 0.3642 | 0.3975 | 0.3634 |
62
- | 0.4971 | 1.0 | 28027 | 0.7339 | 0.7793 | 0.7669 | 0.7730 | 0.7664 |
63
- | 0.3296 | 2.0 | 56054 | 0.9845 | 0.7756 | 0.7656 | 0.7705 | 0.7651 |
64
- | 0.1843 | 3.0 | 84081 | 2.0537 | 0.7743 | 0.7616 | 0.7679 | 0.7611 |
65
- | 0.0903 | 4.0 | 112108 | 3.9497 | 0.7729 | 0.7559 | 0.7643 | 0.7554 |
66
- | 0.0171 | 5.0 | 140135 | 5.8641 | 0.7727 | 0.7555 | 0.7640 | 0.7550 |
67
- | 0.0394 | 6.0 | 168162 | 6.5708 | 0.7747 | 0.7555 | 0.7650 | 0.7550 |
68
- | 0.0011 | 7.0 | 196189 | 7.4188 | 0.7705 | 0.7550 | 0.7627 | 0.7545 |
69
- | 0.0231 | 8.0 | 224216 | 7.0225 | 0.7762 | 0.7621 | 0.7691 | 0.7615 |
70
- | 0.0015 | 9.0 | 252243 | 6.9004 | 0.7766 | 0.7599 | 0.7681 | 0.7594 |
71
- | 0.0000 | 10.0 | 280270 | 7.9132 | 0.7725 | 0.7594 | 0.7659 | 0.7589 |
72
 
73
 
74
  ### Framework versions
75
 
76
  - Transformers 5.2.0
77
- - Pytorch 2.6.0+cu124
78
  - Datasets 4.5.0
79
  - Tokenizers 0.22.2
 
11
  - recall
12
  - f1
13
  model-index:
14
+ - name: DisamBertSingleSense-base
15
  results: []
16
  ---
17
 
18
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
  should probably proofread and complete it, then remove this comment. -->
20
 
21
+ # DisamBertSingleSense-base
22
 
23
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the semcor dataset.
24
  It achieves the following results on the evaluation set:
25
+ - Loss: 79.1326
26
+ - Precision: 0.5602
27
+ - Recall: 0.5916
28
+ - F1: 0.5755
29
+ - Matthews: 0.5910
30
 
31
  ## Model description
32
 
 
49
  - train_batch_size: 8
50
  - eval_batch_size: 8
51
  - seed: 42
52
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
53
  - lr_scheduler_type: inverse_sqrt
54
  - lr_scheduler_warmup_steps: 1000
55
+ - num_epochs: 5
56
 
57
  ### Training results
58
 
59
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Matthews |
60
  |:-------------:|:-----:|:------:|:---------------:|:---------:|:------:|:------:|:--------:|
61
+ | No log | 0 | 0 | 614.2778 | 0.4290 | 0.3663 | 0.3952 | 0.3654 |
62
+ | 0.9441 | 1.0 | 28027 | 1.9705 | 0.5491 | 0.5863 | 0.5671 | 0.5858 |
63
+ | 0.9829 | 2.0 | 56054 | 2.1196 | 0.5651 | 0.6021 | 0.5830 | 0.6015 |
64
+ | 0.9407 | 3.0 | 84081 | 41.6424 | 0.5563 | 0.5938 | 0.5744 | 0.5932 |
65
+ | 0.8930 | 4.0 | 112108 | 666.7456 | 0.4864 | 0.5223 | 0.5037 | 0.5221 |
66
+ | 0.8190 | 5.0 | 140135 | 79.1326 | 0.5602 | 0.5916 | 0.5755 | 0.5910 |
 
 
 
 
 
67
 
68
 
69
  ### Framework versions
70
 
71
  - Transformers 5.2.0
72
+ - Pytorch 2.10.0+cu128
73
  - Datasets 4.5.0
74
  - Tokenizers 0.22.2
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e4738031e8de40fca39c5b910e41d301a71d07047dbc0b28893383db23b534
3
  size 596077624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dde9eb27703b90db3b8736d9f877e8e1f25f2237102e5a8053c38f655c8bb92
3
  size 596077624
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79b648b291efd56f0128f34fe729eaf985ba8d68028678fbbb6e87384cb7e662
3
- size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:717507419deb53ab5dc0abef075bca3820ae90ddafd0ae97e346a0d216f618cd
3
+ size 5265