GliteTech
/

DisambertSingleSense-base

@@ -41,7 +41,8 @@ class DisamBertSingleSense(PreTrainedModel):
     def __init__(self, config: PreTrainedConfig):
         super().__init__(config)
         if config.init_basemodel:
-            self.BaseModel = AutoModel.from_pretrained(config.name_or_path, device_map="auto")
             self.config.vocab_size += 2
             self.BaseModel.resize_token_embeddings(self.config.vocab_size)
         else:
@@ -101,24 +102,28 @@ class DisamBertSingleSense(PreTrainedModel):
         with self.device:
             vectors = self.BaseModel(candidates, candidate_attention_masks).last_hidden_state[:, 0]
             chunks = [
-                torch.squeeze(vectors[(candidate_mapping == sentence_index).nonzero()],
-                              dim=1)
                 for sentence_index in torch.unique(candidate_mapping)
             ]
             maxlen = max(chunk.shape[0] for chunk in chunks)
             return torch.stack(
                 [
-                    torch.cat([chunk, torch.zeros((maxlen - chunk.shape[0], self.config.hidden_size))])
                     for chunk in chunks
                 ]
             )
 class CandidateLabeller:
-    def __init__(self, tokenizer: PreTrainedTokenizer,
-                 ontology: Generator[LexicalExample],
-                 device:torch.device,
-                 retain_candidates: bool = False):
         self.tokenizer = tokenizer
         self.device = device
         self.gloss_tokens = {
@@ -137,7 +142,11 @@ class CandidateLabeller:
             ]
             tokens = self.tokenizer.pad(encoded, padding=True, return_tensors="pt")
             candidate_tokens = self.tokenizer.pad(
-                [self.gloss_tokens[concept] for example in batch for concept in example["candidates"]],
                 padding=True,
                 return_attention_mask=True,
                 return_tensors="pt",
@@ -159,5 +168,5 @@ class CandidateLabeller:
                     [example["candidates"].index(example["label"]) for example in batch]
                 )
             if self.retain_candidates:
-                result['candidates'] = [example['candidates'] for example in batch]
             return result

     def __init__(self, config: PreTrainedConfig):
         super().__init__(config)
         if config.init_basemodel:
+            self.BaseModel = AutoModel.from_pretrained(config.name_or_path,
+                                                       device_map="auto")
             self.config.vocab_size += 2
             self.BaseModel.resize_token_embeddings(self.config.vocab_size)
         else:
         with self.device:
             vectors = self.BaseModel(candidates, candidate_attention_masks).last_hidden_state[:, 0]
             chunks = [
+                torch.squeeze(vectors[(candidate_mapping == sentence_index).nonzero()], dim=1)
                 for sentence_index in torch.unique(candidate_mapping)
             ]
             maxlen = max(chunk.shape[0] for chunk in chunks)
             return torch.stack(
                 [
+                    torch.cat(
+                        [chunk, torch.zeros((maxlen - chunk.shape[0], self.config.hidden_size))]
+                    )
                     for chunk in chunks
                 ]
             )
 class CandidateLabeller:
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizer,
+        ontology: Generator[LexicalExample],
+        device: torch.device,
+        retain_candidates: bool = False,
+    ):
         self.tokenizer = tokenizer
         self.device = device
         self.gloss_tokens = {
             ]
             tokens = self.tokenizer.pad(encoded, padding=True, return_tensors="pt")
             candidate_tokens = self.tokenizer.pad(
+                [
+                    self.gloss_tokens[concept]
+                    for example in batch
+                    for concept in example["candidates"]
+                ],
                 padding=True,
                 return_attention_mask=True,
                 return_tensors="pt",
                     [example["candidates"].index(example["label"]) for example in batch]
                 )
             if self.retain_candidates:
+                result["candidates"] = [example["candidates"] for example in batch]
             return result

README.md CHANGED Viewed

@@ -11,22 +11,22 @@ metrics:
 - recall
 - f1
 model-index:
-- name: DisambertSingleSense-base
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# DisambertSingleSense-base
 This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the SemCor dataset.
 It achieves the following results on the evaluation set:
-- Loss: 7.9132
-- Precision: 0.7725
-- Recall: 0.7594
-- F1: 0.7659
-- Matthews: 0.7589
 ## Model description
@@ -49,31 +49,26 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
-- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: inverse_sqrt
 - lr_scheduler_warmup_steps: 1000
-- num_epochs: 10
 ### Training results
 | Training Loss | Epoch | Step   | Validation Loss | Precision | Recall | F1     | Matthews |
 |:-------------:|:-----:|:------:|:---------------:|:---------:|:------:|:------:|:--------:|
-| No log        | 0     | 0      | 83.4924         | 0.4375    | 0.3642 | 0.3975 | 0.3634   |
-| 0.4971        | 1.0   | 28027  | 0.7339          | 0.7793    | 0.7669 | 0.7730 | 0.7664   |
-| 0.3296        | 2.0   | 56054  | 0.9845          | 0.7756    | 0.7656 | 0.7705 | 0.7651   |
-| 0.1843        | 3.0   | 84081  | 2.0537          | 0.7743    | 0.7616 | 0.7679 | 0.7611   |
-| 0.0903        | 4.0   | 112108 | 3.9497          | 0.7729    | 0.7559 | 0.7643 | 0.7554   |
-| 0.0171        | 5.0   | 140135 | 5.8641          | 0.7727    | 0.7555 | 0.7640 | 0.7550   |
-| 0.0394        | 6.0   | 168162 | 6.5708          | 0.7747    | 0.7555 | 0.7650 | 0.7550   |
-| 0.0011        | 7.0   | 196189 | 7.4188          | 0.7705    | 0.7550 | 0.7627 | 0.7545   |
-| 0.0231        | 8.0   | 224216 | 7.0225          | 0.7762    | 0.7621 | 0.7691 | 0.7615   |
-| 0.0015        | 9.0   | 252243 | 6.9004          | 0.7766    | 0.7599 | 0.7681 | 0.7594   |
-| 0.0000        | 10.0  | 280270 | 7.9132          | 0.7725    | 0.7594 | 0.7659 | 0.7589   |
 ### Framework versions
 - Transformers 5.2.0
-- Pytorch 2.6.0+cu124
 - Datasets 4.5.0
 - Tokenizers 0.22.2

 - recall
 - f1
 model-index:
+- name: DisamBertSingleSense-base
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# DisamBertSingleSense-base
 This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the SemCor dataset.
 It achieves the following results on the evaluation set:
+- Loss: 79.1326
+- Precision: 0.5602
+- Recall: 0.5916
+- F1: 0.5755
+- Matthews: 0.5910
 ## Model description
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: inverse_sqrt
 - lr_scheduler_warmup_steps: 1000
+- num_epochs: 5
 ### Training results
 | Training Loss | Epoch | Step   | Validation Loss | Precision | Recall | F1     | Matthews |
 |:-------------:|:-----:|:------:|:---------------:|:---------:|:------:|:------:|:--------:|
+| No log        | 0     | 0      | 614.2778        | 0.4290    | 0.3663 | 0.3952 | 0.3654   |
+| 0.9441        | 1.0   | 28027  | 1.9705          | 0.5491    | 0.5863 | 0.5671 | 0.5858   |
+| 0.9829        | 2.0   | 56054  | 2.1196          | 0.5651    | 0.6021 | 0.5830 | 0.6015   |
+| 0.9407        | 3.0   | 84081  | 41.6424         | 0.5563    | 0.5938 | 0.5744 | 0.5932   |
+| 0.8930        | 4.0   | 112108 | 666.7456        | 0.4864    | 0.5223 | 0.5037 | 0.5221   |
+| 0.8190        | 5.0   | 140135 | 79.1326         | 0.5602    | 0.5916 | 0.5755 | 0.5910   |
 ### Framework versions
 - Transformers 5.2.0
+- Pytorch 2.10.0+cu128
 - Datasets 4.5.0
 - Tokenizers 0.22.2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0e4738031e8de40fca39c5b910e41d301a71d07047dbc0b28893383db23b534
 size 596077624

 version https://git-lfs.github.com/spec/v1
+oid sha256:9dde9eb27703b90db3b8736d9f877e8e1f25f2237102e5a8053c38f655c8bb92
 size 596077624

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79b648b291efd56f0128f34fe729eaf985ba8d68028678fbbb6e87384cb7e662
-size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:717507419deb53ab5dc0abef075bca3820ae90ddafd0ae97e346a0d216f618cd
+size 5265