# bayan-api / debug_pc023.py
# Hosting-page metadata: youssefreda9 · "update benchmark logs" · commit aea385b · 2.16 kB
import difflib
import re
def get_word_positions(text: str) -> list:
    """Return each whitespace-delimited word of *text* with its character span.

    Args:
        text: The string to split into words.

    Returns:
        A list of ``(word, start, end)`` tuples in order of appearance, such
        that ``text[start:end] == word``. The list is empty when *text*
        contains no non-whitespace characters.
    """
    # ``\S+`` matches maximal runs of non-whitespace characters, so the
    # reported offsets index directly into *text*.
    return [(m.group(), m.start(), m.end()) for m in re.finditer(r'\S+', text)]
# Simulate what happens in the pipeline for this case.
# Spelling changes القصه→القصة but does NOT touch طويل or ومملل,
# so current_text becomes: القصة طويل ومملل
# Caveat: in the logs the grammar input was القصه طويل ومملل, which would
# suggest that spelling did not fix القصه either. However, the benchmark shows
# a spelling suggestion [0:5] 'القصه'→'القصة', so spelling DID produce a patch.
# Grammar runs on the spelling-corrected text, so with that patch applied
# the grammar input is: القصة طويل ومملل
# Debug driver: run a word-level diff between the grammar stage's input and
# output, and print the character span of the input that each change covers.
grammar_input = "القصة طويل ومملل"
grammar_output = "القصة طويلة ومملة"
print(f"Grammar Input: '{grammar_input}' (len={len(grammar_input)})")
print(f"Grammar Output: '{grammar_output}' (len={len(grammar_output)})")

# Each entry is (word, start, end) with offsets into the respective string.
orig_words = get_word_positions(grammar_input)
corr_words = get_word_positions(grammar_output)

print("\nInput words:")
for word, start, end in orig_words:
    print(f" '{word}' [{start}:{end}]")

print("\nOutput words:")
for word, start, end in corr_words:
    print(f" '{word}' [{start}:{end}]")

# Diff on the word sequences (not on characters) so every opcode lines up
# with whole-word boundaries.
matcher = difflib.SequenceMatcher(
    None,
    [entry[0] for entry in orig_words],
    [entry[0] for entry in corr_words],
)

print("\nOpcodes:")
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    print(f" {tag}: orig_words[{i1}:{i2}] vs corr_words[{j1}:{j2}]")
    if tag == 'equal':
        continue

    if i1 == i2:
        # Pure insertion: no input word is consumed, so orig_words[i2 - 1]
        # would point *before* the span (or orig_words[i1] would be out of
        # range at the end). Report a zero-width span at the insertion point.
        if i1 < len(orig_words):
            start_char = orig_words[i1][1]
        else:
            start_char = len(grammar_input)
        end_char = start_char
    else:
        # Span runs from the first changed word's start to the last
        # changed word's end, covering any whitespace in between.
        start_char = orig_words[i1][1]
        end_char = orig_words[i2 - 1][2]

    orig_slice = grammar_input[start_char:end_char]
    corr_text = " ".join(entry[0] for entry in corr_words[j1:j2])
    print(f" [{start_char}:{end_char}] '{orig_slice}' → '{corr_text}'")
# Now map back to ORIGINAL text coordinates.
# Original text:  القصه طويل ومملل (len=16)
# After spelling: القصة طويل ومملل (len=16), i.e. the same length.
# Grammar diff on the current text: [6:16] 'طويل ومملل' → 'طويلة ومملة'
# Mapped to the original: identical offsets, because the spelling fix did not change the length.