Spaces:

bayan10
/

bayan-api

Running

App Files Files Community

bayan-api / debug_pc023.py

youssefreda9

update benchmark logs

aea385b 3 days ago

Raw

History Blame Contribute Delete

2.16 kB

	import difflib
	import re

	def get_word_positions(text):
	positions = []
	for m in re.finditer(r'\S+', text):
	positions.append((m.group(), m.start(), m.end()))
	return positions

	# Simulate what happens in the pipeline:
	# Spelling changes القصه→القصة but NOT طويل or ومملل
	# So current_text becomes: القصة طويل ومملل (if spelling fixed القصه)
	# But wait - from the logs, grammar input was: القصه طويل ومملل
	# That means spelling didn't fix القصه either. But the benchmark shows
	# a spelling suggestion [0:5] 'القصه'→'القصة'. So spelling DID produce a patch.

	# Grammar runs on spelling-corrected text.
	# If spelling fixed القصه→القصة, grammar input would be: القصة طويل ومملل
	grammar_input = "القصة طويل ومملل"
	grammar_output = "القصة طويلة ومملة"

	print(f"Grammar Input: '{grammar_input}' (len={len(grammar_input)})")
	print(f"Grammar Output: '{grammar_output}' (len={len(grammar_output)})")

	orig_words = get_word_positions(grammar_input)
	corr_words = get_word_positions(grammar_output)

	print(f"\nInput words:")
	for w, s, e in orig_words:
	print(f" '{w}' [{s}:{e}]")

	print(f"\nOutput words:")
	for w, s, e in corr_words:
	print(f" '{w}' [{s}:{e}]")

	s = difflib.SequenceMatcher(None, [w[0] for w in orig_words], [w[0] for w in corr_words])
	print(f"\nOpcodes:")
	for tag, i1, i2, j1, j2 in s.get_opcodes():
	print(f" {tag}: orig_words[{i1}:{i2}] vs corr_words[{j1}:{j2}]")
	if tag != 'equal':
	start_char = orig_words[i1][1]
	end_char = orig_words[i2-1][2]
	orig_slice = grammar_input[start_char:end_char]
	corr_text = " ".join([w[0] for w in corr_words[j1:j2]])
	print(f" [{start_char}:{end_char}] '{orig_slice}' → '{corr_text}'")

	# Now map back to ORIGINAL text coordinates
	# Original text: القصه طويل ومملل (len=16)
	# After spelling: القصة طويل ومملل (len=16) — same length!
	# Grammar diff on current: [5:16] 'طويل ومملل' → 'طويلة ومملة'
	# Mapped to original: same since no length change from spelling