fivetech committed on
Commit
315e10d
·
verified ·
1 Parent(s): a68d103

Upload test_battery_continue.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test_battery_continue.py +159 -0
test_battery_continue.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Continue test battery from checkpoint - runs the remaining tests (70-100)"""
3
+
4
+ import json, time, subprocess, requests, sys
5
+ from pathlib import Path
6
+ from datetime import datetime
7
+
8
# --- Endpoint and model used for generation ---
OLLAMA_URL = "http://localhost:11434/api/generate"
MODEL = "qwen3.6:35b"

# --- Local toolchain and output locations ---
HARBOUR = "/home/fivetech/harbour/bin/linux/gcc/harbour"
_FINETUNE_DIR = Path("/home/fivetech/finetune")
WORK_DIR = _FINETUNE_DIR / "test_output"
RESULTS_FILE = _FINETUNE_DIR / "test_baseline_100.json"

# System prompt text, one instruction per line.
SYSTEM = "\n".join((
    "You are an expert Harbour programmer. Write clean, correct, COMPILABLE Harbour code.",
    "Use Hungarian notation: n=numeric, c=character, l=logical, a=array, o=object, d=date.",
    "Use 3-space indentation.",
    "Do NOT include explanations, markdown, or #include. Only raw Harbour code.",
    "End functions with RETURN and END FUNCTION.",
))
19
+
20
def query(prompt, timeout=180):
    """Send *prompt* to the endpoint at OLLAMA_URL and collect the reply.

    Args:
        prompt: Text placed in the ``prompt`` field of the request payload.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        dict with keys ``resp`` (the ``response`` text), ``tok``
        (``eval_count``), ``dur`` (wall-clock seconds measured around the
        request) and ``tps`` (``eval_count`` per second of
        ``eval_duration``, which is divided by 1e9). When the request
        fails, the same keys hold empty/zero values and an extra ``err``
        key carries the error text.
    """
    def failed(message):
        # Uniform failure record; callers detect it through the "err" key.
        return {"resp": "", "tok": 0, "dur": 0, "tps": 0, "err": message}

    # NOTE(review): the module-level SYSTEM prompt is not part of this
    # payload — confirm whether that is intentional.
    payload = {
        "model": MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.2, "num_predict": 1500, "top_p": 0.9},
    }
    try:
        t0 = time.time()
        r = requests.post(OLLAMA_URL, json=payload, timeout=timeout)
        # A non-2xx reply must not be mistaken for an empty generation.
        if not r.ok:
            return failed(f"HTTP {r.status_code}: {r.text[:200]}")
        d = r.json()
        # Presumably the server reports failures through an "error" field
        # in the JSON body — TODO confirm against the API documentation.
        if d.get("error"):
            return failed(str(d["error"]))
        tok = d.get("eval_count", 0)
        # Floor the duration so a zero eval_duration cannot divide by zero.
        secs = max(d.get("eval_duration", 1) / 1e9, .001)
        return {"resp": d.get("response", ""), "tok": tok,
                "dur": time.time() - t0, "tps": tok / secs}
    except Exception as e:
        # Deliberately broad: one failed request is recorded as an error
        # result instead of aborting the whole battery.
        return failed(str(e))
31
+
32
def compile_hb(code):
    """Write *code* to ``WORK_DIR/test.prg`` and run the Harbour compiler on it.

    Args:
        code: Source text to compile.

    Returns:
        ``(ok, message)``. ``ok`` is True when the compiler exits with
        status 0. ``message`` is the first 400 characters of the compiler's
        stderr (stdout when stderr is empty), ``"timeout"`` when the
        compiler ran longer than 20 seconds, or the error text when the
        source file could not be written or the compiler could not be run.
    """
    try:
        # Create the work directory on demand so a fresh checkout works.
        WORK_DIR.mkdir(parents=True, exist_ok=True)
        f = WORK_DIR / "test.prg"
        f.write_text(code)
        r = subprocess.run([HARBOUR, str(f), "-n", "-w"],
                           capture_output=True, text=True, timeout=20)
    except subprocess.TimeoutExpired:
        return False, "timeout"
    except (OSError, ValueError) as e:
        # Missing compiler binary, unwritable directory, or text that cannot
        # be encoded/decoded: report the real cause instead of "timeout".
        # KeyboardInterrupt is intentionally not caught here.
        return False, str(e)[:400]
    return r.returncode == 0, (r.stderr or r.stdout).strip()[:400]
40
+
41
def clean(resp):
    """Extract the code portion from a model response.

    Strategy, in order:
      1. If the response contains Markdown fences (lines starting with
         three backticks), return only the text inside the fences.
      2. Otherwise return everything from the first line that starts with
         one of the recognised code prefixes through the end of the response.
      3. Otherwise return the whole response.

    Args:
        resp: Raw response text.

    Returns:
        The extracted text with leading/trailing whitespace stripped.
    """
    starts = ('FUNCTION', 'PROCEDURE', 'LOCAL', 'STATIC', '#DEFINE', 'CLASS',
              'METHOD', 'RETURN', 'SET', 'REQUEST', 'MEMVAR', '*')
    lines = resp.split('\n')

    # Pass 1: collect the content of fenced blocks only, so prose around
    # the fences (even prose that happens to start with a keyword) is dropped.
    fenced = []
    in_code = False
    for line in lines:
        if line.strip().startswith('```'):
            in_code = not in_code
            continue
        if in_code:
            fenced.append(line)
    body = '\n'.join(fenced).strip()
    if body:
        return body

    # Pass 2: no usable fenced content. Keep everything from the first
    # code-looking line onward, not just that single line.
    plain = [ln for ln in lines if not ln.strip().startswith('```')]
    for idx, ln in enumerate(plain):
        if ln.strip().upper().startswith(starts):
            return '\n'.join(plain[idx:]).strip()

    return resp.strip()
57
+
58
def save(results, meta):
    """Write the checkpoint JSON to RESULTS_FILE.

    The document holds ``model`` (MODEL), ``ts`` (current local time in ISO
    format), every key of *meta*, and ``results``, in that order.

    Args:
        results: List of per-test result dicts.
        meta: Extra top-level keys merged into the document.
    """
    payload = {"model": MODEL, "ts": datetime.now().isoformat(), **meta, "results": results}
    # Write to a sibling temp file and rename it over the target, so an
    # interruption or serialization error never leaves a truncated checkpoint.
    tmp = RESULTS_FILE.with_name(RESULTS_FILE.name + ".tmp")
    # ensure_ascii=False emits raw non-ASCII characters, so pin the encoding
    # instead of relying on the locale default.
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
    tmp.replace(RESULTS_FILE)
61
+
62
+ # Tests 70-100 (remaining after A01-A48, O01-O21)
63
+ TESTS = [
64
+ ("O22","OOP","Composition","Write Harbour classes using composition Engine inside Car."),
65
+ ("X01","Other","Preprocessor defines","Write Harbour preprocessor #define for constants and #ifdef platform detection."),
66
+ ("X02","Other","Custom command","Write #xcommand shorthand for declaring variables with initialization."),
67
+ ("X03","Other","HB_Is functions","Write validation using HB_IsString HB_IsNumeric HB_IsArray HB_IsNil."),
68
+ ("X04","Other","Regex validation","Write Harbour code using HB_RegExCompile HB_RegExMatch to validate emails."),
69
+ ("X05","Other","Serialization","Write Harbour code using HB_Serialize HB_Deserialize to save load hash."),
70
+ ("X06","Other","File path ops","Write Harbour code using hb_DirBuild hb_FileNameGet hb_PathJoin."),
71
+ ("X07","Other","Version check","Write Harbour code using HB_Version to detect version conditionally."),
72
+ ("X08","Other","Translation","Write Harbour #translate directives mapping alternative syntax."),
73
+ ("X09","Other","Conditional defines","Write Harbour code with nested ifdef ifndef else for feature toggling."),
74
+ ("F01","Functions","Default params","Write Harbour function with default parameter values."),
75
+ ("F02","Functions","Recursion","Write recursive Harbour function for factorial."),
76
+ ("F03","Functions","Scope demo","Write Harbour code demonstrating LOCAL STATIC PRIVATE PUBLIC scope."),
77
+ ("F04","Functions","Code block eval","Write Harbour code using Eval with code blocks and AEval."),
78
+ ("F05","Functions","Error handling","Write Harbour function with BEGIN SEQUENCE RECOVER for safe reading."),
79
+ ("F06","Functions","Pass by ref","Write Harbour function modifying caller variable with @."),
80
+ ("F07","Functions","Variable args","Write Harbour function accepting variable number of arguments."),
81
+ ("F08","Functions","Nested calls","Write Harbour code with nested function calls and scope isolation."),
82
+ ("D01","Database","Create DBF","Write Harbour code creating DBF with DBCreate specifying field types."),
83
+ ("D02","Database","Open append","Write Harbour code opening DBF with DBUseArea appending records."),
84
+ ("D03","Database","Indexing","Write Harbour code creating index with ORDCREATE and DBSeek."),
85
+ ("D04","Database","DBEval sum","Write Harbour code using DBEval to sum numeric field."),
86
+ ("D05","Database","Filter","Write Harbour code using SET FILTER TO processing filtered records."),
87
+ ("D06","Database","Multi-area","Write Harbour code using multiple work areas with SELECT."),
88
+ ("D07","Database","Relations","Write Harbour code setting parent-child relation DBSetRelation."),
89
+ ("I01","File I/O","Text read write","Write Harbour functions for text file R/W using FCreate FOpen FRead FWrite FClose."),
90
+ ("I02","File I/O","Line by line","Write Harbour code reading file line by line with FEof."),
91
+ ("I03","File I/O","Directory list","Write Harbour code using Directory listing files with pattern."),
92
+ ("I04","File I/O","File exists","Write Harbour code checking file existence with File function."),
93
+ ("C01","Control","Complex IF","Write Harbour function nested IF ELSEIF ELSE with AND OR conditions."),
94
+ ("C02","Control","Nested loops","Write Harbour code nested FOR loops EXIT LOOP finding combinations."),
95
+ ]
96
+
97
# Load existing results from the checkpoint file.
with open(RESULTS_FILE) as f:
    data = json.load(f)
results = data["results"]
# Use the stored counters when present; otherwise recount from the rows.
pass_c = data.get("pass", sum(1 for r in results if r.get("ok")))
fail_c = data.get("fail", len(results) - pass_c)


def _rate(passed, count):
    """Return the pass percentage, or 0.0 when nothing has been run yet."""
    return passed / count * 100 if count else 0.0


# Skip tests whose id is already in the checkpoint, so restarting this
# script after a partial run does not append duplicate result rows.
done_ids = {r.get("id") for r in results}
pending = [t for t in TESTS if t[0] not in done_ids]
total = len(results) + len(pending)
so_far_rate = data.get("rate", _rate(pass_c, len(results)))

print(f"{'='*60}")
print(f"CONTINUING from test {len(results)+1}/{total}")
print(f"So far: {pass_c} pass, {fail_c} fail ({so_far_rate:.1f}%)")
print(f"{'='*60}")

for i, (tid, cat, name, prompt) in enumerate(pending, len(results) + 1):
    # "\r" lets the final status line overwrite this progress line.
    sys.stdout.write(f"\r[{i:3d}/{total}] {tid} {name}...")
    sys.stdout.flush()

    res = query(prompt)

    if res.get("err"):
        # Generation failed: record it as a failure and skip compilation.
        results.append({"id": tid, "cat": cat, "name": name, "ok": False, "err": res["err"],
                        "code": "", "tok": 0, "tps": 0, "dur": 0, "lines": 0})
        fail_c += 1
        print(f"\r[{i:3d}/{total}] {tid} {name}... ERR: {res['err'][:50]}")
        save(results, {"pass": pass_c, "fail": fail_c, "rate": _rate(pass_c, len(results))})
        continue

    code = clean(res["resp"])
    ok, cerr = compile_hb(code)

    if ok:
        pass_c += 1
    else:
        fail_c += 1

    n_lines = code.count('\n') + 1
    # Only the first line of the compiler output is shown, and only on failure.
    err_short = cerr.split('\n')[0][:60] if cerr and not ok else ""
    print(f"\r[{i:3d}/{total}] {tid} {name}... {'PASS' if ok else 'FAIL'} | {n_lines}L | {res['tok']}t | {res['tps']:.0f}tps"
          + (f" | {err_short}" if err_short else ""))

    results.append({"id": tid, "cat": cat, "name": name, "ok": ok, "err": cerr[:400],
                    "code": code[:2500], "tok": res["tok"], "tps": res["tps"],
                    "dur": res["dur"], "lines": n_lines})

    # Checkpoint after every test so an interrupted run can be resumed.
    save(results, {"pass": pass_c, "fail": fail_c, "rate": _rate(pass_c, len(results))})

# Final summary
print(f"\n\n{'='*60}")
print(f"FINAL RESULTS ({len(results)}/{total})")
print(f"{'='*60}")

# Per-category tallies: cats[category] == [passed, failed].
cats = {}
for r in results:
    c = r["cat"]
    if c not in cats:
        cats[c] = [0, 0]
    cats[c][0 if r["ok"] else 1] += 1

print(f"\n{'Category':<12} {'Pass':>5} {'Fail':>5} {'Rate':>7}")
print("-"*32)
for c in sorted(cats):
    p, f = cats[c]
    print(f"{c:<12} {p:>5} {f:>5} {p/(p+f)*100:>6.0f}%")
print(f"\n{'TOTAL':<12} {pass_c:>5} {fail_c:>5} {_rate(pass_c, len(results)):>6.0f}%")

# Rows loaded from the checkpoint may lack timing fields; count those as 0.
total_tok = sum(r.get("tok", 0) for r in results)
total_dur = sum(r.get("dur", 0) for r in results)
print(f"Tokens: {total_tok:,} | Time: {total_dur:.0f}s | TPS: {total_tok/max(total_dur,1):.0f}")

save(results, {"pass": pass_c, "fail": fail_c, "rate": _rate(pass_c, len(results)), "cats": cats,
               "total_tok": total_tok, "total_dur": total_dur})
print(f"\nSaved: {RESULTS_FILE}")