Text Generation
MLX
Safetensors
English
phi
phi-2
html
css
web-development
code-generation
fine-tuned
apple-silicon
conversational
4-bit precision
Instructions to use nexsendev/webicoder-v3-mlx-q4 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use nexsendev/webicoder-v3-mlx-q4 with MLX:
# Make sure mlx-lm is installed
# pip install --upgrade mlx-lm

# Generate text with mlx-lm
from mlx_lm import load, generate

model, tokenizer = load("nexsendev/webicoder-v3-mlx-q4")

prompt = "Write a story about Einstein"
messages = [{"role": "user", "content": prompt}]
prompt = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True
)

text = generate(model, tokenizer, prompt=prompt, verbose=True)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
- MLX LM
How to use nexsendev/webicoder-v3-mlx-q4 with MLX LM:
Generate or start a chat session
# Install MLX LM
uv tool install mlx-lm
# Interactive chat REPL
mlx_lm.chat --model "nexsendev/webicoder-v3-mlx-q4"
Run an OpenAI-compatible server
# Install MLX LM
uv tool install mlx-lm
# Start the server
mlx_lm.server --model "nexsendev/webicoder-v3-mlx-q4"
# Calling the OpenAI-compatible server with curl
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "nexsendev/webicoder-v3-mlx-q4",
    "messages": [
      {"role": "user", "content": "Hello"}
    ]
  }'
| #!/usr/bin/env python3 | |
| """ | |
| WebICoder v3 — Quick Start Example | |
| Generate HTML websites from natural language prompts using MLX on Apple Silicon. | |
| ⚠️ MANDATORY: This script implements all 5 required rules for correct output. | |
| See README.md for full documentation. | |
| Usage: | |
| python example.py "Create a landing page for a coffee shop" | |
| python example.py --interactive | |
| """ | |
| import sys | |
| import re | |
| from mlx_lm import load, stream_generate | |
| from mlx_lm.sample_utils import make_sampler, make_logits_processors | |
| # ─── Configuration ────────────────────────────────────────────────────────── | |
# Where the model is loaded from: "." is the current directory (the model repo).
MODEL_PATH = "."

# RULE 1: system prompt that opens every Alpaca-style prompt
# (### Instruction / ### Response).
SYSTEM_PROMPT = (
    "You are WebICoder, an expert frontend web developer specializing in "
    "premium, Apple-inspired design. "
    "You create stunning websites using only HTML, CSS, and vanilla JavaScript. "
    "Your designs feature: minimalist layouts, elegant typography, smooth "
    "animations, glassmorphism effects, generous whitespace, and a refined "
    "color palette. "
    "You always produce complete, production-ready code."
)

# RULE 2: stop sequences — MANDATORY to prevent infinite loops.
STOP_SEQUENCES = [
    "</html>",
    "### Instruction:",
    "You are Deepcoder",
    "You are WebICoder",
]

# RULE 4: low temperature — MANDATORY for coherent HTML.
DEFAULT_TEMP = 0.4
DEFAULT_MAX_TOKENS = 4096
| # ─── RULE 1: Prompt Formatting (MANDATORY) ────────────────────────────────── | |
def format_prompt(user_input: str) -> str:
    """
    MANDATORY: Wrap the user's request in the model's training prompt format.

    The prompt is the system prompt, then an "### Instruction:" section holding
    *user_input*, then an empty "### Response:" section for the model to fill.
    The model was trained with Alpaca-style prompts; raw, unformatted text
    will produce garbage output.
    """
    sections = (
        SYSTEM_PROMPT,
        f"### Instruction:\n{user_input}",
        "### Response:\n",
    )
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)
| # ─── RULE 5: Post-Processing (MANDATORY) ──────────────────────────────────── | |
def clean_html(text: str) -> str:
    """
    MANDATORY: Extract clean HTML from raw model output.

    The model may leak training artifacts (system prompt, instruction
    markers). These are stripped first, then the HTML is recovered:

    1. If a complete document (``<!DOCTYPE html>``/``<html>`` through
       ``</html>``) is present, that document alone is returned.
    2. Otherwise, if any HTML-looking tag is found, everything from that tag
       onward is returned, with a doctype, an ``<html>`` opener and a closing
       ``</html>`` added only where they are missing.
    3. Otherwise the stripped text is returned as-is.
    """
    # Remove system prompt leaks. The name is matched with either case of the
    # "c" so that both "Deepcoder" and the actual prompt spelling "WebICoder"
    # are caught.
    leak_patterns = (
        r"You are (?:Deep|Web[iI])[cC]oder.*?production-ready code\.\n*",
        r"### Instruction:.*",  # with DOTALL this drops everything after the marker
        r"### Response:\s*",
    )
    for pattern in leak_patterns:
        text = re.sub(pattern, "", text, flags=re.DOTALL)

    # Preferred path: a complete HTML document.
    document = re.search(
        r"(<(?:!DOCTYPE\s+html|html)[\s\S]*?</html>)", text, re.IGNORECASE
    )
    if document:
        return document.group(1).strip()

    # Fallback: find any HTML content and wrap it.
    fragment_start = re.search(
        r"<(?:!DOCTYPE|html|head|body|link)", text, re.IGNORECASE
    )
    if fragment_start is None:
        return text.strip()

    html = text[fragment_start.start():].strip()
    lowered = html.lower()
    if not lowered.startswith("<!doctype"):
        # Do not add a second <html> opener when the fragment already has one.
        if lowered.startswith("<html"):
            html = "<!DOCTYPE html>\n" + html
        else:
            html = "<!DOCTYPE html>\n<html>\n" + html
    if "</html>" not in lowered:
        html += "\n</html>"
    return html
| # ─── Generation ───────────────────────────────────────────────────────────── | |
# (model, tokenizer) pairs keyed by model path, so repeated calls (for example
# one per prompt in interactive mode) do not reload the weights each time.
_LOADED_MODELS: dict = {}


def _load_model_once(path):
    """Return the (model, tokenizer) pair for *path*, loading it on first use only."""
    if path not in _LOADED_MODELS:
        print(f"[INFO] Loading model from: {path}")
        _LOADED_MODELS[path] = load(path)
    return _LOADED_MODELS[path]


def _truncate_at_stop(text):
    """Cut *text* at the earliest stop sequence; return ``(text, stopped)``."""
    best_pos, best_seq = -1, None
    for seq in STOP_SEQUENCES:
        pos = text.find(seq)
        if pos != -1 and (best_seq is None or pos < best_pos):
            best_pos, best_seq = pos, seq
    if best_seq is None:
        return text, False
    # "</html>" is part of the document and is kept; every other stop
    # sequence is a leaked artifact and is dropped.
    end = best_pos + len(best_seq) if best_seq == "</html>" else best_pos
    return text[:end], True


def generate_html(prompt: str, temperature: float = DEFAULT_TEMP, max_tokens: int = DEFAULT_MAX_TOKENS) -> str:
    """
    Generate HTML from a natural language prompt.

    Implements all 5 mandatory rules:
    1. Prompt formatting (### Instruction / ### Response)
    2. Stop at </html> (or at any other entry of STOP_SEQUENCES)
    3. Repetition penalty (1.2, context=256)
    4. Low temperature (0.4 by default)
    5. Post-processing (clean_html)

    Args:
        prompt: Natural language description of the page to generate.
        temperature: Sampling temperature passed to the sampler.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated text after post-processing by ``clean_html``.

    The model is loaded from MODEL_PATH on the first call and reused on later
    calls. Generated text is streamed to stdout as it is produced, followed by
    the generation statistics reported by the last streamed response.
    """
    model, tokenizer = _load_model_once(MODEL_PATH)

    # RULE 1: Format the prompt
    formatted_prompt = format_prompt(prompt)

    # RULE 4: Low temperature sampler
    sampler = make_sampler(temp=temperature)

    # RULE 3: Repetition penalty — MANDATORY
    logits_processors = make_logits_processors(
        repetition_penalty=1.2,
        repetition_context_size=256,
    )

    print(f"[INFO] Generating (temp={temperature}, max_tokens={max_tokens}, rep_penalty=1.2)...")
    print("─" * 60)

    full_text = ""
    last_response = None
    for response in stream_generate(
        model,
        tokenizer,
        prompt=formatted_prompt,
        max_tokens=max_tokens,
        sampler=sampler,
        logits_processors=logits_processors,  # RULE 3
    ):
        last_response = response
        full_text += response.text
        print(response.text, end="", flush=True)

        # RULE 2: Stop at </html> — MANDATORY. The earliest stop sequence in
        # the accumulated text wins, regardless of its order in STOP_SEQUENCES.
        full_text, stopped = _truncate_at_stop(full_text)
        if stopped or response.finish_reason is not None:
            break

    print("\n" + "─" * 60)
    if last_response is not None:
        print(f"[INFO] Generated {last_response.generation_tokens} tokens at {last_response.generation_tps:.1f} tok/s")
        print(f"[INFO] Peak memory: {last_response.peak_memory:.2f} GB")

    # RULE 5: Clean the output — MANDATORY
    return clean_html(full_text)
| # ─── Main ─────────────────────────────────────────────────────────────────── | |
def _save_html(html, output_file="output.html"):
    """Write *html* to *output_file* as UTF-8 and report how much was saved."""
    # Explicit UTF-8: generated pages may contain non-ASCII characters, and
    # the platform default encoding is not guaranteed to handle them.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"\n[INFO] Saved to {output_file} ({len(html)} chars)")


def main():
    """
    Command-line entry point.

    With arguments other than a leading ``--interactive``, all arguments are
    joined into a single prompt, one page is generated and saved to
    ``output.html``. Otherwise an interactive loop reads prompts until an
    empty line, ``quit``/``exit``/``q``, Ctrl-C or end of input, overwriting
    ``output.html`` after each generation.
    """
    args = sys.argv[1:]
    if args and args[0] != "--interactive":
        # Single prompt mode
        _save_html(generate_html(" ".join(args)))
        return

    # Interactive mode
    print("=" * 60)
    print(" ⚡ WebICoder v3 — Interactive Mode")
    print(" Type a website description, press Enter to generate.")
    print(" Type 'quit' to exit.")
    print("=" * 60)
    while True:
        try:
            # NOTE(review): the prompt glyph was mis-encoded in the source;
            # the globe emoji is a best-guess reconstruction — confirm.
            prompt = input("\n🌐 Describe your website: ").strip()
            if not prompt or prompt.lower() in ("quit", "exit", "q"):
                break
            _save_html(generate_html(prompt))
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C, or Ctrl-D / closed stdin, both end the session cleanly.
            print("\n[INFO] Bye!")
            break


if __name__ == "__main__":
    main()