| | from flask import Flask |
| | import ollama |
| |
|
| |
|
def ps():
    """Return an HTML fragment describing every model reported by ``ollama.ps()``.

    For each model the fragment contains a heading line with the model name
    followed by its digest, expiry time, size, VRAM size, details and context
    length. Every line is wrapped in its own ``<p>`` element and the
    paragraphs are joined with newlines.

    Returns:
        str: The generated HTML; an empty string when no models are reported.
    """
    from html import escape

    # Imported under an alias: this function is itself called ``ps``, so
    # importing the library function under the same name is easy to misread.
    from ollama import ProcessResponse
    from ollama import ps as ollama_ps

    response: ProcessResponse = ollama_ps()

    lines = []
    for model in response.models:
        # The heading tag is closed properly (the original emitted a second
        # opening <h3> instead of </h3>, leaving the element unterminated).
        lines.append(f'<h3>Model</h3>: {escape(str(model.model), quote=False)}')
        fields = (
            ('Digest', model.digest),
            ('Expires at', model.expires_at),
            ('Size', model.size),
            ('Size vram', model.size_vram),
            ('Details', model.details),
            ('Context length', model.context_length),
        )
        # Values are escaped so that '<', '>' or '&' inside them cannot break
        # the surrounding markup.
        lines.extend(
            f'\t {label}: {escape(str(value), quote=False)}'
            for label, value in fields
        )
        # Separator paragraph between models.
        lines.append('\n')

    return '\n'.join(f'<p>{line}</p>' for line in lines)
| |
|
def time_model(model_name='qwen3:0.6b'):
    """Send a fixed prompt to an ollama model and report timing as HTML.

    The prompt is a fixed Russian-language request ("Tell me about yourself
    in detail"). The returned fragment lists the model name, the start time,
    the wall-clock duration of the ``chat`` call, the length of the reply in
    characters, the reply rate, and finally the reply text itself. Every line
    is wrapped in its own ``<p>`` element.

    Args:
        model_name: Name of the model passed to ``ollama.chat``.

    Returns:
        str: The generated HTML fragment.
    """
    from datetime import datetime
    from html import escape

    from ollama import chat

    t_start = datetime.now()
    messages = [
        {
            'role': 'user',
            'content': 'Расскажи о себе подробно',
        },
    ]
    response = chat(model_name, messages=messages)
    # Stop the clock immediately after the call returns.
    response_time = datetime.now() - t_start

    # Treat a missing reply as empty text so len() below cannot fail.
    content = response['message']['content'] or ''
    response_len = len(content)

    # total_seconds() keeps the fractional part; the previous
    # int(timedelta.seconds) truncated to whole seconds and raised
    # ZeroDivisionError for any reply that arrived in under one second.
    elapsed = response_time.total_seconds()
    response_speed = response_len / elapsed if elapsed > 0 else 0.0

    ansver = [
        f' <h3> {escape(str(model_name), quote=False)}</h3>',
        f' start {t_start} ',
        f' duration {response_time} ',
        f' lehgth {response_len}',
        # NOTE(review): despite the label, this is characters per second
        # (response_len is len() of the reply string), not tokens per second.
        f' token/sek {response_speed}',
        # The reply is model-generated text; escape it so markup-like content
        # (e.g. '<think>' blocks) is shown literally instead of being parsed
        # as HTML.
        escape(content, quote=False),
    ]
    return '\n'.join(f'<p>{answ}</p>' for answ in ansver)
| |
|
| |
|