from flask import Flask

import ollama


def ps():
    """Report the models currently loaded in the local Ollama server.

    Returns:
        str: one field per line for every loaded model (name, digest,
        expiry, size, VRAM size, details, context length), with a blank
        separator line between models.
    """
    # NOTE: aliased to avoid shadowing this function's own name `ps`.
    from ollama import ProcessResponse, ps as ollama_ps

    lines = []
    response: ProcessResponse = ollama_ps()
    for model in response.models:
        # f-strings below were torn apart in the original source
        # (markup stripped); reconstructed as plain single-line text.
        lines.append(f'Model: {model.model}')
        lines.append(f'\t Digest: {model.digest}')
        lines.append(f'\t Expires at: {model.expires_at}')
        lines.append(f'\t Size: {model.size}')
        lines.append(f'\t Size vram: {model.size_vram}')
        lines.append(f'\t Details: {model.details}')
        lines.append(f'\t Context length: {model.context_length}')
        lines.append('\n')
    return '\n'.join(lines)


def time_model(model_name='qwen3:0.6b'):
    """Time a single chat completion against *model_name*.

    Sends a fixed prompt to the model via ``ollama.chat`` and returns a
    text report: start time, duration, response length, throughput
    (chars/sec), and the response content itself.

    Args:
        model_name: Ollama model tag to benchmark.

    Returns:
        str: the newline-joined report.
    """
    from datetime import datetime
    from ollama import chat

    t_start = datetime.now()
    messages = [
        {
            'role': 'user',
            'content': 'Расскажи о себе подробно',
        },
    ]
    response = chat(model_name, messages=messages)
    # Measure immediately after the call returns, before building the report.
    response_time = datetime.now() - t_start

    content = response['message']['content']
    # BUG FIX: the original commented out `response_len = len(...)` but
    # still used the name, causing a NameError on every call.
    response_len = len(content)

    # BUG FIX: original divided by int(response_time.seconds), which is 0
    # for sub-second responses (ZeroDivisionError) and ignores days.
    elapsed = response_time.total_seconds()
    response_speed = response_len / elapsed if elapsed > 0 else float('inf')

    report = [
        f'{model_name}',
        f' start {t_start} ',
        f' duration {response_time} ',
        f' length {response_len}',
        f' token/sec {response_speed}',
        content,
    ]
    return '\n'.join(report)