Commit e75bc9be authored by chenzk

v1.0
from mlx_lm import load, generate
model, tokenizer = load("mlx-community/SmolLM-1.7B-Instruct-8bit")
response = generate(model, tokenizer, prompt="hello", verbose=True)
print(response)
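# Note: this snippet assumes the mlx-lm package is installed (e.g. `pip install mlx-lm`)
# and an Apple-silicon Mac, since MLX targets Apple hardware.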
import { pipeline } from "@huggingface/transformers";
// Create a text generation pipeline
const generator = await pipeline(
"text-generation",
"HuggingFaceTB/SmolLM2-135M-Instruct",
{ dtype: "q4f16", revision: "refs/pr/6" },
);
// Define the list of messages
const messages = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "Rewrite the following: hello how r u?" },
];
// Generate a response
const output = await generator(messages, { max_new_tokens: 128 });
console.log(output[0].generated_text.at(-1).content);
// "Hello, how's it going?"
# Model code
modelCode=1120
# Model name
modelName=smollm_pytorch
# Model description
modelDescription=SmolLM2 1.7B, a rising star among on-device small models, beats Qwen 2.5 1.5B and Llama 3.2 1B and can run in under 2 GB of VRAM.
# Application scenarios
appScenario=inference, training, conversational QA, manufacturing, media, finance, energy, healthcare, smart home, education
# Framework type
frameType=pytorch
# Pre-training
We use the [nanotron](https://github.com/huggingface/nanotron/) library to train the SmolLM and SmolLM2 base models.
The scripts for training SmolLM v1 can be found in the `smollm1` folder. SmolLM2 has a similar architecture and setup but uses an improved data mixture that we curated, along with significantly longer training runs (11 trillion tokens for the 1.7B, 4 trillion for the 360M, and 2 trillion for the 135M). We will upload the SmolLM2 configs soon.
## Setup
Please refer to [nanotron](https://github.com/huggingface/nanotron/) for detailed instructions on setting up your training environment and launching jobs.
After setting up the environment and tokenizing the training datasets with [datatrove](https://github.com/huggingface/datatrove) (instructions available [here](https://github.com/huggingface/nanotron/blob/main/docs/nanoset.md#nanosets)), you can modify the configurations to match your number of nodes and local paths.
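As a rough sketch of the tokenization step (the reader, paths, and task count below are illustrative assumptions; follow the linked datatrove instructions for the exact pipeline), producing the tokenized folders the configs below point at might look like:
```python
from datatrove.executor import LocalPipelineExecutor
from datatrove.pipeline.readers import JsonlReader
from datatrove.pipeline.tokens import DocumentTokenizer

# Read raw documents and write tokenized shards that nanotron's Nanoset can consume.
# Input path and task count are placeholders; adjust to your data layout.
executor = LocalPipelineExecutor(
    pipeline=[
        JsonlReader("raw/fineweb-edu-dedup"),
        DocumentTokenizer(
            output_folder="datasets/fineweb-edu-dedup",
            tokenizer_name_or_path="HuggingFaceTB/cosmo2-tokenizer",
            eos_token="<|endoftext|>",  # assumed EOS for the cosmo2 tokenizer
        ),
    ],
    tasks=8,
)
executor.run()
```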
Below is an example of launching SmolLM1 135M training on a single node (set the DP value to 8 in the config and adjust the batch size accordingly), then run:
```bash
git clone https://github.com/huggingface/nanotron
cd nanotron
# follow the installation instructions in the nanotron README
CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=8 run_train.py --config-file smollm1/config_smollm1_135M.yaml
```
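For a single-node run, the relevant knob is the `parallelism` section of the YAML config (shown here with the values suggested above; you can also raise `batch_accumulation_per_replica` to keep the global batch size unchanged):
```yaml
parallelism:
  dp: 8  # 1 node with 8 GPUs (the released config uses dp: 32 for 4 nodes)
  pp: 1
  tp: 1
```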
If you are working on a Slurm cluster, you can modify `launch.slurm` and launch the training with:
```bash
sbatch launch.slurm
```
> [!NOTE]
> Don't forget to create the logs directory before launching the job:
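> ```bash
> mkdir -p logs
> ```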
#!/bin/bash
#SBATCH --job-name=smollm1-135M
#SBATCH --nodes=4
#SBATCH --gres=gpu:8
#SBATCH --qos=high
#SBATCH --output=./logs/train-%j.out
#SBATCH --error=./logs/train-%j.err
set -e
TRAINER_PYTHON_FILE="run_train.py"
CONFIG_PATH_YAML="smollm1/config_smollm1_135M.yaml"
nvidia-smi
# Show some environment variables
echo python3 version = `python3 --version`
echo "Python path: $(which python3)"
echo "NCCL version: $(python -c "import torch;print(torch.cuda.nccl.version())")"
echo "CUDA version: $(python -c "import torch;print(torch.version.cuda)")"
echo "START TIME: $(date)"
secs_to_human() {
echo "$(( ${1} / 3600 )):$(( (${1} / 60) % 60 )):$(( ${1} % 60 ))"
}
start=$(date +%s)
echo "$(date -d @${start} "+%Y-%m-%d %H:%M:%S"): ${SLURM_JOB_NAME} start id=${SLURM_JOB_ID}\n"
# SLURM stuff
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_PORT=6000
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
export CUDA_DEVICE_MAX_CONNECTIONS="1"
echo "Number of nodes: $COUNT_NODE"
echo "Hostnames: $HOSTNAMES"
CMD=" $TRAINER_PYTHON_FILE \
--config-file $CONFIG_PATH_YAML \
"
# --node_rank and --role are appended per task in the srun command below,
# where SLURM_PROCID is actually set
export LAUNCHER="torchrun \
--nproc_per_node 8 \
--nnodes $COUNT_NODE \
--max_restarts 0 \
--tee 3 \
"
# Sleep for a random duration between 0 and 1000 milliseconds to avoid too many concurrent requests to the Hub
random_milliseconds=$(( RANDOM % 1001 ))
sleep_time=$(bc <<< "scale=3; $random_milliseconds / 1000")
echo "Sleeping for $sleep_time seconds..."
sleep $sleep_time
srun $SRUN_ARGS -u bash -c "$LAUNCHER --node_rank $SLURM_PROCID --role $SLURMD_NODENAME: $CMD"
echo "END TIME: $(date)"
# SmolLM1 135M trained on 600B tokens
checkpoints:
checkpoint_interval: 2000
checkpoints_path: checkpoints
checkpoints_path_is_shared_file_system: false
resume_checkpoint_path: null
save_final_state: false
save_initial_state: false
data_stages:
- data:
dataset:
dataset_folder: # paths to tokenized datasets
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/python-edu
- datasets/open-web-math
- datasets/stackoverflow
dataset_weights:
- 0.7
- 0.15
- 0.08
- 0.06
- 0.01
num_loading_workers: 1
seed: 42
name: training stage
start_training_step: 1
general:
benchmark_csv_path: null
consumed_train_samples: null
ignore_sanity_checks: true
project: smollm
run: smollm-135M
seed: 8
step: null
logging:
iteration_step_info_interval: 1
log_level: info
log_level_replica: info
model:
ddp_bucket_cap_mb: 25
dtype: bfloat16
init_method:
std: 0.0416 # 1/sqrt(hidden_size)
make_vocab_size_divisible_by: 1
model_config:
bos_token_id: 0
eos_token_id: 0
hidden_act: silu
hidden_size: 576
initializer_range: 0.02
intermediate_size: 1536
is_llama_config: true
max_position_embeddings: 2048
num_attention_heads: 9
num_hidden_layers: 30
num_key_value_heads: 3
pad_token_id: null
pretraining_tp: 1
rms_norm_eps: 1.0e-05
rope_scaling: null
rope_theta: 10000.0
tie_word_embeddings: true
use_cache: true
vocab_size: 49152
optimizer:
accumulate_grad_in_fp32: true
clip_grad: 1.0
learning_rate_scheduler:
learning_rate: 0.003
lr_decay_starting_step: 250000
lr_decay_steps: 50000
lr_decay_style: 1-sqrt
lr_warmup_steps: 2500
lr_warmup_style: linear
min_decay_lr: 0
optimizer_factory:
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.0e-08
name: adamW
torch_adam_is_fused: true
weight_decay: 0.01
zero_stage: 0
parallelism:
dp: 32 # 4 nodes
expert_parallel_size: 1
pp: 1
pp_engine: 1f1b
recompute_layer: false
tp: 1
tp_linear_async_communication: true
tp_mode: REDUCE_SCATTER
tp_recompute_allgather: true
profiler: null
tokenizer:
tokenizer_max_length: null
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
tokenizer_revision: null
tokens:
batch_accumulation_per_replica: 2
limit_test_batches: 0
limit_val_batches: 0
micro_batch_size: 8 # GBS = 8*2*32*sequence_length = 512*sequence_length = 1M tokens
sequence_length: 2048
train_steps: 600000
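# 600k steps x ~1M tokens per step = ~600B tokens, matching the header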
val_check_interval: -1
# SmolLM1 1.7B trained on 1T tokens
checkpoints:
checkpoint_interval: 2000
checkpoints_path: checkpoints
checkpoints_path_is_shared_file_system: false
resume_checkpoint_path: null
save_final_state: false
save_initial_state: false
data_stages:
- data:
dataset:
dataset_folder: # paths to tokenized datasets
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/open-web-math
- datasets/starcoderdata-python
- datasets/stackoverflow
dataset_weights:
- 0.7
- 0.15
- 0.06
- 0.08
- 0.01
num_loading_workers: 1
seed: 42
name: training stage
start_training_step: 1
- data:
dataset: # we change data mixture to use python-edu
dataset_folder:
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/open-web-math
- datasets/python-edu
- datasets/stackoverflow
- datasets/deepmind_mathematics
dataset_weights:
- 0.7
- 0.15
- 0.055
- 0.08
- 0.01
- 0.005
num_loading_workers: 1
seed: 42
name: training stage 2
start_training_step: 300000
general:
benchmark_csv_path: null
consumed_train_samples: null
ignore_sanity_checks: true
project: smollm
run: smollm-1700M
seed: 8
step: null
logging:
iteration_step_info_interval: 1
log_level: info
log_level_replica: info
model:
ddp_bucket_cap_mb: 25
dtype: bfloat16
init_method:
std: 0.022097086912079608
make_vocab_size_divisible_by: 1
model_config:
bos_token_id: 0
eos_token_id: 0
hidden_act: silu
hidden_size: 2048
initializer_range: 0.02
intermediate_size: 8192
is_llama_config: true
max_position_embeddings: 2048
num_attention_heads: 32
num_hidden_layers: 24
num_key_value_heads: 32
pad_token_id: null
pretraining_tp: 1
rms_norm_eps: 1.0e-05
rope_scaling: null
rope_theta: 10000.0
tie_word_embeddings: true
use_cache: true
vocab_size: 49152
optimizer:
accumulate_grad_in_fp32: true
clip_grad: 1.0
learning_rate_scheduler:
learning_rate: 0.0005
lr_decay_starting_step: 400000
lr_decay_steps: 100000
lr_decay_style: 1-sqrt
lr_warmup_steps: 2000
lr_warmup_style: linear
min_decay_lr: 0
optimizer_factory:
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.0e-08
name: adamW
torch_adam_is_fused: true
weight_decay: 0.01
zero_stage: 0
parallelism:
dp: 64 # 8 nodes
expert_parallel_size: 1
pp: 1
pp_engine: 1f1b
recompute_layer: false
tp: 1
tp_linear_async_communication: true
tp_mode: REDUCE_SCATTER
tp_recompute_allgather: true
profiler: null
tokenizer:
tokenizer_max_length: null
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
tokenizer_revision: null
tokens:
batch_accumulation_per_replica: 4
limit_test_batches: 0
limit_val_batches: 0
micro_batch_size: 4 # GBS = 4*4*64*sequence_length = 1024*sequence_length = 2.1M tokens
sequence_length: 2048
train_steps: 500000
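# 500k steps x ~2.1M tokens per step = ~1T tokens, matching the header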
val_check_interval: -1
# SmolLM1 360M trained on 600B tokens
checkpoints:
checkpoint_interval: 2000
checkpoints_path: checkpoints
checkpoints_path_is_shared_file_system: false
resume_checkpoint_path: null
save_final_state: false
save_initial_state: false
data_stages:
- data:
dataset:
dataset_folder: # paths to tokenized datasets
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/python-edu
- datasets/open-web-math
- datasets/stackoverflow
dataset_weights:
- 0.7
- 0.15
- 0.08
- 0.06
- 0.01
num_loading_workers: 1
seed: 42
name: training stage
start_training_step: 1
general:
benchmark_csv_path: null
consumed_train_samples: null
ignore_sanity_checks: true
project: smollm
run: smollm-360M
seed: 8
step: null
logging:
iteration_step_info_interval: 1
log_level: info
log_level_replica: info
model:
ddp_bucket_cap_mb: 25
dtype: bfloat16
init_method:
std: 0.03227486121839514
make_vocab_size_divisible_by: 1
model_config:
bos_token_id: 0
eos_token_id: 0
hidden_act: silu
hidden_size: 960
initializer_range: 0.02
intermediate_size: 2560
is_llama_config: true
max_position_embeddings: 2048
num_attention_heads: 15
num_hidden_layers: 32
num_key_value_heads: 5
pad_token_id: null
pretraining_tp: 1
rms_norm_eps: 1.0e-05
rope_scaling: null
rope_theta: 10000.0
tie_word_embeddings: true
use_cache: true
vocab_size: 49152
optimizer:
accumulate_grad_in_fp32: true
clip_grad: 1.0
learning_rate_scheduler:
learning_rate: 0.003
lr_decay_starting_step: 500000
lr_decay_steps: 100000
lr_decay_style: 1-sqrt
lr_warmup_steps: 5000
lr_warmup_style: linear
min_decay_lr: 0
optimizer_factory:
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.0e-08
name: adamW
torch_adam_is_fused: true
weight_decay: 0.01
zero_stage: 0
parallelism:
dp: 32
expert_parallel_size: 1
pp: 1
pp_engine: 1f1b
recompute_layer: false
tp: 1
tp_linear_async_communication: true
tp_mode: REDUCE_SCATTER
tp_recompute_allgather: true
profiler: null
tokenizer:
tokenizer_max_length: null
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
tokenizer_revision: null
tokens:
batch_accumulation_per_replica: 2
limit_test_batches: 0
limit_val_batches: 0
micro_batch_size: 8
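# GBS = 8*2*32*sequence_length = 512*sequence_length = ~1M tokens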
sequence_length: 2048
train_steps: 600000
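# 600k steps x ~1M tokens per step = ~600B tokens, matching the header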
val_check_interval: -1
# smol-tools
A collection of lightweight AI-powered tools built with llama.cpp and small language models. These tools are designed to run locally on your machine without requiring expensive GPU resources. They can also run offline, without any internet connection.
## Features
### SmolSummarizer
- Quick text summarization using SmolLM2-1.7B Instruct
- Maintains key points while providing concise summaries
- Able to reply to follow-up questions
### SmolRewriter
- Rewrites text to be more professional and approachable
- Maintains the original message's intent and key points
- Perfect for email and message drafting
### SmolAgent
- An AI agent that can perform various tasks through tool integration
- Built-in tools include:
- Weather lookup
- Random number generation
- Current time
- Web browser control
- Extensible tool system for adding new capabilities
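
New tools follow the `@tool` decorator pattern used in `smol_tools/agent.py`; a minimal sketch (the `roll_die` tool below is hypothetical, not one of the built-ins):
```python
from transformers import tool
import random

@tool
def roll_die(sides: int) -> int:
    """
    Rolls a die with the given number of sides.

    Args:
        sides: The number of sides on the die.

    Returns:
        A random integer between 1 and sides.
    """
    return random.randint(1, sides)
```
The agent matches the model's emitted tool calls against the tool's name and typed arguments, so the docstring's `Args` section and the type hints are required.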
## Installation
1. Clone the repository:
```bash
git clone https://github.com/huggingface/smollm.git
cd smollm/smol_tools
```
2. Install dependencies:
```bash
uv venv --python 3.11
source .venv/bin/activate
uv pip install -r requirements.txt
```
If you're on macOS and don't have tkinter installed, you can install it with:
```bash
brew install python-tk@3.11
```
On Linux, you can install it with:
```bash
sudo apt-get install python3-tk
```
On Windows, make sure to select the option that installs the tkinter (Tcl/Tk) component when installing Python.
## Usage
### GUI Demo
Run the Tkinter-based demo application:
```bash
python demo_tkinter.py
```
The demo provides a user-friendly interface with the following shortcuts:
- `F1`: Open SmolDraft interface
- `F2`: Summarize selected text
- `F5`: Open SmolChat interface
- `F10`: Open SmolAgent interface
### Programmatic Usage
```python
from smol_tools.summarizer import SmolSummarizer
from smol_tools.rewriter import SmolRewriter
from smol_tools.agent import SmolToolAgent
# Initialize tools
summarizer = SmolSummarizer()
rewriter = SmolRewriter()
agent = SmolToolAgent()
# Generate a summary
for summary in summarizer.process("Your text here"):
print(summary)
# Rewrite text
for improved in rewriter.process("Your text here"):
print(improved)
# Use the agent
for response in agent.process("What's the weather in London?"):
print(response)
```
## Models
All tools currently run on SmolLM2-1.7B Instruct; the summarizer, rewriter, chat, agent, and titler each load the same GGUF build.
The weights are stored in 16-bit floating point (F16) for efficient inference. The model was trained in BF16, but in our tests that format gives slower inference on Mac M-series chips.
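For reference, the tools load these weights through `llama-cpp-python`; this mirrors `smol_tools/base.py`, and the repo and filename are the ones the tools actually use:
```python
from llama_cpp import Llama

# Download (or reuse from cache) the GGUF file from the Hub and load it
model = Llama.from_pretrained(
    repo_id="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
    filename="smollm2-1.7b-8k-dpo-f16.gguf",
    n_ctx=8192,
    verbose=False,
)
```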
## License
This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
import tkinter as tk
from tkmacosx import Button
import time
import threading
import queue
from pynput import keyboard
from smol_tools.summarizer import SmolSummarizer
from smol_tools.rewriter import SmolRewriter
from pynput.keyboard import Key, Controller
import pyperclip
from smol_tools.agent import SmolToolAgent
from smol_tools.chatter import SmolChatter
from smol_tools.titler import SmolTitler
import os
import getpass
class TextPopupApp:
def __init__(self, root):
self.root = root
self.root.withdraw() # Start with the window hidden
self.last_text = ""
self.active_popups = []
self.last_summary = ""
# Initialize tools
self.summarizer = SmolSummarizer()
self.rewriter = SmolRewriter()
self.titler = SmolTitler()
self.agent = SmolToolAgent()
self.chatter = SmolChatter()
self.keyboard_controller = Controller()
# Replace the keyboard listener with GlobalHotKeys
self.keyboard_listener = keyboard.GlobalHotKeys({
"<F1>": lambda: self.show_draft_input(
self.root.winfo_x(),
self.root.winfo_y(),
self.root.winfo_width()
),
"<F2>": self.generate_summary_from_selected_text,
"<F5>": self.show_chat_window,
"<F10>": self.show_agent_input,
})
self.keyboard_listener.start()
self.username = getpass.getuser() # Get system username
def generate_summary_from_selected_text(self):
selected_text = self.get_selected_text()
if selected_text:
# Directly generate summary instead of showing confirmation popup
self.generate_summary_direct(selected_text)
# New method to directly show summary window
def generate_summary_direct(self, text):
summary_popup = tk.Toplevel(self.root)
summary_popup.withdraw() # Hide the window initially
self.active_popups.append(summary_popup)
summary_popup.title("Summary Chat")
summary_popup.configure(bg='#f6f8fa')
# Set minimum window size
summary_popup.minsize(600, 600)
# Main container
container = tk.Frame(summary_popup, bg='#f6f8fa')
container.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)
# Chat display area (top)
chat_frame = tk.Frame(
container,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
chat_frame.pack(fill=tk.BOTH, expand=True, pady=(0, 10))
# Add chat display with scrollbar
chat_display = tk.Text(
chat_frame,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
bg='white',
fg='#24292e',
font=('Segoe UI', 12),
padx=15,
pady=12
)
scrollbar = tk.Scrollbar(chat_frame, command=chat_display.yview)
chat_display.configure(yscrollcommand=scrollbar.set)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
chat_display.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
# Configure text tags
chat_display.tag_configure("assistant_name", foreground="#E57373", font=('Segoe UI', 12, 'bold'))
chat_display.tag_configure("user_name", foreground="#7986CB", font=('Segoe UI', 12, 'bold'))
chat_display.config(state='disabled')
# Input area (bottom)
input_container = tk.Frame(
container,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
input_container.pack(fill=tk.X)
# Text input
chat_input = tk.Text(
input_container,
height=3,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
bg='white',
fg='#24292e',
font=('Segoe UI', 12),
padx=15,
pady=12,
insertwidth=2, # Width of cursor
insertbackground='#0066FF', # Color of cursor matching our theme
insertofftime=500, # Cursor blink off time in milliseconds
insertontime=500 # Cursor blink on time in milliseconds
)
chat_input.pack(side=tk.LEFT, fill=tk.X, expand=True)
# Send button
send_btn = Button(
input_container,
text="Ask Question",
command=lambda: self.process_summary_question(
text, chat_input.get("1.0", "end-1c").strip(),
chat_display, chat_input
),
font=('Segoe UI', 12),
bg='#0066FF',
fg='white',
activebackground='#0052CC',
activeforeground='white',
borderless=True,
focuscolor='',
height=32,
padx=15
)
send_btn.pack(side=tk.RIGHT, padx=15, pady=8)
# Bind Enter key
chat_input.bind("<Return>", lambda e: [
self.process_summary_question(
text, chat_input.get("1.0", "end-1c").strip(),
chat_display, chat_input
),
"break"
][1])
# Display initial summary request
preview = text[:100] + "..." if len(text) > 100 else text
self.update_summary_chat(chat_display, self.username, f"Please summarize this text: {preview}")
# Position window
summary_popup.update_idletasks()
popup_width = 600
popup_height = 600
# Get mouse position and screen dimensions
mouse_x = self.root.winfo_pointerx()
mouse_y = self.root.winfo_pointery()
screen_width = self.root.winfo_screenwidth()
screen_height = self.root.winfo_screenheight()
# Calculate position
x = min(max(mouse_x - popup_width//2, 0), screen_width - popup_width)
y = min(max(mouse_y - popup_height//2, 0), screen_height - popup_height)
summary_popup.geometry(f"{popup_width}x{popup_height}+{x}+{y}")
summary_popup.deiconify()
def summarize(input_text):
try:
# First message from the model
self.root.after(0, lambda: self.update_summary_chat(
chat_display, self.summarizer.name, ""))
current_response = ""
for output in self.summarizer.process(input_text):
# Only send the new part of the response
if output.startswith(current_response):
new_text = output[len(current_response):]
if new_text: # Only update if there's new text
current_response = output
self.root.after(0, lambda t=new_text: chat_display.config(state='normal') or
chat_display.insert("end-1c", t) or
chat_display.config(state='disabled'))
except Exception as e:
print(e)
threading.Thread(target=lambda: summarize(text), daemon=True).start()
def update_summary_chat(self, chat_display: tk.Text, sender: str, message: str):
"""Update the summary chat display with new message"""
chat_display.config(state='normal')
# Add the message with appropriate styling
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, sender,
"assistant_name" if sender == self.summarizer.name else "user_name")
chat_display.insert(tk.END, f": {message}")
chat_display.see(tk.END)
chat_display.config(state='disabled')
def process_summary_question(self, original_text: str, question: str,
chat_display: tk.Text, chat_input: tk.Text):
"""Process a follow-up question about the summarized text"""
if not question.strip():
return
# Clear input
chat_input.delete("1.0", tk.END)
# Display user question
self.update_summary_chat(chat_display, self.username, question)
def process_question():
try:
# First message from the model
self.root.after(0, lambda: self.update_summary_chat(
chat_display, self.summarizer.name, ""))
current_response = ""
for output in self.summarizer.process(original_text, question=question):
# Only send the new part of the response
if output.startswith(current_response):
new_text = output[len(current_response):]
if new_text: # Only update if there's new text
current_response = output
self.root.after(0, lambda t=new_text: chat_display.config(state='normal') or
chat_display.insert("end-1c", t) or
chat_display.config(state='disabled'))
except Exception as e:
print(e)
threading.Thread(target=process_question, daemon=True).start()
def get_selected_text(self):
# Copy selected text to clipboard
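# Note: Key.cmd is macOS-specific; on Windows/Linux this would be Key.ctrl instead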
with self.keyboard_controller.pressed(Key.cmd):
self.keyboard_controller.tap('c')
# Small delay to ensure clipboard is updated
time.sleep(0.1)
# Get text from clipboard
return pyperclip.paste()
def destroy_active_popups(self):
# Destroy all active popups
for popup in self.active_popups:
try:
popup.destroy()
except Exception:
pass # Popup might already be destroyed
self.active_popups = []
def show_draft_input(self, summary_x, summary_y, summary_width):
draft_popup = tk.Toplevel(self.root)
draft_popup.withdraw() # Hide initially
self.active_popups.append(draft_popup)
draft_popup.title("Draft Reply")
draft_popup.configure(bg='#f6f8fa')
# Create frame for the two columns
columns_frame = tk.Frame(draft_popup, bg='#f6f8fa')
columns_frame.pack(expand=True, fill='both', padx=20, pady=20)
# Calculate required height based on summary content
num_lines = len(self.last_summary.split('\n'))
line_height = max(num_lines * 1.5, 15)
widget_height = min(line_height, 40)
# Column 1: Draft Input
input_frame = tk.Frame(
columns_frame,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
input_frame.pack(side=tk.LEFT, padx=5, fill='both', expand=True)
tk.Label(input_frame, text="Your Reply", bg='white', fg='#24292e', font=('Segoe UI', 12)).pack(padx=15, pady=(12,0), anchor='w')
text_input = tk.Text(
input_frame,
height=widget_height,
width=30,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
selectbackground='#e1e4e8',
selectforeground='#24292e',
insertwidth=2, # Width of cursor
insertbackground='#0066FF', # Color of cursor matching our theme
insertofftime=500, # Cursor blink off time in milliseconds
insertontime=500 # Cursor blink on time in milliseconds
)
text_input.pack(fill='both', expand=True, padx=15, pady=12)
text_input.config(bg='white', fg='#24292e', font=('Segoe UI', 12))
# Column 2: Improved Text
improved_frame = tk.Frame(
columns_frame,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
improved_frame.pack(side=tk.LEFT, padx=5, fill='both', expand=True)
tk.Label(improved_frame, text="Improved Reply", bg='white', fg='#24292e', font=('Segoe UI', 12)).pack(padx=15, pady=(12,0), anchor='w')
improved_text = tk.Text(
improved_frame,
height=widget_height,
width=30,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
selectbackground='#e1e4e8',
selectforeground='#24292e'
)
improved_text.pack(fill='both', expand=True, padx=15, pady=12)
improved_text.config(state='disabled', bg='white', fg='#586069', font=('Segoe UI', 12))
# Add Copy button using tkmacosx
copy_btn = Button(
improved_frame,
text="Copy",
command=lambda: pyperclip.copy(improved_text.get("1.0", "end-1c")),
font=('Segoe UI', 14),
bg='#0066FF',
fg='white',
activebackground='#0052CC',
activeforeground='white',
borderless=True,
focuscolor='',
padx=20,
pady=8,
cursor='hand2'
)
copy_btn.pack(pady=(0, 12))
# Add "Smol Improvement?" button using tkmacosx
improve_btn = Button(
input_frame,
text="Smol Improvement?",
command=lambda: self.generate_improved_text(
text_input.get("1.0", "end-1c"),
improved_text),
font=('Segoe UI', 14),
bg='#0066FF',
fg='white',
activebackground='#0052CC',
activeforeground='white',
borderless=True,
focuscolor='',
padx=20,
pady=8,
cursor='hand2'
)
improve_btn.pack(pady=(0, 12))
# Position window relative to the summary window's position
draft_popup.update_idletasks()
screen_width = draft_popup.winfo_screenwidth()
screen_height = draft_popup.winfo_screenheight()
popup_width = draft_popup.winfo_width()
if popup_width == 1:
popup_width = 800
popup_height = draft_popup.winfo_height()
# Calculate center point of the summary window
summary_center_x = summary_x + summary_width//2
# Center the new window on the same point, but ensure it stays on screen
new_x = max(min(summary_center_x - popup_width//2, screen_width - popup_width), 0)
# Position vertically based on screen space available
if summary_y > screen_height / 2:
new_y = max(summary_y - popup_height - 10, 0) # 10px gap above summary
else:
new_y = min(summary_y + 10, screen_height - popup_height) # 10px gap below summary
draft_popup.geometry(f"+{new_x}+{new_y}")
draft_popup.deiconify()
def generate_improved_text(self, text, improved_text_widget):
# Get reference to the improve button
improve_btn = improved_text_widget.master.master.children['!frame2'].children['!button']
# Disable the button and change text to show processing
improve_btn.configure(
state='disabled',
text="Generating...",
bg='#A8A8A8', # Grayed out color
)
# Update the improve function
improved_text_widget.config(state='normal')
improved_text_widget.delete("1.0", tk.END)
improved_text_widget.insert("1.0", "Generating improvement...")
improved_text_widget.config(state='disabled')
def improve(input_text):
try:
for output in self.rewriter.process(input_text):
self.root.after(0, lambda t=output: self.update_improved_text(improved_text_widget, t))
# Re-enable the button and restore its original label after generation is complete
self.root.after(0, lambda: improve_btn.config(
state='normal',
text="Smol Improvement?",
bg='#0066FF'
))
except Exception as e:
# Make sure to re-enable the button even if there's an error
self.root.after(0, lambda: improve_btn.config(
state='normal',
text="Smol Improvement?",
bg='#0066FF'
))
raise e
threading.Thread(target=lambda: improve(text), daemon=True).start()
def update_improved_text(self, text_widget, new_text):
text_widget.config(state='normal')
text_widget.delete("1.0", tk.END)
text_widget.insert("1.0", new_text)
text_widget.config(state='disabled')
def show_agent_input(self):
# Create new popup for agent input
agent_popup = tk.Toplevel(self.root)
self.active_popups.append(agent_popup)
agent_popup.title("SmolAgent")
# Create input area
input_frame = tk.Frame(agent_popup)
input_frame.pack(padx=10, pady=5, fill='both', expand=True)
tk.Label(input_frame, text="What would you like me to do?").pack()
text_input = tk.Text(input_frame, height=4, width=50, wrap=tk.WORD)
text_input.pack(pady=5)
# Create output area
output_frame = tk.Frame(agent_popup)
output_frame.pack(padx=10, pady=5, fill='both', expand=True)
tk.Label(output_frame, text="Response:").pack()
output_text = tk.Text(output_frame, height=8, width=50, wrap=tk.WORD)
output_text.pack(pady=5)
output_text.config(state='disabled')
def process_agent_request():
query = text_input.get("1.0", "end-1c")
output_text.config(state='normal')
output_text.delete("1.0", tk.END)
output_text.insert("1.0", "Processing request...\n")
output_text.config(state='disabled')
def run_agent():
full_response = []
for response in self.agent.process(query):
full_response.append(response)
self.root.after(0, lambda t="\n".join(full_response): self.update_agent_output(output_text, t))
threading.Thread(target=run_agent, daemon=True).start()
# Add Submit button
submit_btn = tk.Button(agent_popup, text="Submit", command=process_agent_request)
submit_btn.pack(pady=5)
# Position window
agent_popup.update_idletasks()
screen_width = agent_popup.winfo_screenwidth()
screen_height = agent_popup.winfo_screenheight()
popup_width = agent_popup.winfo_width()
popup_height = agent_popup.winfo_height()
x = (screen_width - popup_width) // 2
y = (screen_height - popup_height) // 2
agent_popup.geometry(f"+{x}+{y}")
def update_agent_output(self, text_widget, new_text):
text_widget.config(state='normal')
text_widget.delete("1.0", tk.END)
text_widget.insert("1.0", new_text)
text_widget.config(state='disabled')
def show_chat_window(self):
chat_window = tk.Toplevel(self.root)
self.active_popups.append(chat_window)
chat_window.title("SmolChat")
# Configure the chat window to be resizable
chat_window.geometry("800x800")
chat_window.minsize(600, 600)
# Create split view with history panel
history_panel = tk.Frame(chat_window, width=200, padx=5, pady=10)
history_panel.pack(side=tk.LEFT, fill=tk.Y)
history_panel.pack_propagate(False) # Maintain width
# Create main chat area
main_frame = tk.Frame(chat_window, padx=10, pady=10)
main_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
# Create chat display first
chat_display = tk.Text(main_frame, wrap=tk.WORD)
chat_display.pack(fill=tk.BOTH, expand=True)
chat_display.config(state='disabled')
# Add input field and send button
input_frame = tk.Frame(main_frame)
input_frame.pack(fill=tk.X, pady=(10, 0))
chat_input = tk.Text(input_frame, height=3, wrap=tk.WORD)
chat_input.pack(side=tk.LEFT, fill=tk.X, expand=True)
send_btn = tk.Button(input_frame, text="Send",
command=lambda: self.process_chat_message(
chat_input.get("1.0", "end-1c").strip(),
chat_display))
send_btn.pack(side=tk.RIGHT, padx=(10, 0))
# Bind Enter key to send message
chat_input.bind("<Return>", lambda e: [
self.process_chat_message(
chat_input.get("1.0", "end-1c").strip(),
chat_display),
"break" # Prevent the default newline behavior
][1])
# Now add the New Chat button (after chat_display is created)
new_chat_btn = tk.Button(history_panel, text="New Chat",
command=lambda: self.start_new_chat(chat_display))
new_chat_btn.pack(fill=tk.X, pady=(0, 10))
# Add listbox for chat history
history_label = tk.Label(history_panel, text="Previous Chats")
history_label.pack()
chat_listbox = tk.Listbox(history_panel, height=20)
chat_listbox.pack(fill=tk.BOTH, expand=True)
# Get and sort chats by modification time (newest first)
saved_chats = self.chatter.get_saved_chats()
sorted_chats = sorted(
saved_chats,
key=lambda x: os.path.getmtime(os.path.join("saved_chats", f"chat_{x}.json")),
reverse=True
)
# Populate chat history with sorted chats
for chat_id in sorted_chats:
chat_listbox.insert(tk.END, chat_id)
# Bind selection event
chat_listbox.bind('<<ListboxSelect>>',
lambda e: self.load_selected_chat(chat_listbox, chat_display))
# Add scrollbar to listbox
history_scrollbar = tk.Scrollbar(history_panel, command=chat_listbox.yview)
history_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
chat_listbox.config(yscrollcommand=history_scrollbar.set)
# Store references to UI elements that need to be disabled during chat
self.chat_controls = {
'listbox': chat_listbox,
'new_chat_btn': new_chat_btn
}
# Add text tags with softer colors
chat_display.tag_configure("assistant_name", foreground="#E57373") # Soft red
chat_display.tag_configure("user_name", foreground="#7986CB") # Soft blue
def load_selected_chat(self, listbox: tk.Listbox, chat_display: tk.Text):
selection = listbox.curselection()
if selection:
chat_id = listbox.get(selection[0])
self.chatter.load_chat(chat_id)
self.display_chat_history(chat_display)
def start_new_chat(self, chat_display):
# Only save the current chat if it has been modified since loading
if self.chatter.has_current_chat() and self.chatter.is_chat_modified():
# Get full chat history as a single string
chat_history = "\n".join([f"{msg.role}: {msg.content}"
for msg in self.chatter.get_chat_history()])
# If we're continuing an existing chat, use its ID
current_chat_id = self.chatter.get_current_chat_id()
if current_chat_id:
# Save using existing ID
self.chatter.save_current_chat(current_chat_id, overwrite=True)
else:
# Generate new title for new chat
summary = ""
for chunk in self.titler.process(chat_history):
summary = chunk
summary_title = summary[:50].strip().replace("/", "-").replace("\\", "-")
self.chatter.save_current_chat(summary_title, overwrite=True)
# Start new chat
self.chatter.start_new_chat()
# Clear and update display
self.display_chat_history(chat_display)
# Update the chat history listbox with sorted chats
listbox = self.chat_controls['listbox']
listbox.delete(0, tk.END)
saved_chats = self.chatter.get_saved_chats()
sorted_chats = sorted(
saved_chats,
key=lambda x: os.path.getmtime(os.path.join("saved_chats", f"chat_{x}.json")),
reverse=True
)
for chat_id in sorted_chats:
listbox.insert(tk.END, chat_id)
def process_chat_message(self, message: str, chat_display: tk.Text):
if not message.strip(): # Skip empty messages
return
# Disable chat controls while processing
self.chat_controls['listbox'].config(state='disabled')
self.chat_controls['new_chat_btn'].config(state='disabled')
chat_display.config(state='normal')
# Add extra newline before user message for spacing
chat_display.insert(tk.END, "\n") # Start new line
chat_display.insert(tk.END, self.username, "user_name") # Add colored username
chat_display.insert(tk.END, f": {message}\n") # Add message
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, self.chatter.name, "assistant_name") # Add colored AI name
chat_display.insert(tk.END, ": ") # Add separator
# Clear the input field (get its reference from chat_display's master)
input_frame = chat_display.master.children['!frame']
chat_input = input_frame.children['!text']
chat_input.delete("1.0", tk.END)
# Initialize an empty string to store the full response
self.current_response = ""
chat_display.see(tk.END)
chat_display.config(state='disabled')
def chat_response():
try:
for chunk in self.chatter.process(message):
# Only send the new part of the response
if chunk.startswith(self.current_response):
new_text = chunk[len(self.current_response):]
if new_text: # Only update if there's new text
self.current_response = chunk
self.root.after(0, lambda t=new_text: self.update_chat_display(chat_display, t))
self.root.after(0, lambda t="\n\n": self.update_chat_display(chat_display, t))
finally:
# Re-enable chat controls after response is complete
self.root.after(0, self.enable_chat_controls)
threading.Thread(target=chat_response, daemon=True).start()
def enable_chat_controls(self):
"""Re-enable chat controls after response is complete"""
self.chat_controls['listbox'].config(state='normal')
self.chat_controls['new_chat_btn'].config(state='normal')
def update_chat_display(self, chat_display: tk.Text, new_text: str):
chat_display.config(state='normal')
chat_display.insert(tk.END, new_text)
chat_display.see(tk.END)
chat_display.config(state='disabled')
def display_chat_history(self, chat_display: tk.Text):
chat_display.config(state='normal')
chat_display.delete("1.0", tk.END)
# Configure text tags with softer colors
chat_display.tag_configure("assistant_name", foreground="#E57373") # Soft red
chat_display.tag_configure("user_name", foreground="#7986CB") # Soft blue
for message in self.chatter.get_chat_history():
if message.role == "user":
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, self.username, "user_name") # Change "You" to username
chat_display.insert(tk.END, f": {message.content}\n")
else:
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, self.chatter.name, "assistant_name")
chat_display.insert(tk.END, f": {message.content}\n")
chat_display.config(state='disabled')
chat_display.see(tk.END)
# Run the app
root = tk.Tk()
# Set default font size for all tkinter widgets
default_font = ('Segoe UI', 14) # Use Segoe UI instead of TkDefaultFont
root.option_add("*Font", default_font)
root.option_add("*Entry.Font", default_font)
root.option_add("*Text.Font", default_font)
root.option_add("*Button.Font", default_font)
root.option_add("*Label.Font", default_font)
app = TextPopupApp(root)
root.mainloop()
tkmacosx>=1.0.5
pynput>=1.7.7
llama-cpp-python>=0.3.1
pyperclip>=1.9.0
transformers>=4.46.2
pygments>=2.18.0
from .base import SmolTool
from typing import Generator, List, Dict, Any, Callable
import json
import re
from datetime import datetime
import random
from transformers import tool, CodeAgent
import requests
import webbrowser
@tool
def get_random_number_between(min: int, max: int) -> int:
"""
Gets a random number between min and max.
Args:
min: The minimum number.
max: The maximum number.
Returns:
A random number between min and max.
"""
return random.randint(min, max)
@tool
def get_weather(city: str) -> str:
"""
Returns the weather forecast for a given city.
Args:
city: The name of the city.
Returns:
A string with the current conditions and temperature fetched from wttr.in.
"""
url = 'https://wttr.in/{}?format=+%C,+%t'.format(city)
res = requests.get(url).text
return f"The weather in {city} is {res.split(',')[0]} with a high of {res.split(',')[1][:-2]} degrees Celsius."
@tool
def get_current_time() -> str:
"""
This is a tool that returns the current time.
It returns the current time as HH:MM.
"""
return f"The current time is {datetime.now().hour}:{datetime.now().minute}."
@tool
def open_webbrowser(url: str) -> str:
"""
This is a tool that opens a web browser to the given website.
If the user asks to open a website or a browser, you should use this tool.
Args:
url: The url to open.
"""
webbrowser.open(url)
return f"I opened {url.replace('https://', '').replace('www.', '')} in the browser."
class SmolToolAgent(SmolTool):
def __init__(self):
self.tools = [get_random_number_between, get_current_time, open_webbrowser, get_weather]
self.toolbox = {tool.name: tool for tool in self.tools}
self.json_code_agent = CodeAgent(tools=self.tools, llm_engine=self.llm_engine, system_prompt=self._get_system_prompt())
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt=self._get_system_prompt(),
prefix_text=""
)
def llm_engine(self, messages, stop_sequences=["Task", "<|endoftext|>"]) -> str:
output = ""
for chunk in self.model.create_chat_completion(
messages=messages,
max_tokens=2048,
temperature=0.0,
top_p=1.0,
top_k=50,
repeat_penalty=1.0,
stream=True
):
content = chunk['choices'][0]['delta'].get('content')
if content:
if content in ["<end_action>", "<|endoftext|>"]:
break
output += content
return output
def _get_system_prompt(self) -> str:
return """You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out and refuse to answer.
If the given question lacks the parameters required by the function, also point it out.
You have access to the following tools:
<<tool_descriptions>>
<<managed_agents_descriptions>>
You can use imports in your code, but only from the following list of modules: <<authorized_imports>>
The output MUST strictly adhere to the following format, and NO other text MUST be included.
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make the tool calls an empty list '[]'.
<tool_call>[
{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
... (more tool calls as required)
]</tool_call>"""
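# Example: for "What's the weather in London?" the model is expected to emit
# <tool_call>[{"name": "get_weather", "arguments": {"city": "London"}}]</tool_call>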
def _parse_response(self, text: str) -> List[Dict[str, Any]]:
pattern = r"<tool_call>(.*?)</tool_call>"
matches = re.findall(pattern, text, re.DOTALL)
if matches:
return json.loads(matches[0])
return text
def _call_tools(self, tool_calls: List[Dict[str, Any]]) -> List[str]:
tool_responses = []
for tool_call in tool_calls:
if tool_call["name"] in self.toolbox:
tool_responses.append(
self.toolbox[tool_call["name"]](**tool_call["arguments"])
)
else:
tool_responses.append(f"Tool {tool_call['name']} not found.")
return tool_responses
def process(self, text: str) -> Generator[str, None, None]:
response = self.json_code_agent.run(text, return_generated_code=True)
# Parse and execute the tool calls
try:
tool_calls = self._parse_response(response)
if tool_calls in [response, [], ""]:
yield response
return
tool_responses = self._call_tools(tool_calls)
except Exception as e:
print("error", e)
yield response
return
# Yield each tool response
for response in tool_responses:
yield str(response)
from abc import ABC, abstractmethod
from typing import Generator, List, Dict, Any, Union, Tuple
from llama_cpp import Llama
class SmolTool(ABC):
# Class-level cache for model instances
_model_cache: Dict[Tuple[str, str], Llama] = {}
def __init__(self, model_repo: str, model_filename: str, system_prompt: str, prefix_text: str = "", n_ctx: int = 8192):
self.system_prompt = system_prompt
self.prefix_text = prefix_text
# Create a cache key from the model repo and filename
cache_key = (model_repo, model_filename)
# Track if this is a new model load
is_new_model = cache_key not in self._model_cache
# Try to get the model from cache, or create and cache a new one
if is_new_model:
print(f"Loading model {model_filename} from {model_repo}...")
self._model_cache[cache_key] = Llama.from_pretrained(
repo_id=model_repo,
filename=model_filename,
n_ctx=n_ctx,
verbose=False
)
self.model = self._model_cache[cache_key]
# Only warm up for newly loaded models
if is_new_model:
self._warm_up()
def _warm_up(self):
"""Warm up the model with a test prompt"""
print(f"Warming up {self.__class__.__name__}...")
test_text = "This is a test message to warm up the model."
# Consume the generator to complete the warm-up
for _ in self.process(test_text):
pass
print(f"{self.__class__.__name__} ready!")
@abstractmethod
def process(self, text: str) -> Generator[str, None, None]:
"""Process the input text and yield results as they're generated"""
pass
def _create_chat_completion(
self,
messages: List[Dict[str, str]],
temperature: float = 0.4,
top_p: float = 0.9,
top_k: int = 50,
repeat_penalty: float = 1.2,
max_tokens: int = 256
) -> Generator[str, None, None]:
"""Helper method to create chat completions with standard parameters"""
output = ""
for chunk in self.model.create_chat_completion(
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
top_k=top_k,
repeat_penalty=repeat_penalty,
stream=True
):
content = chunk['choices'][0]['delta'].get('content')
if content:
if content in ["<end_action>", "<|endoftext|>"]:
break
output += content
yield output
from .base import SmolTool
from typing import Generator, List, Dict
from dataclasses import dataclass
from datetime import datetime
import json
import os
@dataclass
class ChatMessage:
role: str # "user" or "assistant"
content: str
timestamp: datetime
# Add methods to convert to/from dict for JSON serialization
def to_dict(self):
return {
'role': self.role,
'content': self.content,
'timestamp': self.timestamp.isoformat()
}
@classmethod
def from_dict(cls, data):
return cls(
role=data['role'],
content=data['content'],
timestamp=datetime.fromisoformat(data['timestamp'])
)
class SmolChatter(SmolTool):
def __init__(self):
self.chat_history: List[ChatMessage] = []
self.chat_archive: Dict[str, List[ChatMessage]] = {}
self.current_chat_id = None
self.chats_dir = "saved_chats"
self._original_chat_state = None # To track modifications
self.name = "SmolLM2-1.7B"
# Create chats directory if it doesn't exist
if not os.path.exists(self.chats_dir):
os.makedirs(self.chats_dir)
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="You are a helpful AI assistant named SmolLM, trained by Hugging Face..",
)
def start_new_chat(self):
"""Start a new chat with a unique ID"""
self.current_chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")
self.chat_history = []
self._original_chat_state = None
def has_current_chat(self) -> bool:
"""Check if there are any messages in the current chat"""
return len(self.chat_history) > 0
def save_current_chat(self, title: str = None, overwrite: bool = False):
"""Save the current chat to disk if it has any messages"""
if not self.chat_history:
return
if title:
# If overwriting, use existing chat_id if it matches the title
if not overwrite or self.current_chat_id != title:
self.current_chat_id = title
elif not self.current_chat_id:
self.current_chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")
# Convert chat history to serializable format
chat_data = {
'id': self.current_chat_id,
'messages': [msg.to_dict() for msg in self.chat_history]
}
# Save to file
filename = f"{self.chats_dir}/chat_{self.current_chat_id}.json"
with open(filename, 'w') as f:
json.dump(chat_data, f)
# Update original state to reflect saved state
self._original_chat_state = [msg.to_dict() for msg in self.chat_history]
def load_chat(self, chat_id: str):
"""Load a specific chat from disk"""
filename = f"{self.chats_dir}/chat_{chat_id}.json"
try:
with open(filename, 'r') as f:
data = json.load(f)
self.current_chat_id = data['id']
self.chat_history = [ChatMessage.from_dict(msg) for msg in data['messages']]
# Store original state for modification tracking
self._original_chat_state = [msg.to_dict() for msg in self.chat_history]
except FileNotFoundError:
print(f"Chat {chat_id} not found")
def is_chat_modified(self) -> bool:
"""Check if the current chat has been modified since loading"""
if self._original_chat_state is None:
# New chat that hasn't been saved yet
return len(self.chat_history) > 0
current_state = [msg.to_dict() for msg in self.chat_history]
return current_state != self._original_chat_state
def get_saved_chats(self) -> List[str]:
"""Get list of saved chat IDs"""
chats = []
for filename in os.listdir(self.chats_dir):
if filename.startswith('chat_') and filename.endswith('.json'):
chat_id = filename[5:-5] # Remove 'chat_' prefix and '.json' suffix
chats.append(chat_id)
return sorted(chats, reverse=True) # Most recent first
def _warm_up(self):
super()._warm_up()
self.clear_chat_history()
def process(self, text: str) -> Generator[str, None, None]:
# Add user message to history
self.chat_history.append(ChatMessage(
role="user",
content=text,
timestamp=datetime.now()
))
# Build messages including chat history
messages = [{"role": "system", "content": self.system_prompt}]
# Include the full chat history for context
for msg in self.chat_history:
messages.append({"role": msg.role, "content": msg.content})
# Generate response
response = ""
for chunk in self._create_chat_completion(messages, max_tokens=1024):
response = chunk
yield chunk
# Add assistant's response to history
self.chat_history.append(ChatMessage(
role="assistant",
content=response,
timestamp=datetime.now()
))
def get_chat_history(self) -> List[ChatMessage]:
return self.chat_history
def clear_chat_history(self):
self.chat_history = []
def get_current_chat_id(self) -> str:
"""Get the ID of the current chat"""
return self.current_chat_id
from .base import SmolTool
from typing import Generator
class SmolRewriter(SmolTool):
def __init__(self):
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="You are an AI writing assistant. Your task is to rewrite the user's email to make it more professional and approachable while maintaining its main points and key message. Do not return any text other than the rewritten message.",
prefix_text="Rewrite the message below to make it more professional and approachable while maintaining its main points and key message. Do not add any new information or return any text other than the rewritten message\nThe message:"
)
def process(self, text: str) -> Generator[str, None, None]:
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": f"{self.prefix_text}\n{text}"}
]
yield from self._create_chat_completion(messages, temperature=0.4, repeat_penalty=1.0, top_k=0, max_tokens=1024)
from .base import SmolTool
from typing import Generator, Optional
from dataclasses import dataclass
from datetime import datetime
from typing import List
@dataclass
class SummaryMessage:
role: str # "user" or "assistant"
content: str
timestamp: datetime
class SmolSummarizer(SmolTool):
def __init__(self):
self.name = "SmolLM2-1.7B"
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="Concisely summarize the main points of the input text in up to three sentences, focusing on key information and events.",
)
def process(self, text: str, question: Optional[str] = None) -> Generator[str, None, None]:
if question is None:
print("Summarizing text")
prompt = f"{self.prefix_text}\n{text}"
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": prompt},
{"role": "assistant", "content": "This is a short summary of the text:"}
]
else:
print("Answering question")
prompt = f"Original text:\n{text}\n\nQuestion: {question}"
messages = [
{"role": "user", "content": prompt},
]
for chunk in self._create_chat_completion(messages, max_tokens=1024, temperature=0.1, top_p=0.9):
yield chunk
from .base import SmolTool
from typing import Generator
class SmolTitler(SmolTool):
def __init__(self):
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="",
prefix_text="Create a title for this conversation:",
)
def process(self, text: str) -> Generator[str, None, None]:
messages = [
{"role": "user", "content": f"{self.prefix_text}\n{text}"}
]
yield from self._create_chat_completion(messages, max_tokens=128, temperature=0.6, top_p=0.9, top_k=0, repeat_penalty=1.1)