#!/bin/bash

set -euo pipefail

# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.." || exit

# Both variables are required; there are no sensible defaults for them.
if [[ -z "${PROMPT_CACHE_FILE+x}" || -z "${CHAT_SAVE_DIR+x}" ]]; then
    echo >&2 "error: PROMPT_CACHE_FILE and CHAT_SAVE_DIR must be provided"
    exit 1
fi

# Tunables, all overridable from the environment.
MODEL="${MODEL:-./models/llama-13b/ggml-model-q4_0.gguf}"
PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}"
USER_NAME="${USER_NAME:-User}"
AI_NAME="${AI_NAME:-ChatLLaMa}"
DATE_TIME="$(date +%H:%M)"
DATE_YEAR="$(date +%Y)"

# Per-session state files, all kept under CHAT_SAVE_DIR.
LOG="${CHAT_SAVE_DIR}/main.log"
LOG_BG="${CHAT_SAVE_DIR}/main-bg.log"
CUR_PROMPT_FILE="${CHAT_SAVE_DIR}/current-prompt.txt"
CUR_PROMPT_CACHE="${CHAT_SAVE_DIR}/current-cache.bin"
NEXT_PROMPT_FILE="${CHAT_SAVE_DIR}/next-prompt.txt"
NEXT_PROMPT_CACHE="${CHAT_SAVE_DIR}/next-cache.bin"

# EREs used to scrape token counts out of llama-cli's stderr log (see the
# "HACK" in the main loop). The decimal point must be escaped: an unescaped
# '.' in an ERE matches any character, not just the literal dot.
SESSION_SIZE_MSG_PATTERN='main: session file matches [[:digit:]]+ / [[:digit:]]+'
SAMPLE_TIME_MSG_PATTERN='sample time =[[:space:]]+[[:digit:]]+\.[[:digit:]]+ ms /[[:space:]]+[[:digit:]]+'
# sed -r script: delete everything from the first chat message to end of file.
SED_DELETE_MESSAGES="/^(${USER_NAME}:|${AI_NAME}:|\\.\\.\\.)/,\$d"

CTX_SIZE=2048
# Once this many tokens are used, new text is mirrored into the next prompt
# so that context rotation loses no recent history.
CTX_ROTATE_POINT=$((CTX_SIZE * 3 / 5)) # REVIEW
OPTS=(--model "$MODEL" --ctx_size "$CTX_SIZE" --repeat_last_n 256 "$@")
# An unbuffered `tail -c+N`: discard the first $1 bytes of stdin, then stream
# every remaining byte as soon as it arrives (no block buffering).
skip_bytes() {
    local byte
    # Consume and throw away the leading $1 bytes in one read.
    LANG=C IFS= read -r -d '' -n "$1" byte
    # Forward the rest one byte at a time so generation appears live.
    while LANG=C IFS= read -r -d '' -n 1 byte; do
        printf '%s' "$byte"
    done
}

# Create the session directory and a fresh log; on any exit, show the log
# tail so the user can see llama-cli diagnostics.
mkdir -p "$CHAT_SAVE_DIR"
echo >"$LOG"
# Single-quote the trap body so "$LOG" is expanded (quoted) when the trap
# fires, not word-split at definition time — a CHAT_SAVE_DIR containing
# spaces would otherwise break the tail command.
trap 'tail -n100 "$LOG"' EXIT

# First run: instantiate the prompt template with the user/AI names and date.
if [[ ! -e "$CUR_PROMPT_FILE" ]]; then
    sed -e "s/\[\[USER_NAME\]\]/${USER_NAME}/g" \
        -e "s/\[\[AI_NAME\]\]/${AI_NAME}/g" \
        -e "s/\[\[DATE_TIME\]\]/${DATE_TIME}/g" \
        -e "s/\[\[DATE_YEAR\]\]/${DATE_YEAR}/g" \
        "$PROMPT_TEMPLATE" >"$CUR_PROMPT_FILE"
fi

# The "next" prompt starts as the current prompt with all chat messages
# stripped (template head only).
if [[ ! -e "$NEXT_PROMPT_FILE" ]]; then
    sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
fi

# Ensure the rotated prompt ends with the '...' continuation marker.
if [[ "$(tail -c4 "$NEXT_PROMPT_FILE")" != "..." ]]; then
    echo '...' >>"$NEXT_PROMPT_FILE"
fi

if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then
    echo 'Prompt cache does not exist, building...'
    # Default batch_size to 64 here for better user feedback during initial prompt processing
xuxzh1's avatar
init  
xuxzh1 committed
65
    ./llama-cli 2>>"$LOG" \
mashun1's avatar
v1  
mashun1 committed
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
        --batch_size 64 \
        "${OPTS[@]}" \
        --prompt-cache "$PROMPT_CACHE_FILE" \
        --file "$CUR_PROMPT_FILE" \
        --n_predict 1
    echo
    echo 'Done!'
fi

# Seed both per-session caches from the shared base cache on first run.
for cache in "$CUR_PROMPT_CACHE" "$NEXT_PROMPT_CACHE"; do
    if [[ ! -e "$cache" ]]; then
        cp "$PROMPT_CACHE_FILE" "$cache"
    fi
done

# Print the conversation so far, then start counting context usage from zero.
printf '%s ' "$(< "$CUR_PROMPT_FILE")"
n_tokens=0

# Main chat loop: read one user line, generate a reply with llama-cli, and
# keep a rotated "next" prompt + cache warm in the background so that running
# out of context only costs a file swap instead of a full re-evaluation.
while read -e line; do
    # Limit generation to remaining context, with a buffer and estimating 2 chars/token for input
    n_predict=$((CTX_SIZE - n_tokens - ${#line} / 2 - 32))

    # Swap prompts when we're about to run out of context
    if ((n_predict <= 0)); then
        wait # for background main (below) to finish with next prompt
        mv "$NEXT_PROMPT_FILE"  "$CUR_PROMPT_FILE"
        mv "$NEXT_PROMPT_CACHE" "$CUR_PROMPT_CACHE"

        # Rebuild a fresh "next" prompt: template head plus the '...'
        # marker, backed by the pristine base cache.
        sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
        echo '...' >>"$NEXT_PROMPT_FILE"
        cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"

        n_tokens=0
        n_predict=$((CTX_SIZE / 2))
    fi

    # Append the user's message; past the rotate point, mirror it into the
    # next prompt too so no recent history is lost at swap time.
    echo " ${line}" >>"$CUR_PROMPT_FILE"
    if ((n_tokens > CTX_ROTATE_POINT)); then
        echo " ${line}" >>"$NEXT_PROMPT_FILE"
    fi

    # Byte length of the prompt before generation — used below to print only
    # the newly generated text out of the tee'd stream.
    n_prompt_len_pre=$(($(wc -c <"$CUR_PROMPT_FILE")))

    printf '%s: ' "$AI_NAME" >>"$CUR_PROMPT_FILE"

    # Generate until the reverse-prompt ("User:") or the n_predict budget.
    ./llama-cli 2>>"$LOG" "${OPTS[@]}" \
            --prompt-cache "$CUR_PROMPT_CACHE" \
            --prompt-cache-all \
            --file "$CUR_PROMPT_FILE" \
            --reverse-prompt "${USER_NAME}:" \
            --n_predict "$n_predict" |
        skip_bytes 1 |                  # skip BOS token added by ./llama-cli
        tee "$CUR_PROMPT_FILE.tmp" |    # save prompt + generation to tmp file
        skip_bytes "$n_prompt_len_pre"  # print generation

    mv "$CUR_PROMPT_FILE.tmp" "$CUR_PROMPT_FILE"

    # if we hit n_predict instead of reverse-prompt, we need to add the prompt
    if [[ "$(tail -n1 "$CUR_PROMPT_FILE")" != "${USER_NAME}:" ]]; then
        printf '\n%s:' "$USER_NAME"
        printf '\n%s:' "$USER_NAME" >> "$CUR_PROMPT_FILE"
    fi

    printf ' '

    # HACK get num tokens from debug message
    # TODO get both messages in one go
    if  ! session_size_msg="$(tail -n30 "$LOG" | grep -oE "$SESSION_SIZE_MSG_PATTERN")" ||
        ! sample_time_msg="$(tail -n10 "$LOG" | grep -oE "$SAMPLE_TIME_MSG_PATTERN")"; then
        echo >&2 "Couldn't get number of tokens from ./llama-cli output!"
        exit 1
    fi

    # Context used = tokens restored from the session cache plus tokens
    # sampled this turn (second '/'-delimited field of each scraped message).
    n_tokens=$(($(cut -d/ -f2 <<<"$session_size_msg") + $(cut -d/ -f2 <<<"$sample_time_msg")))

    # Past the rotate point, mirror this turn's generated text (everything
    # after the pre-generation length) into the next prompt as well.
    if ((n_tokens > CTX_ROTATE_POINT)); then
        tail -c+$((n_prompt_len_pre + 1)) "$CUR_PROMPT_FILE" >>"$NEXT_PROMPT_FILE"
    fi

    # Update cache for next prompt in background, ideally during user input
    ./llama-cli >>"$LOG_BG" 2>&1 "${OPTS[@]}" \
          --prompt-cache "$NEXT_PROMPT_CACHE" \
          --file "$NEXT_PROMPT_FILE" \
          --n_predict 1 &
done