loop-read-validator.sh 12.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#!/usr/bin/env bash
#
# PreToolUse Hook: Validate Read access for RLCR loop files
#
# Blocks Claude from reading:
# - Wrong round's prompt/summary/contract files (outdated information)
# - Round files from wrong locations (not in .humanize/rlcr/)
# - Round files from old session directories
# - Todos files (should use native Task tools instead)
# - goal-tracker.md from old RLCR sessions
#

# Strict mode: stop on unhandled command failures, on expansion of unset
# variables, and on a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Locate the directory containing this script (falling back to $0 when
# BASH_SOURCE is not populated) and load the shared helpers from lib/.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)
. "${SCRIPT_DIR}/lib/loop-common.sh"

# ========================================
# Parse Hook Input
# ========================================

# The hook payload is delivered on stdin; read it once in full.
HOOK_INPUT=$(cat)

# Structural validation of the payload; an invalid payload ends the hook
# with status 1.
validate_hook_input "$HOOK_INPUT" || exit 1

# Guard against excessively nested JSON (potential DoS). 30 is the limit
# handed to the helper.
if is_deeply_nested "$HOOK_INPUT" 30; then
    exit 1
fi

# NOTE(review): VALIDATED_TOOL_NAME is not assigned in this file; presumably
# it is set by validate_hook_input -- confirm against lib/loop-common.sh.
TOOL_NAME="$VALIDATED_TOOL_NAME"

# This validator only applies to the Read tool; anything else passes through.
[[ "$TOOL_NAME" == "Read" ]] || exit 0

# A Read call must carry a file_path field.
require_tool_input_field "$HOOK_INPUT" "file_path" || exit 1

# Target path as given, plus a lower-cased copy used by the file-type checks.
FILE_PATH=$(echo "$HOOK_INPUT" | jq -r '.tool_input.file_path // ""')
FILE_PATH_LOWER=$(to_lower "$FILE_PATH")

# Session id from the payload, used to select the loop belonging to this session.
HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT")

# ========================================
# Block Todos Files
# ========================================

# A todos round file may be read only when the session has an active loop
# and the path is allowlisted for that loop; otherwise the read is rejected
# with the todos guidance message.
if is_round_file_type "$FILE_PATH_LOWER" "todos"; then
    # If the project root cannot be resolved, let the read through (exit 0).
    PROJECT_ROOT="$(resolve_project_root)" || exit 0
    LOOP_BASE_DIR="${PROJECT_ROOT}/.humanize/rlcr"
    LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")
    if ! { [[ -n "$LOOP_DIR" ]] && is_allowlisted_file "$FILE_PATH" "$LOOP_DIR"; }; then
        todos_blocked_message "Read" >&2
        exit 2
    fi
fi

# ========================================
# Methodology Analysis Phase Read Restriction
# ========================================
# During methodology analysis, restrict reads of files within the loop
# directory to only the artifacts the analysis agent needs. This prevents
# project-specific information from leaking into the analysis report.
# Files outside the loop directory are allowed (Claude needs system files).
# This check MUST come before the summary/prompt early exit below,
# otherwise non-summary/prompt files in the loop dir escape restriction.

# Reuse PROJECT_ROOT / LOOP_BASE_DIR / LOOP_DIR when the todos check above
# already set them; otherwise resolve them here. If the project root cannot
# be resolved there is nothing to enforce, so the read is allowed (exit 0).
PROJECT_ROOT="${PROJECT_ROOT:-$(resolve_project_root 2>/dev/null || true)}"
[[ -z "$PROJECT_ROOT" ]] && exit 0
LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}"
# Use only the session-matched loop. Do NOT fall back to an unfiltered search,
# as that would incorrectly restrict unrelated sessions opened in the same repo.
# Limitation: Spawned agents (different session_id) are not restricted by hooks;
# their sanitization is enforced by the analysis prompt.
ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}"
_MA_CHECK_DIR="$ACTIVE_LOOP_DIR"

# The restriction applies only when this session has an active loop whose
# active state file is methodology-analysis-state.md.
if [[ -n "$_MA_CHECK_DIR" ]]; then
    _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR")
    if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then
        # Canonicalize to prevent path traversal
        # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir
        _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "")
        if [[ -z "$_ma_real_path" ]]; then
            _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "")
            [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")"
        fi
        _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "")
        # Fallback to raw paths when realpath is unavailable (older macOS/BSD)
        # Ensure paths are absolute so prefix guards cannot be bypassed.
        # Reject paths with ".." segments to prevent traversal bypasses
        # when we cannot canonicalize (fail closed).
        if [[ -z "$_ma_real_path" ]]; then
            # Note: this is a substring match, so any path containing ".."
            # anywhere (not only as a whole segment) is rejected here.
            if [[ "$FILE_PATH" == *".."* ]]; then
                echo "# Read Blocked During Methodology Analysis

Path contains traversal segments that cannot be resolved without realpath." >&2
                exit 2
            fi
            # Fail closed if the file is a symlink we cannot resolve; the raw
            # path would skip the project-root prefix guard, allowing a symlink
            # outside the project to point back at restricted project content.
            if [[ -L "$FILE_PATH" ]]; then
                echo "# Read Blocked During Methodology Analysis

Path is a symlink that cannot be resolved without realpath." >&2
                exit 2
            fi
            # Relative paths are anchored at the project root.
            if [[ "$FILE_PATH" == /* ]]; then
                _ma_real_path="$FILE_PATH"
            else
                _ma_real_path="$PROJECT_ROOT/$FILE_PATH"
            fi
        fi
        # Same raw-path fallback for the loop directory itself.
        if [[ -z "$_ma_real_loop" ]]; then
            if [[ "$_MA_CHECK_DIR" == /* ]]; then
                _ma_real_loop="$_MA_CHECK_DIR"
            else
                _ma_real_loop="$PROJECT_ROOT/$_MA_CHECK_DIR"
            fi
        fi
        # Case 1: the target lies inside the loop directory. The decision is
        # made purely on the basename: allowlisted names exit 0, everything
        # else exits 2.
        if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then
            _ma_basename=$(basename "$_ma_real_path")
            # Allowlist: only methodology artifacts (not raw development records).
            # Raw records (round-*-summary.md, round-*-review-result.md) are
            # intentionally excluded so the originating session cannot read
            # project-specific content and must rely solely on the sanitized
            # methodology-analysis-report.md for all user-facing output.
            # The spawned Opus agent reads raw records directly (not restricted
            # by hooks due to different session_id -- see limitation comment above).
            case "$_ma_basename" in
                methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md)
                    exit 0
                    ;;
                *)
                    echo "# Read Blocked During Methodology Analysis

Only methodology artifacts can be read from the loop directory during this phase.
Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2
                    exit 2
                    ;;
            esac
        fi
        # Case 2: the target is outside the loop directory.
        # Files within the project root are blocked (project-specific information)
        # Files outside the project root are allowed (system files, config, etc.)
        # The path is tested against both the canonical project root and the
        # raw PROJECT_ROOT value, so either prefix form triggers the block.
        _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "$PROJECT_ROOT")
        if [[ -n "$_ma_project_real" ]]; then
            _ma_path_check="${_ma_real_path:-$FILE_PATH}"
            if [[ "$_ma_path_check" == "$_ma_project_real/"* ]] || \
               [[ "$_ma_path_check" == "$PROJECT_ROOT/"* ]]; then
                echo "# Read Blocked During Methodology Analysis

Reading project files is not allowed during the methodology analysis phase.
Only methodology artifacts within the loop directory can be read.
Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2
                exit 2
            fi
        fi
        # Target is outside the project root: allow it. The methodology phase
        # is fully decided here, so the round-file checks below never run
        # while this phase is active.
        exit 0
    fi
fi

# ========================================
# Check for Restricted RLCR Files
# ========================================

# Classify the requested path. Each helper runs inside a command
# substitution and its verdict is recorded as the string "true" or "false".
IS_GOAL_TRACKER=$(
    if is_goal_tracker_path "$FILE_PATH_LOWER"; then
        echo "true"
    else
        echo "false"
    fi
)

# A round file is any of the summary / prompt / contract types, probed in
# that order; the first match settles it.
IS_ROUND_FILE=$(
    for _kind in summary prompt contract; do
        if is_round_file_type "$FILE_PATH_LOWER" "$_kind"; then
            echo "true"
            exit 0
        fi
    done
    echo "false"
)

IN_HUMANIZE_LOOP_DIR=$(
    if is_in_humanize_loop_dir "$FILE_PATH"; then
        echo "true"
    else
        echo "false"
    fi
)

# Only round files, or a goal tracker located in a humanize loop directory,
# need the checks that follow; every other read is allowed right here.
if [[ "$IS_ROUND_FILE" == "true" ]]; then
    :
elif [[ "$IS_GOAL_TRACKER" == "true" && "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; then
    :
else
    exit 0
fi

# Bare filename of the requested path, used later for round-number
# extraction and for building the expected path.
CLAUDE_FILENAME=$(basename "$FILE_PATH")

# ========================================
# Find Active Loop and Current Round
# ========================================

# Re-use ACTIVE_LOOP_DIR if already set by methodology analysis check above
# Re-use the loop directory resolved earlier in this script when it is
# non-empty; otherwise look up the active loop for this session.
ACTIVE_LOOP_DIR="${ACTIVE_LOOP_DIR:-${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}}"

# No active loop for this session: nothing to enforce, allow the read.
if [[ -z "$ACTIVE_LOOP_DIR" ]]; then
    exit 0
fi

# Detect loop phase from the name of the active state file.
STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR")
IS_FINALIZE_PHASE=false
if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then
    IS_FINALIZE_PHASE=true
fi

# Parse state file using strict validation (fail closed on malformed state).
# Exit with status 2, the status every other blocking path in this script
# uses. Claude Code treats other non-zero hook statuses as non-blocking
# errors and lets the tool call proceed, which would contradict the
# "blocking operation" message below.
if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then
    echo "Error: Malformed state file, blocking operation for safety" >&2
    exit 2
fi
# NOTE(review): STATE_CURRENT_ROUND is not assigned in this file; presumably
# it is set by parse_state_file_strict -- confirm against lib/loop-common.sh.
CURRENT_ROUND="$STATE_CURRENT_ROUND"

# During the finalize phase, contract files may not be read at all.
if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then
    finalize_contract_blocked_message "read" >&2
    exit 2
fi

# ========================================
# Validate Goal Tracker Path
# ========================================

# A goal-tracker read inside a humanize loop directory is permitted only for
# the active loop's own goal-tracker.md. Both paths are passed through
# _normalize_path before being compared.
if [[ "$IS_GOAL_TRACKER" == "true" && "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; then
    CORRECT_PATH="${ACTIVE_LOOP_DIR}/goal-tracker.md"
    NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH")
    NORMALIZED_CORRECT_PATH=$(_normalize_path "$CORRECT_PATH")

    # Same path after normalization: this is the active tracker, allow it.
    [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_CORRECT_PATH" ]] || exit 0

    # Any other goal tracker under a loop directory is rejected, pointing
    # the reader at the active one.
    FALLBACK="# Wrong Goal Tracker Path

Read the active loop goal tracker instead: {{CORRECT_PATH}}"
    load_and_render_safe "$TEMPLATE_DIR" "block/wrong-file-location.md" "$FALLBACK" \
        "FILE_PATH=$FILE_PATH" \
        "ACTIVE_LOOP_DIR=$ACTIVE_LOOP_DIR" \
        "CURRENT_ROUND=$CURRENT_ROUND" \
        "CORRECT_PATH=$CORRECT_PATH" >&2
    exit 2
fi

# ========================================
# Extract Round Number and File Type
# ========================================

# Round number embedded in the requested filename. When none can be
# extracted there is nothing to compare against, so the read is allowed.
CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME")
[[ -n "$CLAUDE_ROUND" ]] || exit 0

# Work out which round-file type was requested. Types are probed in the
# order summary, prompt, contract and the first match wins; FILE_TYPE stays
# empty when none matches.
FILE_TYPE=""
for _candidate in summary prompt contract; do
    if is_round_file_type "$FILE_PATH_LOWER" "$_candidate"; then
        FILE_TYPE="$_candidate"
        break
    fi
done

# ========================================
# Validate File Location
# ========================================

# Round files must be read from inside a humanize loop directory. When the
# requested path is elsewhere, reject the read and point at the active loop.
if [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then
    # Expected location: the active loop's file of the same type for the
    # current round.
    CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md"
    FALLBACK="# Wrong File Location

Reading {{FILE_PATH}} is blocked. Read from the active loop: {{ACTIVE_LOOP_DIR}}"
    # CORRECT_PATH is handed to the renderer as well, so a template that
    # references {{CORRECT_PATH}} can resolve it. This same template is
    # rendered with CORRECT_PATH by the goal-tracker check in this script.
    load_and_render_safe "$TEMPLATE_DIR" "block/wrong-file-location.md" "$FALLBACK" \
        "FILE_PATH=$FILE_PATH" \
        "ACTIVE_LOOP_DIR=$ACTIVE_LOOP_DIR" \
        "CURRENT_ROUND=$CURRENT_ROUND" \
        "CORRECT_PATH=$CORRECT_PATH" >&2
    exit 2
fi

# ========================================
# Validate Round Number
# ========================================

# The requested round must equal the loop's current round (compared as
# strings), unless the file is explicitly allowlisted for the active loop.
if [[ "$CLAUDE_ROUND" == "$CURRENT_ROUND" ]] || is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then
    : # round matches or file is allowlisted -- continue to the path check
else
    FALLBACK="# Wrong Round File

You tried to read round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**.

Read from: {{ACTIVE_LOOP_DIR}}"
    load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-file.md" "$FALLBACK" \
        "CLAUDE_ROUND=$CLAUDE_ROUND" \
        "FILE_TYPE=$FILE_TYPE" \
        "CURRENT_ROUND=$CURRENT_ROUND" \
        "ACTIVE_LOOP_DIR=$ACTIVE_LOOP_DIR" \
        "FILE_PATH=$FILE_PATH" >&2
    exit 2
fi

# ========================================
# Validate Directory Path
# ========================================

# The only acceptable location for this filename is directly inside the
# active loop directory.
CORRECT_PATH="${ACTIVE_LOOP_DIR}/${CLAUDE_FILENAME}"

# Compare prefix-canonical forms of the requested and expected paths -- see
# loop-write-validator.sh for the rationale; the same reasoning applies to
# reads. Without this, a symlink planted at the leaf could let a Read follow
# the link outside the loop dir and still pass this validator.
# If a canonical form comes back empty, the raw path stands in for it.
_READ_FILE_REAL=$(canonicalize_path_prefix "$FILE_PATH")
_READ_CORRECT_REAL=$(canonicalize_path_prefix "$CORRECT_PATH")

# Paths agree: every check has passed, allow the read.
if [[ "${_READ_FILE_REAL:-$FILE_PATH}" == "${_READ_CORRECT_REAL:-$CORRECT_PATH}" ]]; then
    exit 0
fi

# Paths differ: reject and report the expected location.
FALLBACK="# Wrong Directory Path

You tried to {{ACTION}} {{FILE_PATH}} but the correct path is {{CORRECT_PATH}}"
load_and_render_safe "$TEMPLATE_DIR" "block/wrong-directory-path.md" "$FALLBACK" \
    "ACTION=read" \
    "FILE_PATH=$FILE_PATH" \
    "CORRECT_PATH=$CORRECT_PATH" >&2
exit 2