""" Donut Copyright (c) 2022-present NAVER Corp. MIT License """ from collections import OrderedDict class TextReader: def __init__(self, path, cache_size=2 ** 28, block_size=2 ** 20): self.fp = open(path, "r", encoding="utf-8") self.length = 0 self.offsets = [0] self.cache = OrderedDict() self.cache_size = cache_size self.block_size = block_size self.bucket_size = cache_size // block_size self.idx = 0 while True: text = self.fp.read(self.block_size) if not text: break self.length += len(text) self.offsets.append(self.fp.tell()) def __len__(self): return self.length def __iter__(self): return self def __next__(self): char = self.get() self.next() return char def move(self, idx): self.idx = idx def next(self): self.idx = (self.idx + 1) % self.length def prev(self): self.idx = (self.idx - 1) % self.length def get(self): key = self.idx // self.block_size if key in self.cache: text = self.cache[key] else: if len(self.cache) >= self.bucket_size: self.cache.popitem(last=False) offset = self.offsets[key] self.fp.seek(offset, 0) text = self.fp.read(self.block_size) self.cache[key] = text self.cache.move_to_end(key) char = text[self.idx % self.block_size] return char