Commit 95f334fb authored by icecraft
Browse files

doc: remove dummy log

parent b50f742f
import concurrent.futures
import glob
import os
import threading
import fitz
......@@ -83,6 +80,7 @@ def process_pdf_batch(pdf_jobs, idx):
images.append(tmp)
return (idx, images)
def batch_build_dataset(pdf_paths, k, lang=None):
"""Process multiple PDFs by partitioning them into k balanced parts and
processing each part in parallel.
......@@ -122,9 +120,6 @@ def batch_build_dataset(pdf_paths, k, lang=None):
# Partition the jobs based on page count. Each job has 1 page.
partitions = partition_array_greedy(pdf_info, k)
for i, partition in enumerate(partitions):
print(f'Partition {i+1}: {len(partition)} pdfs')
# Process each partition in parallel
all_images_h = {}
......@@ -146,7 +141,6 @@ def batch_build_dataset(pdf_paths, k, lang=None):
for i, future in enumerate(concurrent.futures.as_completed(futures)):
try:
idx, images = future.result()
print(f'Partition {i+1} completed: processed {len(images)} images')
all_images_h[idx] = images
except Exception as e:
print(f'Error processing partition: {e}')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment