Commit 95f334fb authored by icecraft

doc: remove dummy log

parent b50f742f
import concurrent.futures
-import glob
-import os
-import threading

import fitz
@@ -83,6 +80,7 @@ def process_pdf_batch(pdf_jobs, idx):
        images.append(tmp)
    return (idx, images)


def batch_build_dataset(pdf_paths, k, lang=None):
    """Process multiple PDFs by partitioning them into k balanced parts and
    processing each part in parallel.
@@ -122,9 +120,6 @@ def batch_build_dataset(pdf_paths, k, lang=None):
    # Partition the jobs based on page count. Each job has 1 page
    partitions = partition_array_greedy(pdf_info, k)

-    for i, partition in enumerate(partitions):
-        print(f'Partition {i+1}: {len(partition)} pdfs')

    # Process each partition in parallel
    all_images_h = {}
@@ -146,7 +141,6 @@ def batch_build_dataset(pdf_paths, k, lang=None):
        for i, future in enumerate(concurrent.futures.as_completed(futures)):
            try:
                idx, images = future.result()
-                print(f'Partition {i+1} completed: processed {len(images)} images')
                all_images_h[idx] = images
            except Exception as e:
                print(f'Error processing partition: {e}')
...
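The commit only strips the per-partition progress prints; the underlying flow stays the same: jobs are split into k balanced partitions by page count (partition_array_greedy), each partition is processed in a worker (process_pdf_batch), and results are collected keyed by partition index as futures complete. Below is a minimal, runnable sketch of that pattern, assuming simplified stand-ins: partition_greedy and process_batch here are illustrative, not the project's actual implementations.

# Sketch of the partition-and-process pattern used by batch_build_dataset.
# partition_greedy and process_batch are hypothetical stand-ins.
import concurrent.futures


def partition_greedy(jobs, k):
    """Split (name, weight) jobs into k parts, always adding to the lightest part."""
    parts = [[] for _ in range(k)]
    loads = [0] * k
    for name, weight in sorted(jobs, key=lambda j: j[1], reverse=True):
        i = loads.index(min(loads))      # current lightest partition
        parts[i].append((name, weight))
        loads[i] += weight
    return parts


def process_batch(batch, idx):
    """Process one partition and return (idx, results), mirroring process_pdf_batch."""
    return idx, [name.upper() for name, _ in batch]


if __name__ == '__main__':
    jobs = [('a.pdf', 12), ('b.pdf', 3), ('c.pdf', 7), ('d.pdf', 5)]
    partitions = partition_greedy(jobs, k=2)

    results = {}
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as pool:
        futures = {pool.submit(process_batch, part, i): i
                   for i, part in enumerate(partitions)}
        for future in concurrent.futures.as_completed(futures):
            try:
                idx, images = future.result()
                results[idx] = images    # keyed by partition index, as in the diff
            except Exception as e:
                print(f'Error processing partition: {e}')
    print(results)

Collecting results into a dict keyed by the submitted partition index (rather than by completion order) keeps the output stable even though as_completed yields futures in whatever order the workers finish.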