// Normalize the image to float32 - be careful with PyTorch's .to(model.device, dtype=torch.float16): the float32 -> float16 -> float32 round trip sometimes reduces precision, sometimes not
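// A minimal sketch of the normalization step described above (an assumption, not the
// surrounding code): scale each interleaved RGB u8 value to [0,1], then standardize
// with per-channel mean/std, keeping everything in float32. Assumes <vector> and
// <cstdint> are already included; the name and parameters are illustrative.
static void normalize_u8_to_f32_sketch(const std::vector<uint8_t> & src, std::vector<float> & dst,
                                       const float mean[3], const float std_dev[3]) {
    dst.resize(src.size());
    for (size_t i = 0; i < src.size(); i++) {
        const int c = i % 3; // interleaved RGB channel index
        dst[i] = (static_cast<float>(src[i]) / 255.0f - mean[c]) / std_dev[c];
    }
}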
auto best_grid_size = uhd_find_best_resize(std::make_pair(grid_width, grid_height), scale_resolution, patch_size, allow_upscale); // pass std::make_pair instead of std::make_tuple so the argument matches the expected std::pair
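// Illustrative sketch of what a uhd_find_best_resize-style helper computes (an
// assumption about its behavior, not a copy of the implementation): scale (width,
// height) so the area stays near scale_resolution^2 (or always, if allow_upscale),
// then round each side to a multiple of patch_size. Assumes <cmath>, <algorithm>
// and <utility> are available; the name is hypothetical.
static std::pair<int, int> find_best_resize_sketch(std::pair<int, int> original_size,
                                                   int scale_resolution, int patch_size,
                                                   bool allow_upscale) {
    int width  = original_size.first;
    int height = original_size.second;
    if ((width * height > scale_resolution * scale_resolution) || allow_upscale) {
        const float r = static_cast<float>(width) / height; // preserve aspect ratio
        height = static_cast<int>(scale_resolution / std::sqrt(r));
        width  = static_cast<int>(height * r);
    }
    // round to the nearest multiple of patch_size, never dropping below one patch
    auto ensure_divide = [&](int length) {
        return std::max(static_cast<int>(std::round(static_cast<float>(length) / patch_size)) * patch_size, patch_size);
    };
    return std::make_pair(ensure_divide(width), ensure_divide(height));
}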
// for llava-1.5 this returns the normalized float tensor; for llava-1.6 (spatial_unpad with anyres processing) it returns the normalized image patch tensors as a vector
// res_imgs memory is allocated here; any previous allocation is freed first if present
std::vector<clip_image_u8_ptr> patches = divide_to_patches_u8(*temp, params.image_size); // prepare spatially sorted main patches of image_size each (336 in llava-1.6)
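// Sketch of the slicing step above (a hedged illustration, not the actual
// divide_to_patches_u8): cut the image into patch_dim x patch_dim tiles in row-major
// order (left-to-right, top-to-bottom), which is what "spatially sorted" refers to.
// Assumes clip_image_u8 exposes nx/ny and an interleaved RGB buf, and that
// clip_image_u8_init()/clip_image_u8_ptr manage ownership as elsewhere in this file.
static std::vector<clip_image_u8_ptr> divide_to_patches_sketch(const clip_image_u8 & img, int patch_dim) {
    std::vector<clip_image_u8_ptr> out;
    for (int y = 0; y < img.ny; y += patch_dim) {
        for (int x = 0; x < img.nx; x += patch_dim) {
            clip_image_u8_ptr patch(clip_image_u8_init());
            patch->nx = std::min(patch_dim, img.nx - x); // edge tiles may be smaller
            patch->ny = std::min(patch_dim, img.ny - y);
            patch->buf.resize(3 * patch->nx * patch->ny);
            for (int py = 0; py < patch->ny; py++) {
                for (int px = 0; px < patch->nx; px++) {
                    for (int c = 0; c < 3; c++) {
                        patch->buf[3 * (py * patch->nx + px) + c] =
                            img.buf[3 * ((y + py) * img.nx + (x + px)) + c];
                    }
                }
            }
            out.push_back(std::move(patch));
        }
    }
    return out;
}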