GDAL Float to Binary Conversion

Recently, I had to frequently share deep learning model outputs with a client. However, with each dataset being several GB in size, this increasingly became a challenge. By default, many outputs, such as binary segmentation results, are stored as floating point. Often, such data can be converted to an integer data type (e.g., uint8) without considerable loss of information. In my case, this reduced the data size to a few MB. Because I needed this operation so frequently, I am archiving the relevant script in this post.

from osgeo import gdal
import os
import numpy as np


def write_raster(path, array, reference_dataset, data_type):
    """Write ``array`` to a DEFLATE-compressed GeoTIFF at ``path``.

    The geotransform and projection are copied from ``reference_dataset`` so
    the output stays aligned with the raster the array was derived from.

    Args:
        path: Destination file path of the GeoTIFF.
        array: Either a 2-D array shaped ``(rows, cols)`` or a 3-D array
            shaped ``(bands, rows, cols)`` (the layout GDAL's
            ``ReadAsArray()`` uses for multi-band rasters).
        reference_dataset: Open GDAL dataset that supplies the geotransform
            and projection.
        data_type: GDAL data type constant for the output bands,
            e.g. ``gdal.GDT_Byte``.

    Raises:
        ValueError: If ``array`` is neither 2-D nor 3-D.
        RuntimeError: If GDAL could not create the output dataset.
    """
    shape = array.shape
    if len(shape) == 2:
        rows, cols = shape
        band_arrays = [array]
    elif len(shape) == 3:
        band_count, rows, cols = shape
        band_arrays = [array[index] for index in range(band_count)]
    else:
        raise ValueError(
            "expected a 2-D or 3-D array, got shape {!r}".format(shape)
        )

    driver = gdal.GetDriverByName("GTiff")
    out_dataset = driver.Create(
        path, cols, rows, len(band_arrays), data_type, ['COMPRESS=DEFLATE']
    )
    # Without gdal.UseExceptions(), a failed Create() returns None instead of
    # raising; fail loudly here rather than with an AttributeError below.
    if out_dataset is None:
        raise RuntimeError("GDAL could not create {!r}".format(path))

    out_dataset.SetGeoTransform(reference_dataset.GetGeoTransform())
    out_dataset.SetProjection(reference_dataset.GetProjection())
    # GDAL band indices are 1-based.
    for band_index, band_array in enumerate(band_arrays, start=1):
        out_dataset.GetRasterBand(band_index).WriteArray(band_array)
    out_dataset.FlushCache()
    # Dropping the last reference closes the dataset and finalizes the file.
    out_dataset = None


# Directory containing the floating-point GeoTIFFs to convert.
source_dir = "path/to/original/tif/files/"
# Directory that receives the thresholded uint8 GeoTIFFs.
target_dir = "path/to/converted/tif/files/"


# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(target_dir, exist_ok=True)


files = os.listdir(source_dir)
# Match on the actual extension (case-insensitively) so that names merely
# ending in the letters "tif" are skipped and .TIF / .tiff files are included.
files_tif = [file for file in files if file.lower().endswith((".tif", ".tiff"))]


for file_tif in files_tif:
    # os.path.join works whether or not the directory has a trailing slash.
    file_path = os.path.join(source_dir, file_tif)

    gdal_ds = gdal.Open(file_path)
    # Without gdal.UseExceptions(), a failed Open() returns None.
    if gdal_ds is None:
        raise RuntimeError("GDAL could not open {!r}".format(file_path))
    gdal_arr = gdal_ds.ReadAsArray()

    # Threshold at 0.5: values above become 1, everything else 0.
    gdal_arr_binary = (gdal_arr > 0.5).astype(np.uint8)

    # splitext strips the extension regardless of its length.
    file_stem = os.path.splitext(file_tif)[0]
    out_file_path = os.path.join(target_dir, file_stem + "_binary.tif")

    write_raster(out_file_path, gdal_arr_binary, gdal_ds, gdal.GDT_Byte)

    # Release the source dataset so its file handle is closed promptly.
    gdal_ds = None

Leave a Reply Cancel reply