import pyarrow.dataset as ds
import pyarrow.fs as fs

# Connect to the S3-compatible object store.
# NOTE(review): credentials are hard-coded placeholders here — in real
# deployments prefer environment variables or the default AWS credential chain.
s3 = fs.S3FileSystem(
    access_key="<access_key>",
    secret_key="<secret_key>",
    region="<region>",
)

# Location of the GNSS speed signal within the remote data lake.
datalake_path = "css-parquet-test/AABBCCDD/CAN2_GnssSpeed"

# Build a dataset spanning every Parquet file found under the path.
ds_gnss_speed = ds.dataset(datalake_path, format="parquet", filesystem=s3)

# Materialize the entire dataset as an in-memory Arrow table.
gnss_speed = ds_gnss_speed.to_table()