pygndc Tutorial — Reading & Analyzing .gndc Files
pygndc (Geographic Neural Data Cube) is a Python SDK for working with neurally compressed geospatial time-series data. A single compact .gndc file can replace hundreds of GeoTIFF files while retaining high reconstruction quality.
pip install pygndc
Optional dependencies for ecosystem integration:
pip install pygndc[xarray] # xarray support
pip install pygndc[all] # xarray + geopandas + scipy
A GPU is not required: if CUDA is unavailable, pass device="cpu" when opening a file.
The recommended entry point is pygndc.open():
import pygndc
# Simple open
ds = pygndc.open("satellite.gndc")
# Use as context manager (auto-releases GPU memory on exit)
with pygndc.open("satellite.gndc") as ds:
frame = ds.read(t=0)
# ... do work ...
# GPU memory is freed here
# Force CPU (no GPU required)
ds = pygndc.open("satellite.gndc", device="cpu")
# Force pure-PyTorch backend (no tiny-cuda-nn needed)
ds = pygndc.open("satellite.gndc", backend="torch")
When you're done, always close the dataset to release GPU memory:
ds.close()
pygndc ships with a built-in GUI application — GeoNDC Neural Earth Viewer — for visually exploring .gndc files without writing any code.
# From Python
import pygndc
pygndc.start_viewer()
# From the command line
pygndc viewer -i satellite.gndc
Open a .gndc file via the File menu. The viewer reconstructs frames on-the-fly using the neural model. Embed the viewer in your own Tkinter application:
import tkinter as tk
from pygndc.viewer import GNDCInteractiveViewer
root = tk.Tk()
root.geometry("1800x1200")
app = GNDCInteractiveViewer(root)
root.mainloop()
GNDCDataset exposes GDAL/rasterio-style metadata properties:
with pygndc.open("satellite.gndc") as ds:
# Quick overview
print(ds)
# GNDCDataset: satellite.gndc
# Shape: T=46, H=500, W=500, C=4
# CRS: EPSG:4326
# Bounds: (116.0, 39.0, 117.0, 40.0)
# Resolution: (0.002, 0.002)
# Compact view
repr(ds)
# <GNDCDataset 'satellite.gndc' (T=46, 500x500, 4bands) EPSG:4326>
| Property | Type | Description |
|---|---|---|
| ds.shape | (T, H, W, C) | Full data dimensions |
| ds.height, ds.width | int | Spatial dimensions |
| ds.n_bands | int | Number of spectral bands |
| ds.n_timestamps | int | Number of time steps |
| ds.crs | rasterio.crs.CRS | Coordinate reference system |
| ds.transform | Affine | Pixel-to-geo affine transform |
| ds.bounds | BoundingBox | Geographic extent (left, bottom, right, top) |
| ds.resolution | (res_x, res_y) | Pixel size in CRS units |
| ds.timestamps | list[datetime] | Observation timestamps |
| ds.nodata | float | NoData value (always NaN) |
| ds.band_names | list[str] or None | Band names if available |
| ds.has_mask | bool | Whether a validity mask exists |
| ds.has_residuals | bool | Whether residual corrections exist |
| ds.meta | dict | All metadata in one dictionary |
# Examples
print(f"CRS: {ds.crs}") # EPSG:4326
print(f"Bounds: {ds.bounds}") # BoundingBox(left=116.0, ...)
print(f"Resolution: {ds.resolution}") # (0.002, 0.002)
print(f"Time range: {ds.timestamps[0]} ~ {ds.timestamps[-1]}")
# Read the first frame — returns shape (H, W, C)
frame = ds.read(t=0)
# Read by date string (finds the nearest observed timestamp)
frame = ds.read(t="2024-06-15")
# Read by datetime object
from datetime import datetime
frame = ds.read(t=datetime(2024, 6, 15))
All read() calls return numpy arrays with shape (H, W, C) — channel-last, consistent with numpy/matplotlib conventions.
# Read only bands 0 and 3 (0-based indices)
frame = ds.read(t=0, bands=[0, 3]) # → (H, W, 2)
# Read a single band
red = ds.read(t=0, bands=[0]) # → (H, W, 1)
# Read a spatial sub-window: (col_off, row_off, width, height)
patch = ds.read(t=0, window=(100, 200, 50, 50)) # → (50, 50, C)
This is a neural compression superpower — unlike traditional formats, the neural model can reconstruct at any point in time, not just at observed timestamps:
# Reconstruct at an arbitrary date (no observed data needed)
frame = ds.read_at_time("2024-06-15")
frame = ds.read_at_time("2024-06-15 12:00:00")
# Also supports bands and window
frame = ds.read_at_time("2024-07-01", bands=[0, 1], window=(0, 0, 256, 256))
# Extract by geographic bounding box: (min_lon, min_lat, max_lon, max_lat)
region = ds.read_region(bbox=(116.3, 39.4, 116.6, 39.7), t=0) # → (h, w, C)
For many analysis tasks you only need one location's time series, not the whole image. pygndc provides efficient point queries that avoid reconstructing full frames.
# Single time step → (C,)
vals = ds.sample(lon=116.5, lat=39.9, t=0)
print(f"Band values: {vals}")
# Full time series → (T, C)
series = ds.sample(lon=116.5, lat=39.9)
print(f"Time series shape: {series.shape}") # (46, 4)
# Full time series for pixel (row=100, col=200) → (T, C)
series = ds.pixel_series(row=100, col=200)
pixel_series() is highly efficient: it builds a single (T, dims) tensor and runs one batched forward pass.
import matplotlib.pyplot as plt
series = ds.pixel_series(row=100, col=200) # (T, C)
dates = ds.timestamps
plt.figure(figsize=(10, 4))
for band in range(ds.n_bands):
plt.plot(dates, series[:, band], label=f"Band {band}")
plt.xlabel("Date")
plt.ylabel("Reflectance")
plt.legend()
plt.title("Pixel Time Series")
plt.tight_layout()
plt.show()
Compute vegetation indices directly from the dataset:
# NDVI — specify which bands are Red and NIR (0-based)
ndvi = ds.ndvi(t=0, red_band=0, nir_band=1) # → (H, W)
# EVI
evi = ds.evi(t=0, blue_band=0, green_band=1, nir_band=2) # → (H, W)
# You can also use the standalone functions
from pygndc import compute_ndvi, compute_ndwi, compute_ndmi
frame = ds.read(t=0)
ndvi = compute_ndvi(frame[:, :, 0], frame[:, :, 1])
Because data is stored as a neural network, spatial and temporal derivatives can be computed analytically via automatic differentiation — no finite-difference noise.
# Get both dx and dy
dx, dy = ds.gradient(t=0, mode="spatial") # each → (H, W, C)
# Or compute gradient magnitude directly
mag = ds.gradient(t=0, mode="mag") # → (H, W, C)
# NDVI velocity (1st temporal derivative)
velocity = ds.temporal_derivative(t=0, order=1, red_idx=0, nir_idx=1) # → (H, W)
# NDVI acceleration (2nd temporal derivative)
accel = ds.temporal_derivative(t=0, order=2, red_idx=0, nir_idx=1) # → (H, W)
# Single frame
ds.to_tif("output/frame_0.tif", t=0)
ds.to_tif("output/frame_june.tif", t="2024-06-15")
# Export all frames (with progress bar)
ds.to_tifs("output/all_frames/", progress=True)
# Get a rasterio MemoryFile — use it in any rasterio workflow
memfile = ds.to_rasterio(t=0)
with memfile.open() as src:
print(src.profile)
data = src.read() # rasterio convention: (C, H, W)
Requires the xarray extra (pip install pygndc[xarray]):
# Full dataset
xds = ds.to_xarray()
print(xds)
# Partial (select time steps)
xds = ds.to_xarray(times=[0, 5, 10, 20])
The returned xarray.Dataset has proper time, y, x coordinates and CRS attributes.
arr = ds.to_numpy(t=0) # → (H, W, C), same as ds.read(t=0)
import matplotlib.pyplot as plt
with pygndc.open("satellite.gndc") as ds:
frame = ds.read(t=0)
# Display an RGB composite (assuming bands 2, 1, 0 are R, G, B)
rgb = frame[:, :, [2, 1, 0]]
rgb = (rgb - rgb.min()) / (rgb.max() - rgb.min()) # normalize to [0, 1]
plt.figure(figsize=(8, 8))
plt.imshow(rgb)
plt.title(f"RGB Composite — {ds.timestamps[0].strftime('%Y-%m-%d')}")
plt.axis("off")
plt.show()
with pygndc.open("satellite.gndc") as ds:
ndvi = ds.ndvi(t=0, red_band=0, nir_band=1)
plt.figure(figsize=(8, 8))
plt.imshow(ndvi, cmap="RdYlGn", vmin=-0.2, vmax=0.8)
plt.colorbar(label="NDVI")
plt.title("NDVI")
plt.axis("off")
plt.show()
pygndc provides a CLI for common operations:
# Show model info
pygndc info satellite.gndc
pygndc info satellite.gndc --json
# Decompress all frames to TIFs
pygndc decompress -i satellite.gndc -o output/
# Decompress with temporal interpolation (every 5 days)
pygndc decompress -i satellite.gndc -o output/ \
--start 2024-01-01 --end 2024-12-31 --interval 5
# Compute NDVI and save as TIF
pygndc ndvi -i satellite.gndc -o ndvi.tif \
--timestamp 0 --red-band 0 --nir-band 1
# Compute gradient magnitude
pygndc derivative -i satellite.gndc -o grad.tif \
--mode mag --timestamp 0
# Point query
pygndc sample -i satellite.gndc --lon 116.5 --lat 39.9
pygndc sample -i satellite.gndc --lon 116.5 --lat 39.9 --timestamp 0
# Export pixel time series to CSV
pygndc timeseries -i satellite.gndc \
--row 100 --col 200 -o timeseries.csv
The high-level GNDCDataset is built on top of GNDCReader. You can use the reader directly for lower-level control:
from pygndc import GNDCReader
reader = GNDCReader(backend="auto", device="cuda")
reader.load("satellite.gndc")
# Access raw metadata
meta = reader.dataset_meta
t_norms = meta['t_norms'] # normalized time values
data_range = meta['data_range'] # min/max for denormalization
# Reconstruct a frame (returns denormalized, masked numpy array)
frame = reader.reconstruct_single_frame(t_idx=0) # → (H, W, C)
# Reconstruct at arbitrary normalized time
frame = reader._reconstruct_arbitrary_time(t_norm=0.5)
# Efficient pixel time series
series = reader.reconstruct_pixel_series(row=100, col=200) # → (T, C)
# Spatial derivatives via autograd
grad = reader.compute_spatial_derivatives(target_t_norm=0.5, mode="mag")
# Export all as TIFs
reader.reconstruct_and_save_all("output/")
# Clean up
reader.reset()
import pygndc
with pygndc.open("satellite.gndc") as ds:
# Metadata
print(ds.crs, ds.bounds, ds.shape, ds.resolution)
# Read frames — all return (H, W, C)
frame = ds.read(t=0)
frame = ds.read(t="2024-06-15", bands=[0, 3])
frame = ds.read_at_time("2024-06-15 12:00:00") # continuous interpolation
region = ds.read_region(bbox=(116, 39, 117, 40), t=0)
# Point queries — no full-frame reconstruction needed
vals = ds.sample(lon=116.5, lat=39.9, t=0) # → (C,)
series = ds.sample(lon=116.5, lat=39.9) # → (T, C)
series = ds.pixel_series(row=100, col=200) # → (T, C)
# Vegetation indices
ndvi = ds.ndvi(t=0, red_band=0, nir_band=1) # → (H, W)
# Analytic gradients
dx, dy = ds.gradient(t=0, mode="spatial")
# Export
ds.to_tif("frame.tif", t=0)
ds.to_tifs("output/")
memfile = ds.to_rasterio(t=0)
xds = ds.to_xarray()