import cdsapi
import pandas as pd
import xarray as xr
from pathlib import Path

###
# ECMWF CDSAPI credentials must be set in ~/.cdsapirc
# See instructions at https://cds.climate.copernicus.eu/how-to-api
###

# CDS API request settings shared by every download in this module.

# Bounding box sent as the request's 'area' field.
# NOTE(review): presumably ordered [north, west, south, east] in degrees,
# following the CDS API convention -- confirm against the CDS documentation.
CDS_AREA = [50.50, 13.75, 50.25, 14.00]
# Variable names sent as the request's 'variable' field.
CDS_VARIABLES = [
    "surface_solar_radiation_downwards"
]

# Module-wide client, constructed at import time and used by
# download_from_cdsapi(). Credentials come from ~/.cdsapirc (see file header).
cdsapi_client = cdsapi.Client()

def download_from_cdsapi(years, months, days=None, download_path='./output.grib'):
    """
    Download ERA5 data from ECMWF CDS API

    Requests every hour of the given years/months/days for the variables in
    CDS_VARIABLES over CDS_AREA from the 'reanalysis-era5-single-levels'
    dataset and stores the result as a GRIB file.

    If download_path already exists nothing is requested and the existing
    file is reused, so the function can be re-run to resume a batch.

    :param years: list of year strings for the request's 'year' field
    :param months: list of month strings for the request's 'month' field
    :param days: list of day strings for the request's 'day' field;
        defaults to "01".."31"
    :param download_path: where the GRIB file is stored (str or Path)
    :return: download_path, exactly as it was passed in
    """
    if days is None:
        days = [f"{day:02d}" for day in range(1, 32)]  # all days in a month

    target = Path(download_path)

    if target.exists():
        print(f"{download_path} already exists, skipping download")
        return download_path

    # The destination directory may not exist yet (e.g. on a first run);
    # create it so the download does not fail when opening the file.
    target.parent.mkdir(parents=True, exist_ok=True)

    print(f"Downloading ERA5 into {download_path}")

    # Download into a sibling temporary file and rename it only on success.
    # Otherwise an interrupted transfer would leave a truncated file at
    # download_path that the "already exists" check above would later
    # mistake for a complete download.
    partial = target.with_name(target.name + ".part")

    cdsapi_client.retrieve(
        'reanalysis-era5-single-levels',
        {
            'product_type': 'reanalysis',
            'variable': CDS_VARIABLES,
            'year': years,
            'month': months,
            'day': days,
            'time': [f"{hour:02d}:00" for hour in range(24)],  # all hours
            'format': 'grib',
            'area': CDS_AREA,
        },
        str(partial)
    )

    # Same directory, so this is a plain rename on the same filesystem.
    partial.replace(target)

    print(f"Data downloaded into {download_path}")
    return download_path


def process_grib(grib_files, output_csv=None, latitude=50.25, longitude=13.75, tolerance=1e-6):
    """
    Process GRIB files and extract only relevant variables (valid_time, latitude, longitude, ssrd_j, ssrd_wh)

    Each file is opened with the cfgrib engine, flattened to a data frame and
    reduced to the rows of a single grid point. Missing ssrd values are
    replaced with 0 and a Wh/m2 column is derived from the J/m2 column.

    :param grib_files: iterable of GRIB file paths
    :param output_csv: optional path; when given, the combined frame is
        written there as CSV without the index
    :param latitude: latitude of the grid point to keep
    :param longitude: longitude of the grid point to keep
    :param tolerance: maximum absolute difference (in coordinate units)
        between a grid coordinate and latitude/longitude for a row to be
        kept; 0 gives an exact match
    :return: concatenated DataFrame of all processed files, or None when no
        file could be processed (including an empty grib_files)
    """
    data_list = []

    for file in grib_files:
        print(f"Processing {file}...")

        # The context manager closes the dataset (and its file handle) once
        # the values are copied into a data frame, instead of keeping one
        # open handle per processed file.
        with xr.open_dataset(file, engine="cfgrib") as ds:
            try:
                # Extract the relevant variables
                df = ds[["ssrd", "latitude", "longitude"]].to_dataframe().reset_index()

                # Filter data for given latitude and longitude. Coordinates
                # are floats, so compare with a tolerance rather than with
                # ==, which silently drops every row on a rounding difference.
                at_point = (
                    ((df["latitude"] - latitude).abs() <= tolerance)
                    & ((df["longitude"] - longitude).abs() <= tolerance)
                )

                # Select only the relevant columns
                filtered_df = df.loc[
                    at_point, ["valid_time", "latitude", "longitude", "ssrd"]
                ].rename(columns={"ssrd": "ssrd_j"})
            except KeyError as e:
                print(f"Error: Variable {e} not found in {file}")
                continue

        if filtered_df.empty:
            # Make an off-grid latitude/longitude visible instead of quietly
            # contributing no rows.
            print(f"Warning: no grid point at ({latitude}, {longitude}) in {file}")

        # Replace missing values with 0
        filtered_df["ssrd_j"] = filtered_df["ssrd_j"].fillna(0)

        # Convert J/m2 to Wh/m2
        filtered_df["ssrd_wh"] = filtered_df["ssrd_j"] * 0.000277777778  # 1 J = 0.000277777778 Wh

        data_list.append(filtered_df)

    # Concat all data frames
    if not data_list:
        print("No data processed. Exiting.")
        return None

    full_data = pd.concat(data_list)

    if output_csv is not None:
        full_data.to_csv(output_csv, index=False)
        print(f"Data exported to {output_csv}")

    return full_data


if __name__ == "__main__":
    # Every (year, month) from 2015-01 up to 2024-11. December 2024 is left
    # out: it is an incomplete month since simulating in december 2024.
    periods = [
        (yr, mo)
        for yr in range(2015, 2025)
        for mo in range(1, 13)
        if not (yr == 2024 and mo == 12)
    ]

    # One GRIB file per month; files already on disk are reused by the
    # download function.
    files = [
        download_from_cdsapi(
            years=[f"{yr:04d}"],
            months=[f"{mo:02d}"],
            download_path=f"./gribs/{yr:04d}_{mo:02d}_output.grib",
        )
        for yr, mo in periods
    ]

    # Merge all monthly files into a single CSV for the default grid point.
    solar_data = process_grib(grib_files=files, output_csv="./solar_radiance_data.csv")
