Source code for eodal.utils.stacking
"""
Stacking of `pandas.DataFrame` objects.
Copyright (C) 2022 Lukas Valentin Graf
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import annotations
import os
import glob
import pandas as pd
from pathlib import Path
from typing import Optional
[docs]
def stack_dataframes(
in_dir: Path,
search_pattern: str,
start_date: Optional[int] = None,
end_date: Optional[int] = None,
**kwargs,
) -> pd.DataFrame:
"""
stacks a list of pandas dataframes into a single big one
to allow for calculating multitemporal statistics and more
convenient handling of pixels and field polygons
:param in_dir:
directory in which to search for CSV files to be read into memory
:param search_pattern:
wild-card expression for searching for CSV files with pixel reflectance
values (e.g., '*10m.csv')
:param start_date:
start date in the format YYYYMMDD to use for filtering CSV files. If None
(Default), all files are stacked
:param end_date:
end date in the format YYYYMMDD to use for filtering CSV files. If None
(Default), all files are stacked
:param **kwargs:
keyword arguments to pass to pandas.read_csv()
"""
# get a list of all CSV files matching the search pattern
csv_files = glob.glob(str(in_dir.joinpath(search_pattern)))
# loop over files and read them into dataframes
all_df = []
for csv_file in csv_files:
if start_date is not None and end_date is not None:
date_file = int(os.path.basename(csv_file)[0:8])
if date_file < start_date or date_file > end_date:
continue
tmp_df = pd.read_csv(csv_file, **kwargs)
all_df.append(tmp_df)
# concat the obtained list of dataframes into a single one and return
return pd.concat(all_df)