# Source code for eodal.metadata.sentinel2.utils
"""
Metadata filtering utilities for Sentinel-2 data
Copyright (C) 2022 Lukas Valentin Graf
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import annotations
import pandas as pd
from typing import Optional, Tuple
# [docs]
def identify_updated_scenes(
    metadata_df: pd.DataFrame, return_highest_baseline: Optional[bool] = True
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Split Sentinel-2 metadata records by PDGS Processing Baseline.

    The baseline is parsed from the fourth ``_``-separated field of each
    record's ``product_uri`` (the 'Nxxxx' token). A single baseline is then
    selected for the whole input and the records are partitioned into those
    carrying the selected baseline and all remaining ones.

    :param metadata_df:
        dataframe with a ``product_uri`` column holding the product (.SAFE)
        names. The input is not modified.
    :param return_highest_baseline:
        if True (default) select the highest baseline found in the input.
        Otherwise select the baseline most records belong to; if several
        baselines are equally frequent the highest of them is selected.
    :return:
        Tuple with two entries. The first entry is a ``DataFrame`` with the
        records of the selected baseline, the second entry contains all other
        records. Both carry an additional integer ``baseline`` column. If the
        input has no rows, two empty DataFrames are returned.
    """
    # work on a copy so the caller's DataFrame does not gain a column
    metadata = metadata_df.copy()

    # Extract the processing baseline from the product URI.
    # NOTE(review): only the first three digits after the leading 'N' are
    # parsed (e.g. 'N0212' -> 21), so baselines differing only in their last
    # digit are treated as equal. Kept as-is to preserve the values of the
    # returned ``baseline`` column -- confirm whether this is intended.
    metadata["baseline"] = metadata["product_uri"].apply(
        lambda uri: int(uri.split("_")[3][1:4])
    )

    # nothing to select from: max()/mode() are undefined on empty input
    if metadata.empty:
        return metadata, metadata.copy()

    if return_highest_baseline:
        baseline_sel = metadata["baseline"].max()
    else:
        # Series.mode() returns a Series (one entry per tied mode); it must be
        # reduced to a scalar before it can be compared against the column.
        # Ties are resolved in favour of the highest baseline.
        baseline_sel = metadata["baseline"].mode().max()

    # partition the records by the selected baseline version
    is_selected = metadata["baseline"] == baseline_sel
    return metadata[is_selected], metadata[~is_selected]