Source code for analysis.correlate

import anemoi as an
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.odr.odrpack as odrpack

import warnings

def compare_sorted_df_columns(cols_1, cols_2):
    """Return True when both column collections hold the same labels, ignoring order.

    :Parameters:

    cols_1: list
        First collection of column labels

    cols_2: list
        Second collection of column labels
    """
    ordered_first = sorted(cols_1)
    ordered_second = sorted(cols_2)
    return ordered_first == ordered_second

def valid_ws_correlation_data(data, ref_ws_col='ref', site_ws_col='site'):
    """Perform checks on wind speed correlation data.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    :Returns:

    out: bool
        True when every check passes. False, after a warning, when data has six
        or fewer rows or when both columns are equal on every row.

    :Raises:

    ValueError
        If both column names are identical, or if the columns of data are not
        exactly ref_ws_col and site_ws_col.
    """
    if ref_ws_col == site_ws_col:
        raise ValueError("Error: Reference and site wind speed columns cannot have the same name.")

    if not compare_sorted_df_columns(data.columns.tolist(), [ref_ws_col, site_ws_col]):
        raise ValueError("Error: the correlation data don't match the expected format.")

    n_points = data.shape[0]

    # NOTE(review): the message says "less than six" but this check also rejects
    # exactly six rows. The threshold is left unchanged -- confirm the intended minimum.
    if not n_points > 6:
        warnings.warn("Warning: trying to correlate between less than six points.")
        return False

    # Every row identical in both columns means the same series was passed twice.
    if (data.loc[:, ref_ws_col] == data.loc[:, site_ws_col]).sum() == n_points:
        warnings.warn("Warning: it seems you are trying to correlate a single mast against itself.")
        return False

    return True
def return_correlation_results_frame(ref_label='ref', site_label='site'):
    """Return an empty, single-row results frame for one (ref, site) pair.

    :Parameters:

    ref_label: string, default 'ref'
        Label stored in the 'ref' level of the index

    site_label: string, default 'site'
        Label stored in the 'site' level of the index

    :Returns:

    out: DataFrame
        Columns slope, offset, R2, uncert, points; one row indexed by a
        (ref, site) MultiIndex; no values filled in
    """
    result_columns = ['slope', 'offset', 'R2', 'uncert', 'points']
    label_index = pd.MultiIndex.from_tuples([(ref_label, site_label)], names=['ref', 'site'])
    return pd.DataFrame(columns=result_columns, index=label_index)
def return_correlation_data_from_masts(ref_mast, site_mast):
    """Return a DataFrame of reference and site data for correlations.

    Data are taken from each mast through return_primary_ano_vane_data().

    :Parameters:

    ref_mast: MetMast
        Anemoi MetMast object

    site_mast: MetMast
        Anemoi MetMast object

    :Returns:

    out: DataFrame
        Concurrent, non-null records with columns ref, site, and dir
    """
    ref_data = ref_mast.return_primary_ano_vane_data()
    ref_data.columns = ['ref', 'dir']
    site_data = site_mast.return_primary_ano_vane_data()
    site_data.columns = ['site', 'site_dir']

    # NOTE(review): the second concat operand and the mast-name arguments below
    # were missing from the extracted source and are reconstructed -- confirm.
    data = pd.concat([ref_data, site_data['site']], axis=1).dropna()
    data = data.loc[:, ['ref', 'site', 'dir']]

    # The validator insists on exactly the two wind speed columns, so the
    # direction column is excluded from the frame handed to it.
    ws_only = data.loc[:, ['ref', 'site']]
    if not valid_ws_correlation_data(data=ws_only, ref_ws_col='ref', site_ws_col='site'):
        warning_string = "Warning: {} and {} don't seem to have valid concurrent data for a correlation.".format(
            ref_mast.name, site_mast.name)
        warnings.warn(warning_string)
    return data
def calculate_R2(data, ref_ws_col='ref', site_ws_col='site'):
    """Return a single R2 between two wind speed columns.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    :Returns:

    out: float
        Squared correlation coefficient of the two columns, or NaN when the data
        fail valid_ws_correlation_data
    """
    pair = data.loc[:, [ref_ws_col, site_ws_col]].dropna()
    if valid_ws_correlation_data(data=pair, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        correlation = pair[ref_ws_col].corr(pair[site_ws_col])
        return correlation ** 2
    return np.nan
def calculate_IEC_uncertainty(data, ref_ws_col='ref', site_ws_col='site'):
    """Calculate the IEC correlation uncertainty between two wind speed columns.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    :Returns:

    out: float
        Standard deviation of the site/ref ratio, scaled as described in the note
        below, or NaN when the data fail valid_ws_correlation_data
    """
    pair = data.loc[:, [ref_ws_col, site_ws_col]].dropna()
    if not valid_ws_correlation_data(data=pair, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        return np.nan

    ref_values = pair[ref_ws_col].values
    site_values = pair[site_ws_col].values
    ratio_spread = np.std(site_values / ref_values)
    # NOTE(review): the value is multiplied by 100 here and again on return, and
    # divided by N rather than sqrt(N) -- confirm against the intended formula.
    uncert = ratio_spread * 100 / len(ref_values)
    return uncert * 100.0
def calculate_EDF_uncertainty(data, ref_ws_col='ref', site_ws_col='site'):
    """Calculate the EDF estimated correlation uncertainty between two wind speed columns.

    Assumes a correlation forced through the origin.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    :Returns:

    out: float
        Uncertainty multiplied by 100, or NaN when the data fail
        valid_ws_correlation_data
    """
    pair = data.loc[:, [ref_ws_col, site_ws_col]].dropna()
    if not valid_ws_correlation_data(data=pair, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        return np.nan

    ref_values = pair[ref_ws_col].values
    site_values = pair[site_ws_col].values

    # Raw second moments of the two series.
    sum_xx = np.sum(ref_values ** 2)
    sum_yy = np.sum(site_values ** 2)
    sum_xy = np.sum(ref_values * site_values)

    half_ratio = 0.5 * (sum_xx - sum_yy) / sum_xy
    slope = -half_ratio + np.sqrt(half_ratio ** 2 + 1)

    squared_error = np.sum((site_values - slope * ref_values) ** 2) / (1 + slope ** 2)
    error_variance = squared_error / (pair.shape[0] - 1)

    numerator = (error_variance * slope ** 2) * (
        sum_xx * sum_xy ** 2 + 0.25 * ((sum_xx - sum_yy) ** 2) * sum_xx)
    denominator = (half_ratio ** 2 + 1.0) * sum_xy ** 4
    uncert = np.sqrt(numerator / denominator)
    return uncert * 100.0
def ws_correlation_least_squares_model(data, ref_ws_col='ref', site_ws_col='site', force_through_origin=False):
    """Calculate the slope and offset between two wind speed columns using ordinary least squares regression.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points; values are left empty when the data
        fail valid_ws_correlation_data
    """
    data = data.loc[:, [ref_ws_col, site_ws_col]].dropna()
    results = return_correlation_results_frame(ref_label=ref_ws_col, site_label=site_ws_col)

    if not valid_ws_correlation_data(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        return results

    points = data.shape[0]
    R2 = calculate_R2(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col)
    uncert = calculate_IEC_uncertainty(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col)

    X = data[ref_ws_col].values
    Y = data[site_ws_col].values

    # An all-zero second column removes the intercept from the fit; the
    # minimum-norm solution then reports an offset of zero.
    intercept_column = np.zeros(points) if force_through_origin else np.ones(points)
    design = np.column_stack([X, intercept_column])

    # rcond is passed explicitly: omitting it triggers a FutureWarning on NumPy 1.14+.
    slope, offset = np.linalg.lstsq(design, Y, rcond=None)[0]

    results.loc[pd.IndexSlice[ref_ws_col, site_ws_col], ['slope', 'offset', 'R2', 'uncert', 'points']] = np.array(
        [slope, offset, R2, uncert, points])
    return results
def f_with_offset(B, x):
    """Linear model with intercept: B[0] is the slope, B[1] the offset."""
    slope = B[0]
    offset = B[1]
    return slope * x + offset


def f_without_offset(B, x):
    """Linear model through the origin: B[0] is the slope."""
    slope = B[0]
    return slope * x
def ws_correlation_orthoginal_distance_model(data, ref_ws_col='ref', site_ws_col='site', force_through_origin=False):
    """Calculate the slope and offset between two wind speed columns using orthogonal distance regression.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points; values are left empty when the data
        fail valid_ws_correlation_data
    """
    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = data.loc[:, [ref_ws_col, site_ws_col]].dropna().astype(float)
    results = return_correlation_results_frame(ref_label=ref_ws_col, site_label=site_ws_col)

    if not valid_ws_correlation_data(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        return results

    points = data.shape[0]
    R2 = calculate_R2(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col)
    uncert = calculate_IEC_uncertainty(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col)

    X = data[ref_ws_col].values
    Y = data[site_ws_col].values

    # The ratio of the column means seeds the slope for the iterative fit.
    data_mean = data.mean()
    slope_estimate_via_ratio = data_mean[site_ws_col] / data_mean[ref_ws_col]

    realdata = odrpack.RealData(X, Y)
    if force_through_origin:
        linear = odrpack.Model(f_without_offset)
        beta0 = [slope_estimate_via_ratio]
    else:
        linear = odrpack.Model(f_with_offset)
        beta0 = [slope_estimate_via_ratio, 0.0]

    # NOTE(review): the expression producing the fitted parameters was missing
    # from the extracted source; ODR(...).run().beta is reconstructed -- confirm.
    # The fit is run once and its output reused.
    fit_output = odrpack.ODR(realdata, linear, beta0=beta0).run()
    slope = fit_output.beta[0]
    offset = 0 if force_through_origin else fit_output.beta[1]

    results.loc[pd.IndexSlice[ref_ws_col, site_ws_col], ['slope', 'offset', 'R2', 'uncert', 'points']] = np.array(
        [slope, offset, R2, uncert, points])
    return results
def ws_correlation_robust_linear_model(data, ref_ws_col='ref', site_ws_col='site', force_through_origin=False):
    """Calculate the slope and offset between two wind speed columns using a robust linear model.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points; values are left empty when the data
        fail valid_ws_correlation_data. R2 comes from a weighted least squares
        fit that uses the robust model's weights.
    """
    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = data.loc[:, [ref_ws_col, site_ws_col]].dropna().astype(float)
    results = return_correlation_results_frame(ref_label=ref_ws_col, site_label=site_ws_col)

    if not valid_ws_correlation_data(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        return results

    points = data.shape[0]
    uncert = calculate_IEC_uncertainty(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col)

    X = data[ref_ws_col].values
    Y = data[site_ws_col].values

    # The first design column is the intercept term: ones for a free offset,
    # zeros to suppress it.
    if force_through_origin:
        design = np.column_stack([np.zeros(X.shape[0]), X])
    else:
        design = sm.add_constant(X)

    # NOTE(review): the fit call and the WLS weights argument were missing from
    # the extracted source; mod.fit() and its weights are reconstructed -- confirm.
    mod = sm.RLM(Y, design)
    resrlm = mod.fit()
    offset, slope = resrlm.params
    R2 = sm.WLS(mod.endog, mod.exog, weights=resrlm.weights).fit().rsquared

    results.loc[pd.IndexSlice[ref_ws_col, site_ws_col], ['slope', 'offset', 'R2', 'uncert', 'points']] = np.array(
        [slope, offset, R2, uncert, points])
    return results
def ws_correlation_method(data, ref_ws_col='ref', site_ws_col='site', method='ODR', force_through_origin=False):
    """Calculate the slope and offset, for a given correlation method, between two wind speed columns.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points

    :Raises:

    ValueError
        If method is not one of 'ODR', 'OLS' or 'RLM'.
    """
    # Reject unknown methods up front; previously they fell through every branch
    # and failed with an UnboundLocalError on the return statement.
    if method not in ('ODR', 'OLS', 'RLM'):
        raise ValueError(
            "Error: unknown correlation method {!r}; expected 'ODR', 'OLS' or 'RLM'.".format(method))

    if method == 'ODR':
        model = ws_correlation_orthoginal_distance_model
    elif method == 'OLS':
        model = ws_correlation_least_squares_model
    else:
        model = ws_correlation_robust_linear_model

    return model(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col,
                 force_through_origin=force_through_origin)
def ws_correlation_binned_by_direction(data, ref_ws_col='ref', site_ws_col='site', ref_dir_col='dir',
                                       dir_sectors=16, method='ODR', force_through_origin=False):
    """Calculate the slope and offset, binned by direction, between two wind speed columns.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col, and
        direction column ref_dir_col

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    ref_dir_col: string, default 'dir'
        Reference wind vane data column to use.

    dir_sectors: int, default 16
        Number of equally spaced direction sectors

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points for each direction bin (index dir_bin,
        numbered 1 to dir_sectors). Bins that fail valid_ws_correlation_data only
        have points filled in.
    """
    result_columns = ['slope', 'offset', 'R2', 'uncert', 'points']

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = data.loc[:, [ref_ws_col, site_ws_col, ref_dir_col]].dropna().astype(float)

    # Replicate the empty one-row template once per sector and re-index by bin.
    dir_bins = np.arange(1, dir_sectors + 1)
    template = return_correlation_results_frame(ref_label=ref_ws_col, site_label=site_ws_col)
    results = pd.concat([template] * dir_sectors, axis=0)
    results.index = pd.Index(dir_bins, name='dir_bin')

    data['dir_bin'] = an.analysis.wind_rose.append_dir_bin(data[ref_dir_col], dir_sectors=dir_sectors)

    for dir_bin in dir_bins:
        dir_bin_data = data.loc[data['dir_bin'] == dir_bin, [ref_ws_col, site_ws_col]]

        if not valid_ws_correlation_data(data=dir_bin_data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
            results.loc[dir_bin, 'points'] = dir_bin_data.shape[0]
            continue

        # The per-method model computes the uncertainty itself, so it is not
        # calculated separately here.
        dir_bin_results = ws_correlation_method(data=dir_bin_data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col,
                                                method=method, force_through_origin=force_through_origin)
        results.loc[dir_bin, result_columns] = dir_bin_results.values
    return results
def ws_correlation_binned_by_month(data, ref_ws_col='ref', site_ws_col='site', method='ODR',
                                   force_through_origin=False):
    """Calculate the slope and offset, binned by month, between two wind speed columns.

    :Parameters:

    data: DataFrame
        DataFrame with wind speed columns ref_ws_col and site_ws_col; its index
        must expose a ``month`` attribute (e.g. a DatetimeIndex)

    ref_ws_col: string, default 'ref'
        Reference anemometer data column to use.

    site_ws_col: string, default 'site'
        Site anemometer data column to use.

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points for each month (index month, 1 to 12).
        When the full data set fails valid_ws_correlation_data, the empty
        single-row frame is returned instead. Months that fail the check only
        have points filled in.
    """
    result_columns = ['slope', 'offset', 'R2', 'uncert', 'points']

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = data.loc[:, [ref_ws_col, site_ws_col]].dropna().astype(float)
    results = return_correlation_results_frame(ref_label=ref_ws_col, site_label=site_ws_col)

    if not valid_ws_correlation_data(data=data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
        return results

    # Replicate the empty one-row template once per month and re-index by month.
    months = np.arange(1, 13)
    results = pd.concat([results] * 12, axis=0)
    results.index = pd.Index(months, name='month')

    for month in months:
        monthly_data = data.loc[data.index.month == month, [ref_ws_col, site_ws_col]]

        if not valid_ws_correlation_data(data=monthly_data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col):
            results.loc[month, 'points'] = monthly_data.shape[0]
            continue

        # The per-method model computes the uncertainty itself, so it is not
        # calculated separately here.
        monthly_results = ws_correlation_method(data=monthly_data, ref_ws_col=ref_ws_col, site_ws_col=site_ws_col,
                                                method=method, force_through_origin=force_through_origin)
        results.loc[month, result_columns] = monthly_results.values
    return results
### MAST CORRELATIONS ###
'''
Basic outline: every correlation method below takes the reference and site mast objects
along with the names of the sensors to use.
'''
def masts_10_minute(ref_mast, site_mast, ref_ws_col=None, site_ws_col=None, method='ODR',
                    force_through_origin=False):
    """Calculate the slope and offset between two met masts.

    :Parameters:

    ref_mast: MetMast
        MetMast object

    site_mast: MetMast
        MetMast object

    ref_ws_col: string, default None (primary anemometer assumed)
        Reference anemometer sensor to use; resolved by
        ref_mast.check_and_return_mast_ws_sensor.

    site_ws_col: string, default None (primary anemometer assumed)
        Site anemometer sensor to use; resolved by
        site_mast.check_and_return_mast_ws_sensor.

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points, indexed by (ref, site) mast names
    """
    result_columns = ['slope', 'offset', 'R2', 'uncert', 'points']

    ref_ws_col = ref_mast.check_and_return_mast_ws_sensor(ref_ws_col)
    site_ws_col = site_mast.check_and_return_mast_ws_sensor(site_ws_col)

    ref_ws_data = ref_mast.return_sensor_data([ref_ws_col])
    site_ws_data = site_mast.return_sensor_data([site_ws_col])

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = pd.concat([ref_ws_data, site_ws_data], axis=1, join='inner').dropna().astype(float)
    data.columns = ['ref', 'site']

    # NOTE(review): the mast-name expressions were missing from the extracted
    # source; the `.name` attribute on MetMast is reconstructed -- confirm.
    ref_name = ref_mast.name
    site_name = site_mast.name

    results = return_correlation_results_frame(ref_label=ref_name, site_label=site_name)
    valid_results = ws_correlation_method(data=data, ref_ws_col='ref', site_ws_col='site', method=method,
                                          force_through_origin=force_through_origin)
    results.loc[pd.IndexSlice[ref_name, site_name], result_columns] = valid_results.values
    return results
def masts_10_minute_by_direction(ref_mast, site_mast, ref_ws_col=None, ref_dir_col=None, site_ws_col=None,
                                 site_dir_col=None, method='ODR', force_through_origin=False, dir_sectors=16):
    """Calculate the slope and offset, binned by direction, between two met masts.

    Direction bins are taken from the reference mast's wind vane.

    :Parameters:

    ref_mast: MetMast
        MetMast object

    site_mast: MetMast
        MetMast object

    ref_ws_col: string, default None (primary anemometer assumed)
        Reference anemometer sensor to use; resolved by
        ref_mast.check_and_return_mast_ws_sensor.

    ref_dir_col: string, default None (primary wind vane assumed)
        Reference wind vane sensor to use; resolved by
        ref_mast.check_and_return_mast_dir_sensor.

    site_ws_col: string, default None (primary anemometer assumed)
        Site anemometer sensor to use; resolved by
        site_mast.check_and_return_mast_ws_sensor.

    site_dir_col: string, default None (primary wind vane assumed)
        Site wind vane sensor; checked by
        site_mast.check_and_return_mast_dir_sensor but not otherwise used.

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    dir_sectors: int, default 16
        Number of equally spaced direction sectors

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points, indexed by (ref, site, dir_bin)
    """
    ref_ws_col = ref_mast.check_and_return_mast_ws_sensor(ref_ws_col)
    ref_dir_col = ref_mast.check_and_return_mast_dir_sensor(ref_dir_col)
    site_ws_col = site_mast.check_and_return_mast_ws_sensor(site_ws_col)
    # Kept for whatever checking the mast performs; the returned name is unused.
    site_dir_col = site_mast.check_and_return_mast_dir_sensor(site_dir_col)

    ref_ws_data = ref_mast.return_sensor_data([ref_ws_col])
    ref_dir_data = ref_mast.return_sensor_data([ref_dir_col])
    site_ws_data = site_mast.return_sensor_data([site_ws_col])

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = pd.concat([ref_ws_data, site_ws_data, ref_dir_data], axis=1, join='inner').dropna().astype(float)
    data.columns = ['ref', 'site', 'dir']

    results = ws_correlation_binned_by_direction(data, dir_sectors=dir_sectors, method=method,
                                                 force_through_origin=force_through_origin)

    # NOTE(review): the right-hand sides of the two label assignments were
    # missing from the extracted source; the mast `.name` attribute is
    # reconstructed -- confirm.
    results = results.reset_index()
    results['ref'] = ref_mast.name
    results['site'] = site_mast.name
    results = results.set_index(['ref', 'site', 'dir_bin'])
    return results
def masts_daily(ref_mast, site_mast, ref_ws_col=None, site_ws_col=None, method='ODR', force_through_origin=False,
                minimum_recovery_rate=0.7):
    """Calculate the slope and offset for daily data between two met masts.

    :Parameters:

    ref_mast: MetMast
        MetMast object

    site_mast: MetMast
        MetMast object

    ref_ws_col: string, default None (primary anemometer assumed)
        Reference anemometer sensor to use; resolved by
        ref_mast.check_and_return_mast_ws_sensor.

    site_ws_col: string, default None (primary anemometer assumed)
        Site anemometer sensor to use; resolved by
        site_mast.check_and_return_mast_ws_sensor.

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    minimum_recovery_rate: float, default 0.7
        Minimum allowable recovery rate until resampled data are excluded.
        For example, by default, when resampling 10-minute data to daily averages you would need
        at least 101 valid records to have a valid daily average. Values greater
        than 1 are treated as percentages and divided by 100.

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points, indexed by (ref, site) mast names
    """
    ref_ws_col = ref_mast.check_and_return_mast_ws_sensor(ref_ws_col)
    site_ws_col = site_mast.check_and_return_mast_ws_sensor(site_ws_col)

    ref_ws_data = ref_mast.return_sensor_data([ref_ws_col])
    site_ws_data = site_mast.return_sensor_data([site_ws_col])

    # Accept the rate either as a fraction (0.7) or as a percentage (70).
    if minimum_recovery_rate > 1:
        minimum_recovery_rate = minimum_recovery_rate / 100.0

    ref_data_daily_mean = an.utils.mast_data.resample_mast_data(ref_ws_data, freq='daily',
                                                                minimum_recovery_rate=minimum_recovery_rate)
    site_data_daily_mean = an.utils.mast_data.resample_mast_data(site_ws_data, freq='daily',
                                                                 minimum_recovery_rate=minimum_recovery_rate)

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data_daily = pd.concat([ref_data_daily_mean, site_data_daily_mean], axis=1).dropna().astype(float)
    data_daily.columns = ['ref', 'site']

    results = ws_correlation_method(data_daily, method=method, force_through_origin=force_through_origin)

    # NOTE(review): the index tuple was missing from the extracted source; the
    # mast `.name` attribute is reconstructed -- confirm.
    results.index = pd.MultiIndex.from_tuples([(ref_mast.name, site_mast.name)], names=['ref', 'site'])
    return results
def masts_daily_by_month(ref_mast, site_mast, ref_ws_col=None, site_ws_col=None, method='ODR',
                         force_through_origin=False, minimum_recovery_rate=0.7):
    """Calculate the slope and offset for daily data, binned by month, between two met masts.

    :Parameters:

    ref_mast: MetMast
        MetMast object

    site_mast: MetMast
        MetMast object

    ref_ws_col: string, default None (primary anemometer assumed)
        Reference anemometer sensor to use; resolved by
        ref_mast.check_and_return_mast_ws_sensor.

    site_ws_col: string, default None (primary anemometer assumed)
        Site anemometer sensor to use; resolved by
        site_mast.check_and_return_mast_ws_sensor.

    method: string, default 'ODR'
        Correlation method to use.

        * Orthogonal distance regression: 'ODR'
        * Ordinary least squares: 'OLS'
        * Robust linear models: 'RLM'

    force_through_origin: boolean, default False
        Force the correlation through the origin (offset equal to zero)

    minimum_recovery_rate: float, default 0.7
        Minimum allowable recovery rate until resampled data are excluded.
        For example, by default, when resampling 10-minute data to daily averages you would need
        at least 101 valid records to have a valid daily average. Values greater
        than 1 are treated as percentages and divided by 100.

    :Returns:

    out: DataFrame
        slope, offset, R2, uncert, points for each month, indexed by
        (ref, site, month)
    """
    ref_ws_col = ref_mast.check_and_return_mast_ws_sensor(ref_ws_col)
    site_ws_col = site_mast.check_and_return_mast_ws_sensor(site_ws_col)

    ref_ws_data = ref_mast.return_sensor_data([ref_ws_col])
    site_ws_data = site_mast.return_sensor_data([site_ws_col])

    # Accept the rate either as a fraction (0.7) or as a percentage (70).
    if minimum_recovery_rate > 1:
        minimum_recovery_rate = minimum_recovery_rate / 100.0

    ref_data_daily_mean = an.utils.mast_data.resample_mast_data(ref_ws_data, freq='daily',
                                                                minimum_recovery_rate=minimum_recovery_rate)
    site_data_daily_mean = an.utils.mast_data.resample_mast_data(site_ws_data, freq='daily',
                                                                 minimum_recovery_rate=minimum_recovery_rate)

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data_daily = pd.concat([ref_data_daily_mean, site_data_daily_mean], axis=1).dropna().astype(float)
    data_daily.columns = ['ref', 'site']

    # The caller's method is forwarded; it used to be hard-coded to 'ODR'.
    results = ws_correlation_binned_by_month(data_daily, method=method, force_through_origin=force_through_origin)

    # NOTE(review): the right-hand sides of the two label assignments were
    # missing from the extracted source; the mast `.name` attribute is
    # reconstructed -- confirm.
    results = results.reset_index()
    results['ref'] = ref_mast.name
    results['site'] = site_mast.name
    results = results.set_index(['ref', 'site', 'month'])
    return results
def apply_10min_results_by_direction(ref_mast, site_mast, corr_results, ref_ws_col=None, ref_dir_col=None,
                                     site_ws_col=None, splice=True):
    """Applies the slopes and offsets from a 10-minute correlation, binned by direction, between two met masts.

    :Parameters:

    ref_mast: MetMast
        MetMast object

    site_mast: MetMast
        MetMast object

    corr_results: DataFrame
        slope, offset, R2, uncert, points for each direction sector

    ref_ws_col: string, default None (primary anemometer assumed)
        Reference anemometer sensor to use; resolved by
        ref_mast.check_and_return_mast_ws_sensor.

    ref_dir_col: string, default None (primary vane assumed)
        Reference wind vane sensor to use; resolved by
        ref_mast.check_and_return_mast_dir_sensor.

    site_ws_col: string, default None (primary anemometer assumed)
        Site anemometer sensor to use; resolved by
        site_mast.check_and_return_mast_ws_sensor.

    splice: Boolean, default True
        Returns site data where available and gap-fills any missing periods
        between the site mast and the reference mast's measurement period.
        Otherwise, returns purely synthesized data without taking into account
        the measured wind speeds.

    :Returns:

    out: time series DataFrame
        predicted wind speeds at the site, in a single column named syn
    """
    ref_ws_col = ref_mast.check_and_return_mast_ws_sensor(ref_ws_col)
    ref_dir_col = ref_mast.check_and_return_mast_dir_sensor(ref_dir_col)
    site_ws_col = site_mast.check_and_return_mast_ws_sensor(site_ws_col)

    ref_ws_data = ref_mast.return_sensor_data([ref_ws_col])
    ref_dir_data = ref_mast.return_sensor_data([ref_dir_col])
    site_ws_data = site_mast.return_sensor_data([site_ws_col])

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = pd.concat([ref_ws_data, site_ws_data, ref_dir_data], axis=1, join='inner').dropna().astype(float)
    data.columns = ['ref', 'site', 'dir']
    ref_data = pd.concat([ref_ws_data, ref_dir_data], axis=1, join='inner').dropna().astype(float)
    ref_data.columns = ['ref', 'dir']

    # NOTE(review): several right-hand sides in this function were missing from
    # the extracted source. The mast `.name` lookups, the per-bin slope/offset
    # lookup, the site frame and the fill index are reconstructed -- confirm.
    ref_mast_name = ref_mast.name
    site_mast_name = site_mast.name

    corr_to_apply = corr_results.loc[pd.IndexSlice[ref_mast_name, site_mast_name], :]
    ref_data['dir_bin'] = an.analysis.wind_rose.append_dir_bin(ref_data.dir, dir_sectors=corr_to_apply.shape[0])

    # Look up each record's slope and offset from its direction bin.
    ref_data['slope'] = ref_data['dir_bin'].map(corr_to_apply['slope']).astype(float)
    ref_data['offset'] = ref_data['dir_bin'].map(corr_to_apply['offset']).astype(float)

    syn = (ref_data['ref'] * ref_data['slope'] + ref_data['offset']).to_frame('syn')
    syn_data = pd.concat([data['site'].to_frame('site'), syn], axis=1)

    # Start from the synthesized series and overwrite with measured site data
    # wherever a measurement exists.
    syn_data['syn_splice'] = syn_data['syn']
    fill_index = syn_data['site'].dropna().index
    syn_data.loc[fill_index, 'syn_splice'] = syn_data.loc[fill_index, 'site']

    output_column = 'syn_splice' if splice else 'syn'
    return syn_data[output_column].to_frame('syn')
def apply_daily_results_by_month_to_mast_data(mast_data, corr_results, ref_ws_col='ref', site_ws_col='site',
                                              splice=True):
    """Applies the slopes and offsets from a daily correlation, binned by month, to a DataFrame of wind speed data.

    :Parameters:

    mast_data: DataFrame
        timeseries of wind speed data; its index must expose a ``month``
        attribute (e.g. a DatetimeIndex)

    corr_results: DataFrame
        slope, offset, R2, uncert, points for each month

    ref_ws_col: string, default 'ref'
        Reference anemometer data to use. Extracted from mast_data DataFrame.

    site_ws_col: string, default 'site'
        Site anemometer data to use. Extracted from mast_data DataFrame

    splice: Boolean, default True
        Returns site data where available and gap-fills any missing periods
        between the site mast and the reference mast's measurement period.
        Otherwise, returns purely synthesized data without taking into account
        the measured wind speeds.

    :Returns:

    out: time series DataFrame
        predicted wind speeds at the site, in a single column named syn
    """
    if corr_results.index.nlevels > 1:
        corr_results = corr_results.loc[ref_ws_col, :]

    # Keep rows where at least one of the two wind speeds is present.
    corr_data = mast_data.loc[:, [ref_ws_col, site_ws_col]].dropna(how='all')
    corr_data['month'] = corr_data.index.month

    # NOTE(review): the right-hand sides of the slope and offset assignments were
    # missing from the extracted source; a per-month lookup into corr_results is
    # reconstructed and presumes corr_results is indexed by month here -- confirm.
    corr_data['slope'] = corr_data['month'].map(corr_results['slope']).astype(float)
    corr_data['offset'] = corr_data['month'].map(corr_results['offset']).astype(float)

    corr_data['syn'] = corr_data[ref_ws_col] * corr_data['slope'] + corr_data['offset']

    # Start from the measured site data and fill its gaps with synthesized values.
    corr_data['syn_splice'] = corr_data[site_ws_col]
    missing_site = corr_data['syn_splice'].isnull()
    corr_data.loc[missing_site, 'syn_splice'] = corr_data.loc[missing_site, 'syn']

    output_column = 'syn_splice' if splice else 'syn'
    return corr_data[output_column].to_frame('syn')
def apply_daily_results_by_month(ref_mast, site_mast, corr_results, ref_ws_col=None, site_ws_col=None, splice=True):
    """Applies the slopes and offsets from a daily correlation, binned by month, between two met masts.

    If the reference or site mast does not report a 'daily' time step, its data
    are resampled to daily frequency with resample_mast_data.

    :Parameters:

    ref_mast: MetMast
        MetMast object

    site_mast: MetMast
        MetMast object

    corr_results: DataFrame
        slope, offset, R2, uncert, points for each month

    ref_ws_col: string, default None (primary anemometer assumed)
        Reference anemometer sensor to use; resolved by
        ref_mast.check_and_return_mast_ws_sensor.

    site_ws_col: string, default None (primary anemometer assumed)
        Site anemometer sensor to use; resolved by
        site_mast.check_and_return_mast_ws_sensor.

    splice: Boolean, default True
        Returns site data where available and gap-fills any missing periods
        between the site mast and the reference mast's measurement period.
        Otherwise, returns purely synthesized data without taking into account
        the measured wind speeds.

    :Returns:

    out: time series DataFrame
        predicted wind speeds at the site, in a single column named syn
    """
    ref_ws_col = ref_mast.check_and_return_mast_ws_sensor(ref_ws_col)
    site_ws_col = site_mast.check_and_return_mast_ws_sensor(site_ws_col)

    ref_ws_data = ref_mast.return_sensor_data([ref_ws_col])
    site_ws_data = site_mast.return_sensor_data([site_ws_col])

    # NOTE(review): several right-hand sides in this function were missing from
    # the extracted source. The mast `.name` lookups, the site frame, the
    # per-month slope/offset lookup and the fill index are reconstructed -- confirm.
    ref_mast_name = ref_mast.name
    site_mast_name = site_mast.name

    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    data = pd.concat([ref_ws_data, site_ws_data], axis=1, join='inner').dropna().astype(float)
    data.columns = ['ref', 'site']

    # Both frames are always defined: masts that are already daily are used
    # as-is instead of leaving the name unbound.
    site_data = data['site'].to_frame('site')
    if site_mast.infer_time_step() != 'daily':
        site_data = an.utils.mast_data.resample_mast_data(site_data, freq='daily').dropna()

    ref_data = data['ref'].to_frame('ref')
    if ref_mast.infer_time_step() != 'daily':
        ref_data = an.utils.mast_data.resample_mast_data(ref_data, freq='daily').dropna()

    corr_to_apply = corr_results.loc[pd.IndexSlice[ref_mast_name, site_mast_name], :]

    # Look up each day's slope and offset from its calendar month.
    ref_data['month'] = ref_data.index.month
    ref_data['slope'] = ref_data['month'].map(corr_to_apply['slope']).astype(float)
    ref_data['offset'] = ref_data['month'].map(corr_to_apply['offset']).astype(float)

    syn = (ref_data['ref'] * ref_data['slope'] + ref_data['offset']).to_frame('syn')
    syn_data = pd.concat([site_data, syn], axis=1)

    # Start from the synthesized series and overwrite with measured site data
    # wherever a measurement exists.
    syn_data['syn_splice'] = syn_data['syn']
    fill_index = syn_data['site'].dropna().index
    syn_data.loc[fill_index, 'syn_splice'] = syn_data.loc[fill_index, 'site']

    output_column = 'syn_splice' if splice else 'syn'
    return syn_data[output_column].to_frame('syn')