diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index 93d04f82b..f94bce9c3 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -49,6 +49,75 @@ def _select_dims(da: xr.DataArray, period: Any = None, scenario: Any = None) -> return da +def combine_slices( + slices: dict[tuple, np.ndarray], + extra_dims: list[str], + dim_coords: dict[str, list], + output_dim: str, + output_coord: Any, + attrs: dict | None = None, +) -> xr.DataArray: + """Combine {(dim_values): 1D_array} dict into a DataArray. + + This utility simplifies the common pattern of iterating over extra dimensions + (like period, scenario), processing each slice, and combining results. + + Args: + slices: Dict mapping dimension value tuples to 1D numpy arrays. + Keys are tuples like ('period1', 'scenario1') matching extra_dims order. + extra_dims: Dimension names in order (e.g., ['period', 'scenario']). + dim_coords: Dict mapping dimension names to coordinate values. + output_dim: Name of the output dimension (typically 'time'). + output_coord: Coordinate values for output dimension. + attrs: Optional DataArray attributes. + + Returns: + DataArray with dims [output_dim, *extra_dims]. + + Raises: + ValueError: If slices is empty. + KeyError: If a required key is missing from slices. + + Example: + >>> slices = { + ... ('P1', 'base'): np.array([1, 2, 3]), + ... ('P1', 'high'): np.array([4, 5, 6]), + ... ('P2', 'base'): np.array([7, 8, 9]), + ... ('P2', 'high'): np.array([10, 11, 12]), + ... } + >>> result = combine_slices( + ... slices, + ... extra_dims=['period', 'scenario'], + ... dim_coords={'period': ['P1', 'P2'], 'scenario': ['base', 'high']}, + ... output_dim='time', + ... output_coord=[0, 1, 2], + ... ) + >>> result.dims + ('time', 'period', 'scenario') + """ + if not slices: + raise ValueError('slices cannot be empty') + + first = next(iter(slices.values())) + n_output = len(first) + shape = [n_output] + [len(dim_coords[d]) for d in extra_dims] + data = np.empty(shape, dtype=first.dtype) + + for combo in np.ndindex(*shape[1:]): + key = tuple(dim_coords[d][i] for d, i in zip(extra_dims, combo, strict=True)) + try: + data[(slice(None),) + combo] = slices[key] + except KeyError: + raise KeyError(f'Missing slice for key {key} (extra_dims={extra_dims})') from None + + return xr.DataArray( + data, + dims=[output_dim] + extra_dims, + coords={output_dim: output_coord, **dim_coords}, + attrs=attrs or {}, + ) + + def _cluster_occurrences(cr: TsamClusteringResult) -> np.ndarray: """Compute cluster occurrences from ClusteringResult.""" counts = Counter(cr.cluster_assignments) @@ -266,143 +335,84 @@ def n_segments(self) -> int | None: @property def cluster_assignments(self) -> xr.DataArray: - """Build multi-dimensional cluster_assignments DataArray. + """Maps each original cluster to its typical cluster index. Returns: - DataArray with dims [original_cluster] or [original_cluster, period?, scenario?]. + DataArray with dims [original_cluster, period?, scenario?]. 
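+
+        Example:
+            For 4 original clusters grouped into 2 typical clusters, a possible
+            (illustrative) mapping is [0, 1, 1, 0]: originals 0 and 3 are
+            represented by typical cluster 0, originals 1 and 2 by cluster 1.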
""" - if not self.dim_names: - # Simple case: no extra dimensions - # Note: Don't include coords - they cause issues when used as isel() indexer - return xr.DataArray( - np.array(self._results[()].cluster_assignments), - dims=['original_cluster'], - name='cluster_assignments', - ) - - # Multi-dimensional case - # Note: Don't include coords - they cause issues when used as isel() indexer - periods = self._get_dim_values('period') - scenarios = self._get_dim_values('scenario') - - return self._build_multi_dim_array( + # Note: No coords on original_cluster - they cause issues when used as isel() indexer + return self._build_property_array( lambda cr: np.array(cr.cluster_assignments), base_dims=['original_cluster'], - base_coords={}, # No coords on original_cluster - periods=periods, - scenarios=scenarios, name='cluster_assignments', ) @property def cluster_occurrences(self) -> xr.DataArray: - """Build multi-dimensional cluster_occurrences DataArray. + """How many original clusters map to each typical cluster. Returns: - DataArray with dims [cluster] or [cluster, period?, scenario?]. + DataArray with dims [cluster, period?, scenario?]. """ - if not self.dim_names: - return xr.DataArray( - _cluster_occurrences(self._results[()]), - dims=['cluster'], - coords={'cluster': range(self.n_clusters)}, - name='cluster_occurrences', - ) - - periods = self._get_dim_values('period') - scenarios = self._get_dim_values('scenario') - - return self._build_multi_dim_array( + return self._build_property_array( _cluster_occurrences, base_dims=['cluster'], base_coords={'cluster': range(self.n_clusters)}, - periods=periods, - scenarios=scenarios, name='cluster_occurrences', ) @property def cluster_centers(self) -> xr.DataArray: - """Which original period is the representative (center) for each cluster. + """Which original cluster is the representative (center) for each typical cluster. Returns: - DataArray with dims [cluster] containing original period indices. + DataArray with dims [cluster, period?, scenario?]. """ - if not self.dim_names: - return xr.DataArray( - np.array(self._results[()].cluster_centers), - dims=['cluster'], - coords={'cluster': range(self.n_clusters)}, - name='cluster_centers', - ) - - periods = self._get_dim_values('period') - scenarios = self._get_dim_values('scenario') - - return self._build_multi_dim_array( + return self._build_property_array( lambda cr: np.array(cr.cluster_centers), base_dims=['cluster'], base_coords={'cluster': range(self.n_clusters)}, - periods=periods, - scenarios=scenarios, name='cluster_centers', ) @property def segment_assignments(self) -> xr.DataArray | None: - """For each timestep within a cluster, which intra-period segment it belongs to. - - Only available if segmentation was configured during clustering. + """For each timestep within a cluster, which segment it belongs to. Returns: - DataArray with dims [cluster, time] or None if no segmentation. + DataArray with dims [cluster, time, period?, scenario?], or None if not segmented. """ - first = self._first_result - if first.segment_assignments is None: + if self._first_result.segment_assignments is None: return None - - if not self.dim_names: - # segment_assignments is tuple of tuples: (cluster0_assignments, cluster1_assignments, ...) 
- data = np.array(first.segment_assignments) - return xr.DataArray( - data, - dims=['cluster', 'time'], - coords={'cluster': range(self.n_clusters)}, - name='segment_assignments', - ) - - # Multi-dim case would need more complex handling - # For now, return None for multi-dim - return None + timesteps = self._first_result.n_timesteps_per_period + return self._build_property_array( + lambda cr: np.array(cr.segment_assignments), + base_dims=['cluster', 'time'], + base_coords={'cluster': range(self.n_clusters), 'time': range(timesteps)}, + name='segment_assignments', + ) @property def segment_durations(self) -> xr.DataArray | None: - """Duration of each intra-period segment in hours. - - Only available if segmentation was configured during clustering. + """Duration of each segment in timesteps. Returns: - DataArray with dims [cluster, segment] or None if no segmentation. + DataArray with dims [cluster, segment, period?, scenario?], or None if not segmented. """ - first = self._first_result - if first.segment_durations is None: + if self._first_result.segment_durations is None: return None + n_segments = self._first_result.n_segments - if not self.dim_names: - # segment_durations is tuple of tuples: (cluster0_durations, cluster1_durations, ...) - # Each cluster may have different segment counts, so we need to handle ragged arrays - durations = first.segment_durations - n_segments = first.n_segments - data = np.array([list(d) + [np.nan] * (n_segments - len(d)) for d in durations]) - return xr.DataArray( - data, - dims=['cluster', 'segment'], - coords={'cluster': range(self.n_clusters), 'segment': range(n_segments)}, - name='segment_durations', - attrs={'units': 'hours'}, - ) + def _get_padded_durations(cr: TsamClusteringResult) -> np.ndarray: + """Pad ragged segment durations to uniform shape.""" + return np.array([list(d) + [np.nan] * (n_segments - len(d)) for d in cr.segment_durations]) - return None + return self._build_property_array( + _get_padded_durations, + base_dims=['cluster', 'segment'], + base_coords={'cluster': range(self.n_clusters), 'segment': range(n_segments)}, + name='segment_durations', + ) @property def segment_centers(self) -> xr.DataArray | None: @@ -420,6 +430,59 @@ def segment_centers(self) -> xr.DataArray | None: # tsam's segment_centers may be None even with segments configured return None + @property + def position_within_segment(self) -> xr.DataArray | None: + """Position of each timestep within its segment (0-indexed). + + For each (cluster, time) position, returns how many timesteps into the + segment that position is. Used for interpolation within segments. + + Returns: + DataArray with dims [cluster, time] or [cluster, time, period?, scenario?]. + Returns None if no segmentation. 
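+
+        Example:
+            For one cluster with segment_assignments [0, 0, 0, 1, 1, 2]
+            (illustrative values), the positions are [0, 1, 2, 0, 1, 0] - the
+            counter restarts whenever a new segment begins.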
+ """ + segment_assignments = self.segment_assignments + if segment_assignments is None: + return None + + def _compute_positions(seg_assigns: np.ndarray) -> np.ndarray: + """Compute position within segment for each (cluster, time).""" + n_clusters, n_times = seg_assigns.shape + positions = np.zeros_like(seg_assigns) + for c in range(n_clusters): + pos = 0 + prev_seg = -1 + for t in range(n_times): + seg = seg_assigns[c, t] + if seg != prev_seg: + pos = 0 + prev_seg = seg + positions[c, t] = pos + pos += 1 + return positions + + # Handle extra dimensions by applying _compute_positions to each slice + extra_dims = [d for d in segment_assignments.dims if d not in ('cluster', 'time')] + + if not extra_dims: + positions = _compute_positions(segment_assignments.values) + return xr.DataArray( + positions, + dims=['cluster', 'time'], + coords=segment_assignments.coords, + name='position_within_segment', + ) + + # Multi-dimensional case: compute for each period/scenario slice + result = xr.apply_ufunc( + _compute_positions, + segment_assignments, + input_core_dims=[['cluster', 'time']], + output_core_dims=[['cluster', 'time']], + vectorize=True, + ) + return result.rename('position_within_segment') + # === Serialization === def to_dict(self) -> dict: @@ -468,58 +531,41 @@ def _get_dim_values(self, dim: str) -> list | None: idx = self._dim_names.index(dim) return sorted(set(k[idx] for k in self._results.keys())) - def _build_multi_dim_array( + def _build_property_array( self, get_data: callable, base_dims: list[str], - base_coords: dict, - periods: list | None, - scenarios: list | None, - name: str, + base_coords: dict | None = None, + name: str | None = None, ) -> xr.DataArray: - """Build a multi-dimensional DataArray from per-result data.""" - has_periods = periods is not None - has_scenarios = scenarios is not None - - slices = {} - if has_periods and has_scenarios: - for p in periods: - for s in scenarios: - slices[(p, s)] = xr.DataArray( - get_data(self._results[(p, s)]), - dims=base_dims, - coords=base_coords, - ) - elif has_periods: - for p in periods: - slices[(p,)] = xr.DataArray( - get_data(self._results[(p,)]), - dims=base_dims, - coords=base_coords, - ) - elif has_scenarios: - for s in scenarios: - slices[(s,)] = xr.DataArray( - get_data(self._results[(s,)]), - dims=base_dims, - coords=base_coords, - ) - - # Combine slices into multi-dimensional array - if has_periods and has_scenarios: - period_arrays = [] - for p in periods: - scenario_arrays = [slices[(p, s)] for s in scenarios] - period_arrays.append(xr.concat(scenario_arrays, dim=pd.Index(scenarios, name='scenario'))) - result = xr.concat(period_arrays, dim=pd.Index(periods, name='period')) - elif has_periods: - result = xr.concat([slices[(p,)] for p in periods], dim=pd.Index(periods, name='period')) - else: - result = xr.concat([slices[(s,)] for s in scenarios], dim=pd.Index(scenarios, name='scenario')) + """Build a DataArray property, handling both single and multi-dimensional cases.""" + base_coords = base_coords or {} + periods = self._get_dim_values('period') + scenarios = self._get_dim_values('scenario') - # Ensure base dims come first - dim_order = base_dims + [d for d in result.dims if d not in base_dims] - return result.transpose(*dim_order).rename(name) + # Build list of (dim_name, values) for dimensions that exist + extra_dims = [] + if periods is not None: + extra_dims.append(('period', periods)) + if scenarios is not None: + extra_dims.append(('scenario', scenarios)) + + # Simple case: no extra dimensions + if not 
extra_dims: + return xr.DataArray(get_data(self._results[()]), dims=base_dims, coords=base_coords, name=name) + + # Multi-dimensional: stack data for each combination + first_data = get_data(next(iter(self._results.values()))) + shape = list(first_data.shape) + [len(vals) for _, vals in extra_dims] + data = np.empty(shape, dtype=first_data.dtype) # Preserve dtype + + for combo in np.ndindex(*[len(vals) for _, vals in extra_dims]): + key = tuple(extra_dims[i][1][idx] for i, idx in enumerate(combo)) + data[(...,) + combo] = get_data(self._results[key]) + + dims = base_dims + [dim_name for dim_name, _ in extra_dims] + coords = {**base_coords, **{dim_name: vals for dim_name, vals in extra_dims}} + return xr.DataArray(data, dims=dims, coords=coords, name=name) @staticmethod def _key_to_str(key: tuple) -> str: @@ -847,7 +893,8 @@ def expand_data( """Expand aggregated data back to original timesteps. Uses the timestep_mapping to map each original timestep to its - representative value from the aggregated data. + representative value from the aggregated data. Fully vectorized using + xarray's advanced indexing - no loops over period/scenario dimensions. Args: aggregated: DataArray with aggregated (cluster, time) or (time,) dimension. @@ -859,63 +906,31 @@ def expand_data( if original_time is None: original_time = self.original_timesteps - timestep_mapping = self.timestep_mapping - has_cluster_dim = 'cluster' in aggregated.dims + timestep_mapping = self.timestep_mapping # Already multi-dimensional DataArray - # For segmented systems, the time dimension size is n_segments, not timesteps_per_cluster. - # The timestep_mapping uses timesteps_per_cluster for creating indices, but when - # indexing into aggregated data with (cluster, time) shape, we need the actual - # time dimension size. 
- if has_cluster_dim and self.is_segmented and self.n_segments is not None: - time_dim_size = self.n_segments + if 'cluster' not in aggregated.dims: + # No cluster dimension: use mapping directly as time index + expanded = aggregated.isel(time=timestep_mapping) else: - time_dim_size = self.timesteps_per_cluster - - def _expand_slice(mapping: np.ndarray, data: xr.DataArray) -> np.ndarray: - """Expand a single slice using the mapping.""" - if has_cluster_dim: - cluster_ids = mapping // time_dim_size - time_within = mapping % time_dim_size - return data.values[cluster_ids, time_within] - return data.values[mapping] - - # Simple case: no period/scenario dimensions - extra_dims = [d for d in timestep_mapping.dims if d != 'original_time'] - if not extra_dims: - expanded_values = _expand_slice(timestep_mapping.values, aggregated) - return xr.DataArray( - expanded_values, - coords={'time': original_time}, - dims=['time'], - attrs=aggregated.attrs, - ) + # Has cluster dimension: compute cluster and time indices from mapping + # For segmented systems, time dimension is n_segments, not timesteps_per_cluster + if self.is_segmented and self.n_segments is not None: + time_dim_size = self.n_segments + else: + time_dim_size = self.timesteps_per_cluster - # Multi-dimensional: expand each slice and recombine - dim_coords = {d: list(timestep_mapping.coords[d].values) for d in extra_dims} - expanded_slices = {} - for combo in np.ndindex(*[len(v) for v in dim_coords.values()]): - selector = {d: dim_coords[d][i] for d, i in zip(extra_dims, combo, strict=True)} - mapping = _select_dims(timestep_mapping, **selector).values - data_slice = ( - _select_dims(aggregated, **selector) if any(d in aggregated.dims for d in selector) else aggregated - ) - expanded_slices[tuple(selector.values())] = xr.DataArray( - _expand_slice(mapping, data_slice), - coords={'time': original_time}, - dims=['time'], - ) + cluster_indices = timestep_mapping // time_dim_size + time_indices = timestep_mapping % time_dim_size - # Concatenate along extra dimensions - result_arrays = expanded_slices - for dim in reversed(extra_dims): - dim_vals = dim_coords[dim] - grouped = {} - for key, arr in result_arrays.items(): - rest_key = key[:-1] if len(key) > 1 else () - grouped.setdefault(rest_key, []).append(arr) - result_arrays = {k: xr.concat(v, dim=pd.Index(dim_vals, name=dim)) for k, v in grouped.items()} - result = list(result_arrays.values())[0] - return result.transpose('time', ...).assign_attrs(aggregated.attrs) + # xarray's advanced indexing handles broadcasting across period/scenario dims + expanded = aggregated.isel(cluster=cluster_indices, time=time_indices) + + # Clean up: drop coordinate artifacts from isel, then rename original_time -> time + # The isel operation may leave 'cluster' and 'time' as non-dimension coordinates + expanded = expanded.drop_vars(['cluster', 'time'], errors='ignore') + expanded = expanded.rename({'original_time': 'time'}).assign_coords(time=original_time) + + return expanded.transpose('time', ...).assign_attrs(aggregated.attrs) def build_expansion_divisor( self, @@ -930,6 +945,8 @@ def build_expansion_divisor( For each original timestep, returns the number of original timesteps that map to the same (cluster, segment) - i.e., the segment duration in timesteps. + Fully vectorized using xarray's advanced indexing - no loops over period/scenario. + Args: original_time: Original time coordinates. Defaults to self.original_timesteps. 
@@ -943,88 +960,24 @@ def build_expansion_divisor( if original_time is None: original_time = self.original_timesteps - n_original = self.n_original_clusters - timesteps_per_cluster = self.timesteps_per_cluster - cluster_assignments = self.cluster_assignments # Maps original clusters to typical clusters + timestep_mapping = self.timestep_mapping # Already multi-dimensional + segment_durations = self.results.segment_durations # [cluster, segment, period?, scenario?] - def _build_divisor_for_result( - assignments: np.ndarray, - clustering_result: TsamClusteringResult, - ) -> np.ndarray: - """Build divisor for a single period/scenario using its clustering result.""" - n_timesteps = n_original * timesteps_per_cluster - divisor = np.empty(n_timesteps) + # Decode cluster and segment indices from timestep_mapping + # For segmented systems, encoding is: cluster_id * n_segments + segment_idx + time_dim_size = self.n_segments + cluster_indices = timestep_mapping // time_dim_size + segment_indices = timestep_mapping % time_dim_size # This IS the segment index - # Get segment info from the specific clustering result - # segment_durations: tuple of tuples, one per cluster - # segment_assignments: tuple of tuples, one per cluster - seg_durations_per_cluster = clustering_result.segment_durations - seg_assignments_per_cluster = clustering_result.segment_assignments + # Get duration for each segment directly + # segment_durations[cluster, segment] -> duration + divisor = segment_durations.isel(cluster=cluster_indices, segment=segment_indices) - for orig_cluster_idx in range(n_original): - typical_cluster_idx = int(assignments[orig_cluster_idx]) + # Clean up coordinates and rename + divisor = divisor.drop_vars(['cluster', 'time', 'segment'], errors='ignore') + divisor = divisor.rename({'original_time': 'time'}).assign_coords(time=original_time) - # Get segment durations and assignments for this typical cluster - seg_durs = seg_durations_per_cluster[typical_cluster_idx] - seg_assigns = seg_assignments_per_cluster[typical_cluster_idx] - - # For each timestep within this cluster - cluster_start = orig_cluster_idx * timesteps_per_cluster - for t in range(timesteps_per_cluster): - seg_idx = int(seg_assigns[t]) - divisor[cluster_start + t] = seg_durs[seg_idx] - - return divisor - - # Handle extra dimensions (period, scenario) - extra_dims = self.results.dim_names - - if not extra_dims: - # Simple case: no period/scenario dimensions - result = self.results.sel() # Get the single result - divisor_values = _build_divisor_for_result(cluster_assignments.values, result) - return xr.DataArray( - divisor_values, - dims=['time'], - coords={'time': original_time}, - name='expansion_divisor', - ) - - # Multi-dimensional: build divisor for each period/scenario - dim_coords = self.results.coords - divisor_slices = {} - - for combo in np.ndindex(*[len(v) for v in dim_coords.values()]): - selector = {d: dim_coords[d][i] for d, i in zip(extra_dims, combo, strict=True)} - - # Get cluster assignments for this period/scenario - if any(d in cluster_assignments.dims for d in selector): - assignments = _select_dims(cluster_assignments, **selector).values - else: - assignments = cluster_assignments.values - - # Get the clustering result for this period/scenario - result = self.results.sel(**selector) - divisor_values = _build_divisor_for_result(assignments, result) - - divisor_slices[tuple(selector.values())] = xr.DataArray( - divisor_values, - dims=['time'], - coords={'time': original_time}, - ) - - # Concatenate along extra dimensions 
- result_arrays = divisor_slices - for dim in reversed(extra_dims): - dim_vals = dim_coords[dim] - grouped = {} - for key, arr in result_arrays.items(): - rest_key = key[:-1] if len(key) > 1 else () - grouped.setdefault(rest_key, []).append(arr) - result_arrays = {k: xr.concat(v, dim=pd.Index(dim_vals, name=dim)) for k, v in grouped.items()} - - result = list(result_arrays.values())[0] - return result.transpose('time', ...).assign_attrs({'name': 'expansion_divisor'}) + return divisor.transpose('time', ...).rename('expansion_divisor') def get_result( self, @@ -1131,24 +1084,10 @@ def _build_timestep_mapping(self) -> xr.DataArray: """Build timestep_mapping DataArray.""" n_original = len(self.original_timesteps) original_time_coord = self.original_timesteps.rename('original_time') - - if not self.dim_names: - # Simple case: no extra dimensions - mapping = _build_timestep_mapping(self.results[()], n_original) - return xr.DataArray( - mapping, - dims=['original_time'], - coords={'original_time': original_time_coord}, - name='timestep_mapping', - ) - - # Multi-dimensional case: combine slices into multi-dim array - return self.results._build_multi_dim_array( + return self.results._build_property_array( lambda cr: _build_timestep_mapping(cr, n_original), base_dims=['original_time'], base_coords={'original_time': original_time_coord}, - periods=self.results._get_dim_values('period'), - scenarios=self.results._get_dim_values('scenario'), name='timestep_mapping', ) diff --git a/flixopt/components.py b/flixopt/components.py index 8c17bc6eb..481135d1c 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -16,8 +16,8 @@ from .elements import Component, ComponentModel, Flow from .features import InvestmentModel, PiecewiseModel from .interface import InvestParameters, PiecewiseConversion, StatusParameters -from .modeling import BoundingPatterns -from .structure import FlowSystemModel, register_class_for_io +from .modeling import BoundingPatterns, _scalar_safe_isel, _scalar_safe_isel_drop, _scalar_safe_reduce +from .structure import FlowSystemModel, VariableCategory, register_class_for_io if TYPE_CHECKING: import linopy @@ -570,8 +570,12 @@ def _plausibility_checks(self) -> None: # Initial charge state should not constrain investment decision # If initial > (min_cap * rel_max), investment is forced to increase capacity # If initial < (max_cap * rel_min), investment is forced to decrease capacity - min_initial_at_max_capacity = maximum_capacity * self.relative_minimum_charge_state.isel(time=0) - max_initial_at_min_capacity = minimum_capacity * self.relative_maximum_charge_state.isel(time=0) + min_initial_at_max_capacity = maximum_capacity * _scalar_safe_isel( + self.relative_minimum_charge_state, {'time': 0} + ) + max_initial_at_min_capacity = minimum_capacity * _scalar_safe_isel( + self.relative_maximum_charge_state, {'time': 0} + ) # Only perform numeric comparisons if using a numeric initial_charge_state if not initial_equals_final and self.initial_charge_state is not None: @@ -940,8 +944,13 @@ def _create_storage_variables(self): upper=ub, coords=self._model.get_coords(extra_timestep=True), short_name='charge_state', + category=VariableCategory.CHARGE_STATE, + ) + self.add_variables( + coords=self._model.get_coords(), + short_name='netto_discharge', + category=VariableCategory.NETTO_DISCHARGE, ) - self.add_variables(coords=self._model.get_coords(), short_name='netto_discharge') def _add_netto_discharge_constraint(self): """Add constraint: netto_discharge = discharging - charging.""" @@ 
-972,6 +981,7 @@ def _add_investment_model(self):
                 label_of_element=self.label_of_element,
                 label_of_model=self.label_of_element,
                 parameters=self.element.capacity_in_flow_hours,
+                size_category=VariableCategory.STORAGE_SIZE,
             ),
             short_name='investment',
         )
@@ -1100,24 +1110,51 @@ def _relative_charge_state_bounds(self) -> tuple[xr.DataArray, xr.DataArray]:
         Returns:
             Tuple of (minimum_bounds, maximum_bounds) DataArrays extending to final timestep
         """
-        final_coords = {'time': [self._model.flow_system.timesteps_extra[-1]]}
+        timesteps_extra = self._model.flow_system.timesteps_extra
+
+        # Get the original bounds (may be scalar or have time dim)
+        rel_min = self.element.relative_minimum_charge_state
+        rel_max = self.element.relative_maximum_charge_state

         # Get final minimum charge state
         if self.element.relative_minimum_final_charge_state is None:
-            min_final = self.element.relative_minimum_charge_state.isel(time=-1, drop=True)
+            min_final_value = _scalar_safe_isel_drop(rel_min, 'time', -1)
         else:
-            min_final = self.element.relative_minimum_final_charge_state
-            min_final = min_final.expand_dims('time').assign_coords(time=final_coords['time'])
+            min_final_value = self.element.relative_minimum_final_charge_state

         # Get final maximum charge state
         if self.element.relative_maximum_final_charge_state is None:
-            max_final = self.element.relative_maximum_charge_state.isel(time=-1, drop=True)
+            max_final_value = _scalar_safe_isel_drop(rel_max, 'time', -1)
+        else:
+            max_final_value = self.element.relative_maximum_final_charge_state
+
+        # Build bounds arrays for timesteps_extra (includes final timestep)
+        # Handle case where original data may be scalar (no time dim)
+        if 'time' in rel_min.dims:
+            # Original has time dim - concat with final value
+            min_final_da = (
+                min_final_value.expand_dims('time') if 'time' not in min_final_value.dims else min_final_value
+            )
+            min_final_da = min_final_da.assign_coords(time=[timesteps_extra[-1]])
+            min_bounds = xr.concat([rel_min, min_final_da], dim='time')
+        else:
+            # Original is scalar - broadcast over all but the final timestep, then append
+            # the final bound so an explicit relative_minimum_final_charge_state is kept
+            min_final_da = min_final_value.expand_dims('time').assign_coords(time=[timesteps_extra[-1]])
+            min_bounds = xr.concat([rel_min.expand_dims(time=timesteps_extra[:-1]), min_final_da], dim='time')
+
+        if 'time' in rel_max.dims:
+            # Original has time dim - concat with final value
+            max_final_da = (
+                max_final_value.expand_dims('time') if 'time' not in max_final_value.dims else max_final_value
+            )
+            max_final_da = max_final_da.assign_coords(time=[timesteps_extra[-1]])
+            max_bounds = xr.concat([rel_max, max_final_da], dim='time')
         else:
-            max_final = self.element.relative_maximum_final_charge_state
-            max_final = max_final.expand_dims('time').assign_coords(time=final_coords['time'])
-        # Concatenate with original bounds
-        min_bounds = xr.concat([self.element.relative_minimum_charge_state, min_final], dim='time')
-        max_bounds = xr.concat([self.element.relative_maximum_charge_state, max_final], dim='time')
+            # Original is scalar - broadcast over all but the final timestep, then append
+            # the final bound so an explicit relative_maximum_final_charge_state is kept
+            max_final_da = max_final_value.expand_dims('time').assign_coords(time=[timesteps_extra[-1]])
+            max_bounds = xr.concat([rel_max.expand_dims(time=timesteps_extra[:-1]), max_final_da], dim='time')

         return min_bounds, max_bounds

@@ -1286,6 +1319,7 @@ def _add_investment_model(self):
                 label_of_element=self.label_of_element,
                 label_of_model=self.label_of_element,
                 parameters=self.element.capacity_in_flow_hours,
+                size_category=VariableCategory.STORAGE_SIZE,
             ),
             short_name='investment',
         )
@@ -1342,6 +1376,7 @@ def _add_intercluster_linking(self) -> None:
             coords=boundary_coords,
             dims=boundary_dims,
             short_name='SOC_boundary',
+            category=VariableCategory.SOC_BOUNDARY,
         )

        # 3. 
Link SOC_boundary to investment size
@@ -1472,8 +1507,8 @@ def _add_linking_constraints(
         # relative_loss_per_hour is per-hour, so we need total hours per cluster
         # Use sum over time to handle both regular and segmented systems
         # Keep as DataArray to respect per-period/scenario values
-        rel_loss = self.element.relative_loss_per_hour.mean('time')
-        hours_per_cluster = self._model.timestep_duration.sum('time')
+        rel_loss = _scalar_safe_reduce(self.element.relative_loss_per_hour, 'time', 'mean')
+        hours_per_cluster = _scalar_safe_reduce(self._model.timestep_duration, 'time', 'sum')

         decay_n = (1 - rel_loss) ** hours_per_cluster
         lhs = soc_after - soc_before * decay_n - delta_soc_ordered
@@ -1517,8 +1552,8 @@ def _add_combined_bound_constraints(
         # Get self-discharge rate for decay calculation
         # relative_loss_per_hour is per-hour, so we need to convert offsets to hours
         # Keep as DataArray to respect per-period/scenario values
-        rel_loss = self.element.relative_loss_per_hour.mean('time')
-        mean_timestep_duration = self._model.timestep_duration.mean('time')
+        rel_loss = _scalar_safe_reduce(self.element.relative_loss_per_hour, 'time', 'mean')
+        mean_timestep_duration = _scalar_safe_reduce(self._model.timestep_duration, 'time', 'mean')

         # Use actual time dimension size (may be smaller than timesteps_per_cluster for segmented systems)
         actual_time_size = charge_state.sizes['time']
diff --git a/flixopt/core.py b/flixopt/core.py
index 392d25c02..5bbf7d86e 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -366,6 +366,55 @@ def _broadcast_dataarray_to_target_specification(
         broadcasted = source_data.broadcast_like(target_template)
         return broadcasted.transpose(*target_dims)

+    @staticmethod
+    def _validate_dataarray_dims(
+        data: xr.DataArray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Validate that DataArray dims are a subset of target dims (without broadcasting).
+
+        This method validates compatibility without expanding to full dimensions,
+        allowing data to remain in compact form. Broadcasting happens later at
+        the linopy interface (FlowSystemModel.add_variables).
+
+        Also transposes data to canonical dimension order (matching target_dims order).
+
+        Args:
+            data: DataArray to validate
+            target_coords: Target coordinates {dim_name: coordinate_index}
+            target_dims: Target dimension names in canonical order
+
+        Returns:
+            DataArray with validated dims, transposed to canonical order
+
+        Raises:
+            ConversionError: If data has dimensions not in target_dims,
+                or coordinate values don't match
+        """
+        # Validate: all data dimensions must exist in target
+        extra_dims = set(data.dims) - set(target_dims)
+        if extra_dims:
+            raise ConversionError(f'Data has dimensions {extra_dims} not in target dimensions {target_dims}')
+
+        # Validate: coordinate compatibility for overlapping dimensions
+        for dim in data.dims:
+            if dim in data.coords and dim in target_coords:
+                data_coords = data.coords[dim]
+                target_coords_for_dim = target_coords[dim]
+
+                if not np.array_equal(data_coords.values, target_coords_for_dim.values):
+                    raise ConversionError(
+                        f'Coordinate mismatch for dimension "{dim}". Data and target coordinates have different values.'
+ ) + + # Transpose to canonical dimension order (subset of target_dims that data has) + if data.dims: + canonical_order = tuple(d for d in target_dims if d in data.dims) + if data.dims != canonical_order: + data = data.transpose(*canonical_order) + + return data + @classmethod def to_dataarray( cls, @@ -480,8 +529,9 @@ def to_dataarray( f'Unsupported data type: {type(data).__name__}. Supported types: {", ".join(supported_types)}' ) - # Broadcast intermediate result to target specification - return cls._broadcast_dataarray_to_target_specification(intermediate, validated_coords, target_dims) + # Validate dims are compatible (no broadcasting - data stays compact) + # Broadcasting happens at FlowSystemModel.add_variables() via _ensure_coords + return cls._validate_dataarray_dims(intermediate, validated_coords, target_dims) @staticmethod def _validate_and_prepare_target_coordinates( diff --git a/flixopt/effects.py b/flixopt/effects.py index 3a2322988..b32a4edd8 100644 --- a/flixopt/effects.py +++ b/flixopt/effects.py @@ -17,7 +17,15 @@ from .core import PlausibilityError from .features import ShareAllocationModel -from .structure import Element, ElementContainer, ElementModel, FlowSystemModel, Submodel, register_class_for_io +from .structure import ( + Element, + ElementContainer, + ElementModel, + FlowSystemModel, + Submodel, + VariableCategory, + register_class_for_io, +) if TYPE_CHECKING: from collections.abc import Iterator @@ -377,6 +385,7 @@ def _do_modeling(self): upper=self.element.maximum_total if self.element.maximum_total is not None else np.inf, coords=self._model.get_coords(['period', 'scenario']), name=self.label_full, + category=VariableCategory.TOTAL, ) self.add_constraints( @@ -394,6 +403,7 @@ def _do_modeling(self): upper=self.element.maximum_over_periods if self.element.maximum_over_periods is not None else np.inf, coords=self._model.get_coords(['scenario']), short_name='total_over_periods', + category=VariableCategory.TOTAL_OVER_PERIODS, ) self.add_constraints(self.total_over_periods == weighted_total, short_name='total_over_periods') diff --git a/flixopt/elements.py b/flixopt/elements.py index 0cee53738..e2def702d 100644 --- a/flixopt/elements.py +++ b/flixopt/elements.py @@ -20,6 +20,7 @@ Element, ElementModel, FlowSystemModel, + VariableCategory, register_class_for_io, ) @@ -672,6 +673,7 @@ def _do_modeling(self): upper=self.absolute_flow_rate_bounds[1], coords=self._model.get_coords(), short_name='flow_rate', + category=VariableCategory.FLOW_RATE, ) self._constraint_flow_rate() @@ -687,6 +689,7 @@ def _do_modeling(self): ), coords=['period', 'scenario'], short_name='total_flow_hours', + category=VariableCategory.TOTAL, ) # Weighted sum over all periods constraint @@ -717,6 +720,7 @@ def _do_modeling(self): ), coords=['scenario'], short_name='flow_hours_over_periods', + category=VariableCategory.TOTAL_OVER_PERIODS, ) # Load factor constraints @@ -726,7 +730,12 @@ def _do_modeling(self): self._create_shares() def _create_status_model(self): - status = self.add_variables(binary=True, short_name='status', coords=self._model.get_coords()) + status = self.add_variables( + binary=True, + short_name='status', + coords=self._model.get_coords(), + category=VariableCategory.STATUS, + ) self.add_submodels( StatusModel( model=self._model, @@ -746,6 +755,7 @@ def _create_investment_model(self): label_of_element=self.label_of_element, parameters=self.element.size, label_of_model=self.label_of_element, + size_category=VariableCategory.FLOW_SIZE, ), 'investment', ) @@ -957,11 +967,17 @@ 
def _do_modeling(self): imbalance_penalty = self.element.imbalance_penalty_per_flow_hour * self._model.timestep_duration self.virtual_supply = self.add_variables( - lower=0, coords=self._model.get_coords(), short_name='virtual_supply' + lower=0, + coords=self._model.get_coords(), + short_name='virtual_supply', + category=VariableCategory.VIRTUAL_FLOW, ) self.virtual_demand = self.add_variables( - lower=0, coords=self._model.get_coords(), short_name='virtual_demand' + lower=0, + coords=self._model.get_coords(), + short_name='virtual_demand', + category=VariableCategory.VIRTUAL_FLOW, ) # Σ(inflows) + virtual_supply = Σ(outflows) + virtual_demand @@ -1028,7 +1044,12 @@ def _do_modeling(self): # Create component status variable and StatusModel if needed if self.element.status_parameters: - status = self.add_variables(binary=True, short_name='status', coords=self._model.get_coords()) + status = self.add_variables( + binary=True, + short_name='status', + coords=self._model.get_coords(), + category=VariableCategory.STATUS, + ) if len(all_flows) == 1: self.add_constraints(status == all_flows[0].submodel.status.status, short_name='status') else: diff --git a/flixopt/features.py b/flixopt/features.py index bb9864d64..e85636435 100644 --- a/flixopt/features.py +++ b/flixopt/features.py @@ -11,7 +11,7 @@ import numpy as np from .modeling import BoundingPatterns, ModelingPrimitives, ModelingUtilities -from .structure import FlowSystemModel, Submodel +from .structure import FlowSystemModel, Submodel, VariableCategory if TYPE_CHECKING: from collections.abc import Collection @@ -37,6 +37,7 @@ class InvestmentModel(Submodel): label_of_element: The label of the parent (Element). Used to construct the full label of the model. parameters: The parameters of the feature model. label_of_model: The label of the model. This is needed to construct the full label of the model. + size_category: Category for the size variable (FLOW_SIZE, STORAGE_SIZE, or SIZE for generic). """ parameters: InvestParameters @@ -47,9 +48,11 @@ def __init__( label_of_element: str, parameters: InvestParameters, label_of_model: str | None = None, + size_category: VariableCategory = VariableCategory.SIZE, ): self.piecewise_effects: PiecewiseEffectsModel | None = None self.parameters = parameters + self._size_category = size_category super().__init__(model, label_of_element=label_of_element, label_of_model=label_of_model) def _do_modeling(self): @@ -69,6 +72,7 @@ def _create_variables_and_constraints(self): lower=size_min if self.parameters.mandatory else 0, upper=size_max, coords=self._model.get_coords(['period', 'scenario']), + category=self._size_category, ) if not self.parameters.mandatory: @@ -76,6 +80,7 @@ def _create_variables_and_constraints(self): binary=True, coords=self._model.get_coords(['period', 'scenario']), short_name='invested', + category=VariableCategory.INVESTED, ) BoundingPatterns.bounds_with_state( self, @@ -193,7 +198,12 @@ def _do_modeling(self): # Create a separate binary 'inactive' variable when needed for downtime tracking or explicit use # When not needed, the expression (1 - self.status) can be used instead if self.parameters.use_downtime_tracking: - inactive = self.add_variables(binary=True, short_name='inactive', coords=self._model.get_coords()) + inactive = self.add_variables( + binary=True, + short_name='inactive', + coords=self._model.get_coords(), + category=VariableCategory.INACTIVE, + ) self.add_constraints(self.status + inactive == 1, short_name='complementary') # 3. 
Total duration tracking @@ -207,12 +217,23 @@ def _do_modeling(self): ), short_name='active_hours', coords=['period', 'scenario'], + category=VariableCategory.TOTAL, ) # 4. Switch tracking using existing pattern if self.parameters.use_startup_tracking: - self.add_variables(binary=True, short_name='startup', coords=self.get_coords()) - self.add_variables(binary=True, short_name='shutdown', coords=self.get_coords()) + self.add_variables( + binary=True, + short_name='startup', + coords=self.get_coords(), + category=VariableCategory.STARTUP, + ) + self.add_variables( + binary=True, + short_name='shutdown', + coords=self.get_coords(), + category=VariableCategory.SHUTDOWN, + ) # Determine previous_state: None means relaxed (no constraint at t=0) previous_state = self._previous_status.isel(time=-1) if self._previous_status is not None else None @@ -233,6 +254,7 @@ def _do_modeling(self): upper=self.parameters.startup_limit, coords=self._model.get_coords(('period', 'scenario')), short_name='startup_count', + category=VariableCategory.STARTUP_COUNT, ) # Sum over all temporal dimensions (time, and cluster if present) startup_temporal_dims = [d for d in self.startup.dims if d not in ('period', 'scenario')] @@ -387,12 +409,14 @@ def _do_modeling(self): binary=True, short_name='inside_piece', coords=self._model.get_coords(dims=self.dims), + category=VariableCategory.INSIDE_PIECE, ) self.lambda0 = self.add_variables( lower=0, upper=1, short_name='lambda0', coords=self._model.get_coords(dims=self.dims), + category=VariableCategory.LAMBDA0, ) self.lambda1 = self.add_variables( @@ -400,6 +424,7 @@ def _do_modeling(self): upper=1, short_name='lambda1', coords=self._model.get_coords(dims=self.dims), + category=VariableCategory.LAMBDA1, ) # Create constraints @@ -495,6 +520,7 @@ def _do_modeling(self): coords=self._model.get_coords(self.dims), binary=True, short_name='zero_point', + category=VariableCategory.ZERO_POINT, ) rhs = self.zero_point else: @@ -619,6 +645,7 @@ def _do_modeling(self): coords=self._model.get_coords([dim for dim in self._dims if dim != 'time']), name=self.label_full, short_name='total', + category=VariableCategory.TOTAL, ) # eq: sum = sum(share_i) # skalar self._eq_total = self.add_constraints(self.total == 0, name=self.label_full) @@ -629,6 +656,7 @@ def _do_modeling(self): upper=np.inf if (self._max_per_hour is None) else self._max_per_hour * self._model.timestep_duration, coords=self._model.get_coords(self._dims), short_name='per_timestep', + category=VariableCategory.PER_TIMESTEP, ) self._eq_total_per_timestep = self.add_constraints(self.total_per_timestep == 0, short_name='per_timestep') @@ -668,10 +696,13 @@ def add_share( if name in self.shares: self.share_constraints[name].lhs -= expression else: + # Temporal shares (with 'time' dim) are segment totals that need division + category = VariableCategory.SHARE if 'time' in dims else None self.shares[name] = self.add_variables( coords=self._model.get_coords(dims), name=f'{name}->{self.label_full}', short_name=name, + category=category, ) self.share_constraints[name] = self.add_constraints( diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index 3d7fe3acd..0171afbf8 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -29,7 +29,14 @@ from .elements import Bus, Component, Flow from .optimize_accessor import OptimizeAccessor from .statistics_accessor import StatisticsAccessor -from .structure import CompositeContainerMixin, Element, ElementContainer, FlowSystemModel, Interface +from .structure import ( + 
CompositeContainerMixin, + Element, + ElementContainer, + FlowSystemModel, + Interface, + VariableCategory, +) from .topology_accessor import TopologyAccessor from .transform_accessor import TransformAccessor @@ -249,6 +256,10 @@ def __init__( # Solution dataset - populated after optimization or loaded from file self._solution: xr.Dataset | None = None + # Variable categories for segment expansion handling + # Populated when model is built, used by transform.expand() + self._variable_categories: dict[str, VariableCategory] = {} + # Aggregation info - populated by transform.cluster() self.clustering: Clustering | None = None @@ -678,6 +689,10 @@ def to_dataset(self, include_solution: bool = True, include_original_data: bool Convert the FlowSystem to an xarray Dataset. Ensures FlowSystem is connected before serialization. + Data is stored in minimal form (scalars stay scalar, 1D arrays stay 1D) without + broadcasting to full model dimensions. This provides significant memory savings + for multi-period and multi-scenario models. + If a solution is present and `include_solution=True`, it will be included in the dataset with variable names prefixed by 'solution|' to avoid conflicts with FlowSystem configuration variables. Solution time coordinates are renamed @@ -736,9 +751,26 @@ def to_dataset(self, include_solution: bool = True, include_original_data: bool ds[f'clustering|{name}'] = arr ds.attrs['clustering'] = json.dumps(clustering_ref) + # Serialize variable categories for segment expansion handling + if self._variable_categories: + # Convert enum values to strings for JSON serialization + categories_dict = {name: cat.value for name, cat in self._variable_categories.items()} + ds.attrs['variable_categories'] = json.dumps(categories_dict) + # Add version info ds.attrs['flixopt_version'] = __version__ + # Ensure model coordinates are always present in the Dataset + # (even if no data variable uses them, they define the model structure) + model_coords = {'time': self.timesteps} + if self.periods is not None: + model_coords['period'] = self.periods + if self.scenarios is not None: + model_coords['scenario'] = self.scenarios + if self.clusters is not None: + model_coords['cluster'] = self.clusters + ds = ds.assign_coords(model_coords) + return ds @classmethod @@ -898,6 +930,20 @@ def from_dataset(cls, ds: xr.Dataset) -> FlowSystem: if hasattr(clustering, 'representative_weights'): flow_system.cluster_weight = clustering.representative_weights + # Restore variable categories if present + if 'variable_categories' in reference_structure: + categories_dict = json.loads(reference_structure['variable_categories']) + # Convert string values back to VariableCategory enum with safe fallback + restored_categories = {} + for name, value in categories_dict.items(): + try: + restored_categories[name] = VariableCategory(value) + except ValueError: + # Unknown category value (e.g., renamed/removed enum) - skip it + # The variable will be treated as uncategorized during expansion + logger.warning(f'Unknown VariableCategory value "{value}" for "{name}", skipping') + flow_system._variable_categories = restored_categories + # Reconnect network to populate bus inputs/outputs (not stored in NetCDF). 
flow_system.connect_and_transform() @@ -1605,6 +1651,9 @@ def solve(self, solver: _Solver) -> FlowSystem: # Store solution on FlowSystem for direct Element access self.solution = self.model.solution + # Copy variable categories for segment expansion handling + self._variable_categories = self.model.variable_categories.copy() + logger.info(f'Optimization solved successfully. Objective: {self.model.objective.value:.4f}') return self @@ -1635,6 +1684,69 @@ def solution(self, value: xr.Dataset | None) -> None: self._solution = value self._statistics = None # Invalidate cached statistics + @property + def variable_categories(self) -> dict[str, VariableCategory]: + """Variable categories for filtering and segment expansion. + + Returns: + Dict mapping variable names to their VariableCategory. + """ + return self._variable_categories + + def get_variables_by_category(self, *categories: VariableCategory, from_solution: bool = True) -> list[str]: + """Get variable names matching any of the specified categories. + + Args: + *categories: One or more VariableCategory values to filter by. + from_solution: If True, only return variables present in solution. + If False, return all registered variables matching categories. + + Returns: + List of variable names matching any of the specified categories. + + Example: + >>> fs.get_variables_by_category(VariableCategory.FLOW_RATE) + ['Boiler(Q_th)|flow_rate', 'CHP(Q_th)|flow_rate', ...] + >>> fs.get_variables_by_category(VariableCategory.SIZE, VariableCategory.INVESTED) + ['Boiler(Q_th)|size', 'Boiler(Q_th)|invested', ...] + """ + category_set = set(categories) + + if self._variable_categories: + # Use registered categories + matching = [name for name, cat in self._variable_categories.items() if cat in category_set] + elif self._solution is not None: + # Fallback for old files without categories: match by suffix pattern + # Category values match the variable suffix (e.g., FLOW_RATE.value = 'flow_rate') + matching = [] + for cat in category_set: + # Handle new sub-categories that map to old |size suffix + if cat == VariableCategory.FLOW_SIZE: + flow_labels = set(self.flows.keys()) + matching.extend( + v + for v in self._solution.data_vars + if v.endswith('|size') and v.rsplit('|', 1)[0] in flow_labels + ) + elif cat == VariableCategory.STORAGE_SIZE: + storage_labels = set(self.storages.keys()) + matching.extend( + v + for v in self._solution.data_vars + if v.endswith('|size') and v.rsplit('|', 1)[0] in storage_labels + ) + else: + # Standard suffix matching + suffix = f'|{cat.value}' + matching.extend(v for v in self._solution.data_vars if v.endswith(suffix)) + else: + matching = [] + + if from_solution and self._solution is not None: + solution_vars = set(self._solution.data_vars) + matching = [v for v in matching if v in solution_vars] + return matching + @property def is_locked(self) -> bool: """Check if the FlowSystem is locked (has a solution). 
@@ -1661,6 +1773,7 @@ def _invalidate_model(self) -> None: self._connected_and_transformed = False self._topology = None # Invalidate topology accessor (and its cached colors) self._flow_carriers = None # Invalidate flow-to-carrier mapping + self._variable_categories.clear() # Clear stale categories for segment expansion for element in self.values(): element.submodel = None element._variable_names = [] diff --git a/flixopt/io.py b/flixopt/io.py index 164a9fb2e..22347ec08 100644 --- a/flixopt/io.py +++ b/flixopt/io.py @@ -548,6 +548,7 @@ def save_dataset_to_netcdf( raise ValueError(f'Invalid file extension for path {path}. Only .nc and .nc4 are supported') ds = ds.copy(deep=True) + ds.attrs = {'attrs': json.dumps(ds.attrs)} # Convert all DataArray attrs to JSON strings @@ -572,6 +573,49 @@ def save_dataset_to_netcdf( ) +def _reduce_constant_arrays(ds: xr.Dataset) -> xr.Dataset: + """ + Reduce constant dimensions in arrays for more efficient storage. + + For each array, checks each dimension and removes it if values are constant + along that dimension. This handles cases like: + - Shape (8760,) all identical → scalar + - Shape (8760, 2) constant along time → shape (2,) + - Shape (8760, 2, 3) constant along time → shape (2, 3) + + This is useful for datasets saved with older versions where data was + broadcast to full dimensions. + + Args: + ds: Dataset with potentially constant arrays. + + Returns: + Dataset with constant dimensions reduced. + """ + new_data_vars = {} + + for name, da in ds.data_vars.items(): + if not da.dims or da.size == 0: + new_data_vars[name] = da + continue + + # Try to reduce each dimension + reduced = da + for dim in list(da.dims): + if dim not in reduced.dims: + continue # Already removed + # Check if constant along this dimension + first_slice = reduced.isel({dim: 0}) + is_constant = (reduced == first_slice).all() + if is_constant: + # Remove this dimension by taking first slice + reduced = first_slice + + new_data_vars[name] = reduced + + return xr.Dataset(new_data_vars, coords=ds.coords, attrs=ds.attrs) + + def load_dataset_from_netcdf(path: str | pathlib.Path) -> xr.Dataset: """ Load a dataset from a netcdf file. Load all attrs from 'attrs' attributes. @@ -741,20 +785,25 @@ def convert_old_dataset( ds: xr.Dataset, key_renames: dict[str, str] | None = None, value_renames: dict[str, dict] | None = None, + reduce_constants: bool = True, ) -> xr.Dataset: - """Convert an old FlowSystem dataset to use new parameter names. + """Convert an old FlowSystem dataset to the current format. + + This function performs two conversions: + 1. Renames parameters in the reference structure to current naming conventions + 2. Reduces constant arrays to minimal dimensions (e.g., broadcasted scalars back to scalars) - This function updates the reference structure in a dataset's attrs to use - the current parameter naming conventions. This is useful for loading - FlowSystem files saved with older versions of flixopt. + This is useful for loading FlowSystem files saved with older versions of flixopt. Args: - ds: The dataset to convert (will be modified in place) + ds: The dataset to convert key_renames: Custom key renames to apply. If None, uses PARAMETER_RENAMES. value_renames: Custom value renames to apply. If None, uses VALUE_RENAMES. + reduce_constants: If True (default), reduce constant arrays to minimal dimensions. + Old files may have scalars broadcasted to full (time, period, scenario) shape. 
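+            For example (illustrative), a (time, scenario) array of shape
+            (8760, 3) that is constant along time collapses to shape (3,).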
Returns: - The converted dataset (same object, modified in place) + The converted dataset Examples: Convert an old netCDF file to new format: @@ -765,7 +814,7 @@ def convert_old_dataset( # Load old file ds = io.load_dataset_from_netcdf('old_flow_system.nc4') - # Convert parameter names + # Convert to current format ds = io.convert_old_dataset(ds) # Now load as FlowSystem @@ -782,6 +831,10 @@ def convert_old_dataset( # Convert the attrs (reference_structure) ds.attrs = _rename_keys_recursive(ds.attrs, key_renames, value_renames) + # Reduce constant arrays to minimal dimensions + if reduce_constants: + ds = _reduce_constant_arrays(ds) + return ds diff --git a/flixopt/modeling.py b/flixopt/modeling.py index f22ffbc04..3adce5338 100644 --- a/flixopt/modeling.py +++ b/flixopt/modeling.py @@ -1,15 +1,81 @@ import logging +from typing import Any import linopy import numpy as np import xarray as xr from .config import CONFIG -from .structure import Submodel +from .structure import Submodel, VariableCategory logger = logging.getLogger('flixopt') +def _scalar_safe_isel(data: xr.DataArray | Any, indexers: dict) -> xr.DataArray | Any: + """Apply isel if data has the required dimensions, otherwise return data as-is. + + This allows parameters to remain compact (scalar or lower-dimensional) while still + being usable in constraint expressions that use .isel() for slicing. + + Args: + data: DataArray or scalar value + indexers: Dictionary of {dim: indexer} for isel + + Returns: + Sliced DataArray if dims exist, otherwise original data + """ + if not isinstance(data, xr.DataArray): + return data + # Only apply isel if data has all the required dimensions + if all(dim in data.dims for dim in indexers): + return data.isel(indexers) + return data + + +def _scalar_safe_isel_drop(data: xr.DataArray | Any, dim: str, index: int) -> xr.DataArray | Any: + """Apply isel with drop=True if data has the dimension, otherwise return data as-is. + + Useful for cases like selecting the last value of a potentially reduced array: + - If data has time dimension: returns data.isel(time=-1, drop=True) + - If data is reduced (no time dimension): returns data unchanged (already represents constant) + + Args: + data: DataArray or scalar value + dim: Dimension name to select from + index: Index to select (e.g., -1 for last, 0 for first) + + Returns: + Selected value with dimension dropped if dim exists, otherwise original data + """ + if not isinstance(data, xr.DataArray): + return data + if dim in data.dims: + return data.isel({dim: index}, drop=True) + return data + + +def _scalar_safe_reduce(data: xr.DataArray | Any, dim: str, method: str = 'mean') -> xr.DataArray | Any: + """Apply reduction (mean/sum/etc) over dimension if it exists, otherwise return data as-is. + + Useful for aggregating over time dimension when data may be scalar (constant): + - If data has time dimension: returns getattr(data, method)(dim) + - If data is reduced (no time dimension): returns data unchanged (already represents constant) + + Args: + data: DataArray or scalar value + dim: Dimension name to reduce over + method: Reduction method ('mean', 'sum', 'min', 'max', etc.) 
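+            For example (illustrative): _scalar_safe_reduce(da, 'time', 'sum')
+            totals over time when da has a time dimension and returns da
+            unchanged when it does not.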
+ + Returns: + Reduced value if dim exists, otherwise original data + """ + if not isinstance(data, xr.DataArray): + return data + if dim in data.dims: + return getattr(data, method)(dim) + return data + + class ModelingUtilitiesAbstract: """Utility functions for modeling - leveraging xarray for temporal data""" @@ -204,6 +270,7 @@ def expression_tracking_variable( short_name: str = None, bounds: tuple[xr.DataArray, xr.DataArray] = None, coords: str | list[str] | None = None, + category: VariableCategory = None, ) -> tuple[linopy.Variable, linopy.Constraint]: """Creates a variable constrained to equal a given expression. @@ -218,6 +285,7 @@ def expression_tracking_variable( short_name: Short name for display purposes bounds: Optional (lower_bound, upper_bound) tuple for the tracker variable coords: Coordinate dimensions for the variable (None uses all model coords) + category: Category for segment expansion handling. See VariableCategory. Returns: Tuple of (tracker_variable, tracking_constraint) @@ -226,7 +294,9 @@ def expression_tracking_variable( raise ValueError('ModelingPrimitives.expression_tracking_variable() can only be used with a Submodel') if not bounds: - tracker = model.add_variables(name=name, coords=model.get_coords(coords), short_name=short_name) + tracker = model.add_variables( + name=name, coords=model.get_coords(coords), short_name=short_name, category=category + ) else: tracker = model.add_variables( lower=bounds[0] if bounds[0] is not None else -np.inf, @@ -234,6 +304,7 @@ def expression_tracking_variable( name=name, coords=model.get_coords(coords), short_name=short_name, + category=category, ) # Constraint: tracker = expression @@ -303,6 +374,7 @@ def consecutive_duration_tracking( coords=state.coords, name=name, short_name=short_name, + category=VariableCategory.DURATION, ) constraints = {} @@ -340,7 +412,7 @@ def consecutive_duration_tracking( constraints['lb'] = model.add_constraints( duration >= (state.isel({duration_dim: slice(None, -1)}) - state.isel({duration_dim: slice(1, None)})) - * minimum_duration.isel({duration_dim: slice(None, -1)}), + * _scalar_safe_isel(minimum_duration, {duration_dim: slice(None, -1)}), name=f'{duration.name}|lb', ) @@ -351,7 +423,7 @@ def consecutive_duration_tracking( if not isinstance(previous_duration, xr.DataArray) else float(previous_duration.max().item()) ) - min0 = float(minimum_duration.isel({duration_dim: 0}).max().item()) + min0 = float(_scalar_safe_isel(minimum_duration, {duration_dim: 0}).max().item()) if prev > 0 and prev < min0: constraints['initial_lb'] = model.add_constraints( state.isel({duration_dim: 0}) == 1, name=f'{duration.name}|initial_lb' diff --git a/flixopt/statistics_accessor.py b/flixopt/statistics_accessor.py index 90ad875b7..0092d4989 100644 --- a/flixopt/statistics_accessor.py +++ b/flixopt/statistics_accessor.py @@ -31,6 +31,7 @@ from .color_processing import ColorType, hex_to_rgba, process_colors from .config import CONFIG from .plot_result import PlotResult +from .structure import VariableCategory if TYPE_CHECKING: from .flow_system import FlowSystem @@ -523,12 +524,12 @@ def flow_rates(self) -> xr.Dataset: """ self._require_solution() if self._flow_rates is None: - flow_rate_vars = [v for v in self._fs.solution.data_vars if v.endswith('|flow_rate')] + flow_rate_vars = self._fs.get_variables_by_category(VariableCategory.FLOW_RATE) flow_carriers = self._fs.flow_carriers # Cached lookup carrier_units = self.carrier_units # Cached lookup data_vars = {} for v in flow_rate_vars: - flow_label = 
+                flow_label = v.rsplit('|', 1)[0]  # Extract label from 'label|flow_rate'
                 da = self._fs.solution[v].copy()
                 # Add carrier and unit as attributes
                 carrier = flow_carriers.get(flow_label)
@@ -567,11 +568,8 @@ def flow_sizes(self) -> xr.Dataset:
         """Flow sizes as a Dataset with flow labels as variable names."""
         self._require_solution()
         if self._flow_sizes is None:
-            flow_labels = set(self._fs.flows.keys())
-            size_vars = [
-                v for v in self._fs.solution.data_vars if v.endswith('|size') and v.replace('|size', '') in flow_labels
-            ]
-            self._flow_sizes = xr.Dataset({v.replace('|size', ''): self._fs.solution[v] for v in size_vars})
+            flow_size_vars = self._fs.get_variables_by_category(VariableCategory.FLOW_SIZE)
+            self._flow_sizes = xr.Dataset({v.rsplit('|', 1)[0]: self._fs.solution[v] for v in flow_size_vars})
         return self._flow_sizes

     @property
@@ -579,13 +577,8 @@ def storage_sizes(self) -> xr.Dataset:
         """Storage capacity sizes as a Dataset with storage labels as variable names."""
         self._require_solution()
         if self._storage_sizes is None:
-            storage_labels = set(self._fs.storages.keys())
-            size_vars = [
-                v
-                for v in self._fs.solution.data_vars
-                if v.endswith('|size') and v.replace('|size', '') in storage_labels
-            ]
-            self._storage_sizes = xr.Dataset({v.replace('|size', ''): self._fs.solution[v] for v in size_vars})
+            storage_size_vars = self._fs.get_variables_by_category(VariableCategory.STORAGE_SIZE)
+            self._storage_sizes = xr.Dataset({v.rsplit('|', 1)[0]: self._fs.solution[v] for v in storage_size_vars})
         return self._storage_sizes

     @property
@@ -600,10 +593,8 @@ def charge_states(self) -> xr.Dataset:
         """All storage charge states as a Dataset with storage labels as variable names."""
         self._require_solution()
         if self._charge_states is None:
-            charge_vars = [v for v in self._fs.solution.data_vars if v.endswith('|charge_state')]
-            self._charge_states = xr.Dataset(
-                {v.replace('|charge_state', ''): self._fs.solution[v] for v in charge_vars}
-            )
+            charge_vars = self._fs.get_variables_by_category(VariableCategory.CHARGE_STATE)
+            self._charge_states = xr.Dataset({v.rsplit('|', 1)[0]: self._fs.solution[v] for v in charge_vars})
         return self._charge_states

     @property
diff --git a/flixopt/structure.py b/flixopt/structure.py
index b54914bd3..952d2c7b3 100644
--- a/flixopt/structure.py
+++ b/flixopt/structure.py
@@ -13,6 +13,7 @@
 import warnings
 from dataclasses import dataclass
 from difflib import get_close_matches
+from enum import Enum
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -40,6 +41,107 @@

 logger = logging.getLogger('flixopt')

+
+def _ensure_coords(
+    data: xr.DataArray | float | int,
+    coords: xr.Coordinates | dict,
+) -> xr.DataArray | float:
+    """Broadcast data to coords if needed.
+
+    This is used at the linopy interface to ensure bounds are properly broadcasted
+    to the target variable shape. Linopy needs at least one bound to have all
+    dimensions to determine the variable shape.
+
+    Note: Infinity values (-inf, inf) are kept as scalars because linopy uses
+    special checks like `if (lower != -inf)` that fail with DataArrays.
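+
+    Example (editor's illustration; coordinate values are assumptions):
+        >>> import pandas as pd
+        >>> t = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
+        >>> _ensure_coords(5.0, {'time': t}).shape
+        (3,)
+        >>> _ensure_coords(np.inf, {'time': t})  # infinities stay scalar
+        inf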
+ """ + # Handle both dict and xr.Coordinates + if isinstance(coords, dict): + coord_dims = list(coords.keys()) + else: + coord_dims = list(coords.dims) + + # Keep infinity values as scalars (linopy uses them for special checks) + if not isinstance(data, xr.DataArray): + if np.isinf(data): + return data + # Finite scalar - create full DataArray + return xr.DataArray(data, coords=coords, dims=coord_dims) + + if set(data.dims) == set(coord_dims): + # Has all dims - ensure correct order + if data.dims != tuple(coord_dims): + return data.transpose(*coord_dims) + return data + + # Broadcast to full coords (broadcast_like ensures correct dim order) + template = xr.DataArray(coords=coords, dims=coord_dims) + return data.broadcast_like(template) + + +class VariableCategory(Enum): + """Fine-grained variable categories - names mirror variable names. + + Each variable type has its own category for precise handling during + segment expansion and statistics calculation. + """ + + # === State variables === + CHARGE_STATE = 'charge_state' # Storage SOC (interpolate between boundaries) + SOC_BOUNDARY = 'soc_boundary' # Intercluster SOC boundaries + + # === Rate/Power variables === + FLOW_RATE = 'flow_rate' # Flow rate (kW) + NETTO_DISCHARGE = 'netto_discharge' # Storage net discharge + VIRTUAL_FLOW = 'virtual_flow' # Bus penalty slack variables + + # === Binary state === + STATUS = 'status' # On/off status (persists through segment) + INACTIVE = 'inactive' # Complementary inactive status + + # === Binary events === + STARTUP = 'startup' # Startup event + SHUTDOWN = 'shutdown' # Shutdown event + + # === Effect variables === + PER_TIMESTEP = 'per_timestep' # Effect per timestep + SHARE = 'share' # All temporal contributions (flow, active, startup) + TOTAL = 'total' # Effect total (per period/scenario) + TOTAL_OVER_PERIODS = 'total_over_periods' # Effect total over all periods + + # === Investment === + SIZE = 'size' # Generic investment size (for backwards compatibility) + FLOW_SIZE = 'flow_size' # Flow investment size + STORAGE_SIZE = 'storage_size' # Storage capacity size + INVESTED = 'invested' # Invested yes/no binary + + # === Counting/Duration === + STARTUP_COUNT = 'startup_count' # Count of startups + DURATION = 'duration' # Duration tracking (uptime/downtime) + + # === Piecewise linearization === + INSIDE_PIECE = 'inside_piece' # Binary segment selection + LAMBDA0 = 'lambda0' # Interpolation weight + LAMBDA1 = 'lambda1' # Interpolation weight + ZERO_POINT = 'zero_point' # Zero point handling + + # === Other === + OTHER = 'other' # Uncategorized + + +# === Logical Groupings for Segment Expansion === +# Default behavior (not listed): repeat value within segment + +EXPAND_INTERPOLATE: set[VariableCategory] = {VariableCategory.CHARGE_STATE} +"""State variables that should be interpolated between segment boundaries.""" + +EXPAND_DIVIDE: set[VariableCategory] = {VariableCategory.PER_TIMESTEP, VariableCategory.SHARE} +"""Segment totals that should be divided by expansion factor to preserve sums.""" + +EXPAND_FIRST_TIMESTEP: set[VariableCategory] = {VariableCategory.STARTUP, VariableCategory.SHUTDOWN} +"""Binary events that should appear only at the first timestep of the segment.""" + + CLASS_REGISTRY = {} @@ -97,6 +199,25 @@ def __init__(self, flow_system: FlowSystem): self.flow_system = flow_system self.effects: EffectCollectionModel | None = None self.submodels: Submodels = Submodels({}) + self.variable_categories: dict[str, VariableCategory] = {} + + def add_variables( + self, + lower: xr.DataArray | 
 CLASS_REGISTRY = {}
@@ -97,6 +199,25 @@ def __init__(self, flow_system: FlowSystem):
         self.flow_system = flow_system
         self.effects: EffectCollectionModel | None = None
         self.submodels: Submodels = Submodels({})
+        self.variable_categories: dict[str, VariableCategory] = {}
+
+    def add_variables(
+        self,
+        lower: xr.DataArray | float = -np.inf,
+        upper: xr.DataArray | float = np.inf,
+        coords: xr.Coordinates | None = None,
+        **kwargs,
+    ) -> linopy.Variable:
+        """Override to ensure bounds are broadcasted to coords shape.
+
+        Linopy uses the union of all DataArray dimensions to determine variable shape.
+        This override ensures at least one bound has all target dimensions when coords
+        is provided, allowing internal data to remain compact (scalars, 1D arrays).
+        """
+        if coords is not None:
+            lower = _ensure_coords(lower, coords)
+            upper = _ensure_coords(upper, coords)
+        return super().add_variables(lower=lower, upper=upper, coords=coords, **kwargs)

     def do_modeling(self):
         # Create all element models
@@ -1606,12 +1727,14 @@ def __init__(self, model: FlowSystemModel, label_of_element: str, label_of_model

     def add_variables(
         self,
         short_name: str = None,
+        category: VariableCategory = None,
         **kwargs: Any,
     ) -> linopy.Variable:
         """Create and register a variable in one step.

         Args:
             short_name: Short name for the variable (used as suffix in full name).
+            category: Category for segment expansion handling. See VariableCategory.
             **kwargs: Additional arguments passed to linopy.Model.add_variables().

         Returns:
@@ -1624,6 +1747,11 @@ def add_variables(
         variable = self._model.add_variables(**kwargs)

         self.register_variable(variable, short_name)
+
+        # Register category in FlowSystemModel for segment expansion handling
+        if category is not None:
+            self._model.variable_categories[variable.name] = category
+
         return variable

     def add_constraints(self, expression, short_name: str = None, **kwargs) -> linopy.Constraint:
diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py
index f83dc697a..05a95ba07 100644
--- a/flixopt/transform_accessor.py
+++ b/flixopt/transform_accessor.py
@@ -16,6 +16,9 @@
 import pandas as pd
 import xarray as xr

+from .modeling import _scalar_safe_reduce
+from .structure import EXPAND_DIVIDE, EXPAND_INTERPOLATE, VariableCategory
+
 if TYPE_CHECKING:
     from tsam.config import ClusterConfig, ExtremeConfig, SegmentConfig

@@ -882,6 +885,26 @@ def _dataset_resample(
         time_var_names = [v for v in dataset.data_vars if 'time' in dataset[v].dims]
         non_time_var_names = [v for v in dataset.data_vars if v not in time_var_names]

+        # Handle case where no data variables have time dimension (all scalars)
+        # We still need to resample the time coordinate itself
+        if not time_var_names:
+            if 'time' not in dataset.coords:
+                raise ValueError('Dataset has no time dimension to resample')
+            # Create a dummy variable to resample the time coordinate
+            dummy = xr.DataArray(
+                np.zeros(len(dataset.coords['time'])), dims=['time'], coords={'time': dataset.coords['time']}
+            )
+            dummy_ds = xr.Dataset({'__dummy__': dummy})
+            resampled_dummy = getattr(dummy_ds.resample(time=freq, **kwargs), method)()
+            # Get the resampled time coordinate
+            resampled_time = resampled_dummy.coords['time']
+            # Create result with all original scalar data and resampled time coordinate
+            # Keep all existing coordinates (period, scenario, etc.) except time which gets resampled
+            result = dataset.copy()
+            result = result.assign_coords(time=resampled_time)
+            result.attrs.update(original_attrs)
+            return FlowSystem._update_time_metadata(result, hours_of_last_timestep, hours_of_previous_timesteps)
+
         time_dataset = dataset[time_var_names]
         resampled_time_dataset = cls._resample_by_dimension_groups(time_dataset, freq, method, **kwargs)
@@ -915,6 +938,12 @@
         else:
             result = resampled_time_dataset

+        # Preserve all original coordinates that aren't 'time' (e.g., period, scenario, cluster)
+        # These may be lost during merge if no data variable uses them
+        for coord_name, coord_val in dataset.coords.items():
+            if coord_name != 'time' and coord_name not in result.coords:
+                result = result.assign_coords({coord_name: coord_val})
+
         result.attrs.update(original_attrs)
         return FlowSystem._update_time_metadata(result, hours_of_last_timestep, hours_of_previous_timesteps)
@@ -1695,7 +1724,7 @@ def _combine_intercluster_charge_states(
             n_original_clusters: Number of original clusters before aggregation.
         """
         n_original_timesteps_extra = len(original_timesteps_extra)
-        soc_boundary_vars = [name for name in reduced_solution.data_vars if name.endswith('|SOC_boundary')]
+        soc_boundary_vars = self._fs.get_variables_by_category(VariableCategory.SOC_BOUNDARY)

         for soc_boundary_name in soc_boundary_vars:
             storage_name = soc_boundary_name.rsplit('|', 1)[0]
@@ -1774,7 +1803,7 @@ def _apply_soc_decay(
         )

         # Decay factor: (1 - loss)^t
-        loss_value = storage.relative_loss_per_hour.mean('time')
+        loss_value = _scalar_safe_reduce(storage.relative_loss_per_hour, 'time', 'mean')
         if not np.any(loss_value.values > 0):
             return soc_boundary_per_timestep
@@ -1798,15 +1827,16 @@
         return soc_boundary_per_timestep * decay_da

     def _build_segment_total_varnames(self) -> set[str]:
-        """Build the set of solution variable names that represent segment totals.
+        """Build segment total variable names - BACKWARDS COMPATIBILITY FALLBACK.
+
+        This method is only used when variable_categories is empty (old FlowSystems
+        saved before category registration was implemented). New FlowSystems use
+        the VariableCategory registry with EXPAND_DIVIDE categories (PER_TIMESTEP, SHARE).

         For segmented systems, these variables contain values that are summed over
         segments. When expanded to hourly resolution, they need to be divided by
         segment duration to get correct hourly rates.

-        Derives variable names directly from FlowSystem structure (effects, flows,
-        components) rather than pattern matching, ensuring robustness.
-
         Returns:
             Set of variable names that should be divided by expansion divisor.
         """
@@ -1853,6 +1883,8 @@ def _interpolate_charge_state_segmented(
         this method interpolates between start and end boundary values to show
         the actual charge trajectory as the storage charges/discharges.

+        Uses vectorized xarray operations via Clustering class properties.
+
         Args:
             da: charge_state DataArray with dims (cluster, time) where time has n_segments+1 entries.
             clustering: Clustering object with segment info.
@@ -1861,114 +1893,44 @@
         Returns:
             Interpolated charge_state with dims (time, ...) for original timesteps.
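+
+        Example:
+            With a segment of duration 4, positions 0..3 within the segment give
+            interpolation factors (0 + 0.5) / 4 = 0.125, then 0.375, 0.625 and 0.875,
+            i.e. midpoint sampling between the segment's start and end boundaries.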
""" - timesteps_per_cluster = clustering.timesteps_per_cluster - n_original_clusters = clustering.n_original_clusters - cluster_assignments = clustering.cluster_assignments - - # Get segment assignments and durations from clustering results - extra_dims = clustering.results.dim_names - - def _interpolate_slice( - charge_state_data: np.ndarray, - assignments: np.ndarray, - clustering_result, - ) -> np.ndarray: - """Interpolate charge_state for a single period/scenario slice.""" - n_timesteps = n_original_clusters * timesteps_per_cluster - result = np.zeros(n_timesteps) - - seg_assignments = clustering_result.segment_assignments - seg_durations = clustering_result.segment_durations - - for orig_cluster_idx in range(n_original_clusters): - typical_cluster_idx = int(assignments[orig_cluster_idx]) - cluster_seg_assigns = seg_assignments[typical_cluster_idx] - cluster_seg_durs = seg_durations[typical_cluster_idx] - - # Build cumulative positions within cluster for interpolation - for t in range(timesteps_per_cluster): - seg_idx = int(cluster_seg_assigns[t]) - # Count how many timesteps before this one are in the same segment - seg_start_t = 0 - for prev_t in range(t): - if cluster_seg_assigns[prev_t] == seg_idx: - break - seg_start_t = prev_t + 1 - t_within_seg = t - seg_start_t - seg_duration = cluster_seg_durs[seg_idx] - - # Interpolation factor: position within segment (0 to 1) - # At t_within_seg=0, factor=0 (start of segment) - # At t_within_seg=seg_duration-1, factor approaches 1 (end of segment) - if seg_duration > 1: - factor = (t_within_seg + 0.5) / seg_duration - else: - factor = 0.5 - - # Get start and end boundary values - start_val = charge_state_data[typical_cluster_idx, seg_idx] - end_val = charge_state_data[typical_cluster_idx, seg_idx + 1] - - # Linear interpolation - result[orig_cluster_idx * timesteps_per_cluster + t] = start_val + (end_val - start_val) * factor - - return result - - # Handle extra dimensions (period, scenario) - if not extra_dims: - # Simple case: no period/scenario dimensions - result = clustering.results.sel() - interpolated = _interpolate_slice(da.values, cluster_assignments.values, result) - return xr.DataArray( - interpolated, - dims=['time'], - coords={'time': original_timesteps}, - attrs=da.attrs, - ) + # Get multi-dimensional properties from Clustering + timestep_mapping = clustering.timestep_mapping + segment_assignments = clustering.results.segment_assignments + segment_durations = clustering.results.segment_durations + position_within_segment = clustering.results.position_within_segment + + # Decode timestep_mapping into cluster and time indices + # For segmented systems, use n_segments as the divisor (matches expand_data/build_expansion_divisor) + if clustering.is_segmented and clustering.n_segments is not None: + time_dim_size = clustering.n_segments + else: + time_dim_size = clustering.timesteps_per_cluster + cluster_indices = timestep_mapping // time_dim_size + time_indices = timestep_mapping % time_dim_size - # Multi-dimensional case - dim_coords = clustering.results.coords - interpolated_slices = {} + # Get segment index and position for each original timestep + seg_indices = segment_assignments.isel(cluster=cluster_indices, time=time_indices) + positions = position_within_segment.isel(cluster=cluster_indices, time=time_indices) + durations = segment_durations.isel(cluster=cluster_indices, segment=seg_indices) - for combo in np.ndindex(*[len(v) for v in dim_coords.values()]): - selector = {d: dim_coords[d][i] for d, i in zip(extra_dims, 
-        # Multi-dimensional case
-        dim_coords = clustering.results.coords
-        interpolated_slices = {}
+        # Get segment index and position for each original timestep
+        seg_indices = segment_assignments.isel(cluster=cluster_indices, time=time_indices)
+        positions = position_within_segment.isel(cluster=cluster_indices, time=time_indices)
+        durations = segment_durations.isel(cluster=cluster_indices, segment=seg_indices)
-        for combo in np.ndindex(*[len(v) for v in dim_coords.values()]):
-            selector = {d: dim_coords[d][i] for d, i in zip(extra_dims, combo, strict=True)}
+        # Calculate interpolation factor: position within segment (0 to 1)
+        # At position=0, factor=0.5/duration (start of segment)
+        # At position=duration-1, factor approaches 1 (end of segment)
+        factor = xr.where(durations > 1, (positions + 0.5) / durations, 0.5)
-            # Get cluster assignments for this period/scenario
-            if any(d in cluster_assignments.dims for d in selector):
-                from .clustering.base import _select_dims
+        # Get start and end boundary values from charge_state
+        # charge_state has dims (cluster, time) where time = segment boundaries (n_segments+1)
+        start_vals = da.isel(cluster=cluster_indices, time=seg_indices)
+        end_vals = da.isel(cluster=cluster_indices, time=seg_indices + 1)
-                assignments = _select_dims(cluster_assignments, **selector).values
-            else:
-                assignments = cluster_assignments.values
-
-            # Get charge_state data for this period/scenario
-            da_slice = da
-            for dim, val in selector.items():
-                if dim in da.dims:
-                    da_slice = da_slice.sel({dim: val})
-
-            # Get clustering result for this period/scenario
-            result = clustering.results.sel(**selector)
-            interpolated = _interpolate_slice(da_slice.values, assignments, result)
-
-            interpolated_slices[tuple(selector.values())] = xr.DataArray(
-                interpolated,
-                dims=['time'],
-                coords={'time': original_timesteps},
-            )
+        # Linear interpolation
+        interpolated = start_vals + (end_vals - start_vals) * factor
-        # Concatenate along extra dimensions
-        result_arrays = interpolated_slices
-        for dim in reversed(extra_dims):
-            dim_vals = dim_coords[dim]
-            grouped = {}
-            for key, arr in result_arrays.items():
-                rest_key = key[:-1] if len(key) > 1 else ()
-                grouped.setdefault(rest_key, []).append(arr)
-            result_arrays = {k: xr.concat(v, dim=pd.Index(dim_vals, name=dim)) for k, v in grouped.items()}
+        # Clean up coordinate artifacts and rename
+        interpolated = interpolated.drop_vars(['cluster', 'time', 'segment'], errors='ignore')
+        interpolated = interpolated.rename({'original_time': 'time'}).assign_coords(time=original_timesteps)
-        result = list(result_arrays.values())[0]
-        return result.transpose('time', ...).assign_attrs(da.attrs)
+        return interpolated.transpose('time', ...).assign_attrs(da.attrs)

     def expand(self) -> FlowSystem:
         """Expand a clustered FlowSystem back to full original timesteps.
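+
+        Example (editor's sketch; the accessor attachment `fs.transform` is an
+        assumption, so this is illustrative only):
+            >>> expanded = fs.transform.expand()  # doctest: +SKIP
+            >>> expanded.solution  # mapped back to the original timesteps  # doctest: +SKIP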
@@ -2093,69 +2055,72 @@ def expand(self) -> FlowSystem:
         # For segmented systems: build expansion divisor and identify segment total variables
         expansion_divisor = None
         segment_total_vars: set[str] = set()
+        variable_categories = getattr(self._fs, '_variable_categories', {})
         if clustering.is_segmented:
             expansion_divisor = clustering.build_expansion_divisor(original_time=original_timesteps)
-            segment_total_vars = self._build_segment_total_varnames()
+            # Build segment total vars using registry first, fall back to pattern matching
+            segment_total_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_DIVIDE}
+            # Fall back to pattern matching for backwards compatibility (old FlowSystems without categories)
+            if not segment_total_vars:
+                segment_total_vars = self._build_segment_total_varnames()
+
+        def _is_state_variable(var_name: str) -> bool:
+            """Check if a variable is a state variable (should be interpolated)."""
+            if var_name in variable_categories:
+                return variable_categories[var_name] in EXPAND_INTERPOLATE
+            # Fall back to pattern matching for backwards compatibility
+            return var_name.endswith('|charge_state')
+
+        def _append_final_state(expanded: xr.DataArray, da: xr.DataArray) -> xr.DataArray:
+            """Append final state value from original data to expanded data."""
+            cluster_assignments = clustering.cluster_assignments
+            if cluster_assignments.ndim == 1:
+                last_cluster = int(cluster_assignments.values[last_original_cluster_idx])
+                extra_val = da.isel(cluster=last_cluster, time=-1)
+            else:
+                last_clusters = cluster_assignments.isel(original_cluster=last_original_cluster_idx)
+                extra_val = da.isel(cluster=last_clusters, time=-1)
+            extra_val = extra_val.drop_vars(['cluster', 'time'], errors='ignore')
+            extra_val = extra_val.expand_dims(time=[original_timesteps_extra[-1]])
+            return xr.concat([expanded, extra_val], dim='time')

         def expand_da(da: xr.DataArray, var_name: str = '', is_solution: bool = False) -> xr.DataArray:
-            """Expand a DataArray from clustered to original timesteps.
-
-            Args:
-                da: DataArray to expand.
-                var_name: Variable name for segment total lookup.
-                is_solution: True if this is a solution variable (may need segment correction).
-                    FlowSystem data (is_solution=False) is never corrected for segments.
- """ + """Expand a DataArray from clustered to original timesteps.""" if 'time' not in da.dims: return da.copy() - # For charge_state in segmented systems: interpolate within segments - # to show the actual charge trajectory as storage charges/discharges - if var_name.endswith('|charge_state') and 'cluster' in da.dims and clustering.is_segmented: + is_state = _is_state_variable(var_name) and 'cluster' in da.dims + + # State variables in segmented systems: interpolate within segments + if is_state and clustering.is_segmented: expanded = self._interpolate_charge_state_segmented(da, clustering, original_timesteps) - # Append the extra timestep value (final charge state) - cluster_assignments = clustering.cluster_assignments - if cluster_assignments.ndim == 1: - last_cluster = int(cluster_assignments[last_original_cluster_idx]) - extra_val = da.isel(cluster=last_cluster, time=-1) - else: - last_clusters = cluster_assignments.isel(original_cluster=last_original_cluster_idx) - extra_val = da.isel(cluster=last_clusters, time=-1) - extra_val = extra_val.drop_vars(['cluster', 'time'], errors='ignore') - extra_val = extra_val.expand_dims(time=[original_timesteps_extra[-1]]) - expanded = xr.concat([expanded, extra_val], dim='time') - return expanded + return _append_final_state(expanded, da) expanded = clustering.expand_data(da, original_time=original_timesteps) - # For segmented systems: divide segment totals by expansion divisor - # ONLY for solution variables explicitly identified as segment totals + # Segment totals: divide by expansion divisor if is_solution and expansion_divisor is not None and var_name in segment_total_vars: expanded = expanded / expansion_divisor - # For charge_state with cluster dim (non-segmented), append the extra timestep value - if var_name.endswith('|charge_state') and 'cluster' in da.dims: - cluster_assignments = clustering.cluster_assignments - if cluster_assignments.ndim == 1: - last_cluster = int(cluster_assignments[last_original_cluster_idx]) - extra_val = da.isel(cluster=last_cluster, time=-1) - else: - last_clusters = cluster_assignments.isel(original_cluster=last_original_cluster_idx) - extra_val = da.isel(cluster=last_clusters, time=-1) - extra_val = extra_val.drop_vars(['cluster', 'time'], errors='ignore') - extra_val = extra_val.expand_dims(time=[original_timesteps_extra[-1]]) - expanded = xr.concat([expanded, extra_val], dim='time') + # State variables: append final state + if is_state: + expanded = _append_final_state(expanded, da) return expanded # 1. 
         reduced_ds = self._fs.to_dataset(include_solution=False)
         clustering_attrs = {'is_clustered', 'n_clusters', 'timesteps_per_cluster', 'clustering', 'cluster_weight'}
-        data_vars = {
-            name: expand_da(da, name)
-            for name, da in reduced_ds.data_vars.items()
-            if name != 'cluster_weight' and not name.startswith('clustering|')
-        }
+        skip_vars = {'cluster_weight', 'timestep_duration'}  # These have special handling
+        data_vars = {}
+        for name, da in reduced_ds.data_vars.items():
+            if name in skip_vars or name.startswith('clustering|'):
+                continue
+            # Skip vars with cluster dim but no time dim - they don't make sense after expansion
+            # (e.g., representative_weights with dims ('cluster',) or ('cluster', 'period'))
+            if 'cluster' in da.dims and 'time' not in da.dims:
+                continue
+            data_vars[name] = expand_da(da, name)
         attrs = {k: v for k, v in reduced_ds.attrs.items() if k not in clustering_attrs}
         expanded_ds = xr.Dataset(data_vars, attrs=attrs)
diff --git a/tests/conftest.py b/tests/conftest.py
index ee2c0f4e2..9923af896 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -829,6 +829,25 @@ def assert_sets_equal(set1: Iterable, set2: Iterable, msg=''):
     raise AssertionError(error_msg)


+def assert_dims_compatible(data: xr.DataArray, model_coords: tuple[str, ...], msg: str = ''):
+    """Assert that data dimensions are a subset of model coordinates (compatible with broadcasting).
+
+    Parameters in flixopt now stay in minimal form (scalar, 1D, etc.) and are broadcast
+    at the linopy interface. This helper verifies that data dims are valid for the model.
+
+    Args:
+        data: DataArray to check
+        model_coords: Tuple of model coordinate names (from model.get_coords())
+        msg: Optional message for assertion error
+    """
+    extra_dims = set(data.dims) - set(model_coords)
+    if extra_dims:
+        error = f'Data has dimensions {extra_dims} not in model coordinates {model_coords}'
+        if msg:
+            error = f'{msg}: {error}'
+        raise AssertionError(error)
+
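+
+# Usage sketch (editor's illustration; the DataArray below is hypothetical):
+#
+#     da = xr.DataArray([1.0, 2.0], dims=['time'])
+#     assert_dims_compatible(da, model_coords=('time', 'scenario'))  # passes
+#     assert_dims_compatible(da, model_coords=('scenario',))  # raises AssertionError
+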
 # ============================================================================
 # PLOTTING CLEANUP FIXTURES
 # ============================================================================
diff --git a/tests/deprecated/test_component.py b/tests/deprecated/test_component.py
index 765a08c5a..f81ca270e 100644
--- a/tests/deprecated/test_component.py
+++ b/tests/deprecated/test_component.py
@@ -131,7 +131,8 @@ def test_on_with_multiple_flows(self, basic_flow_system_linopy_coords, coords_co

         upper_bound_flow_rate = outputs[1].relative_maximum

-        assert upper_bound_flow_rate.dims == tuple(model.get_coords())
+        # Data stays in minimal form (1D array stays 1D)
+        assert upper_bound_flow_rate.dims == ('time',)

         assert_var_equal(
             model['TestComponent(Out2)|flow_rate'],
@@ -311,7 +312,8 @@ def test_previous_states_with_multiple_flows(self, basic_flow_system_linopy_coor

         upper_bound_flow_rate = outputs[1].relative_maximum

-        assert upper_bound_flow_rate.dims == tuple(model.get_coords())
+        # Data stays in minimal form (1D array stays 1D)
+        assert upper_bound_flow_rate.dims == ('time',)

         assert_var_equal(
             model['TestComponent(Out2)|flow_rate'],
diff --git a/tests/deprecated/test_dataconverter.py b/tests/deprecated/test_dataconverter.py
deleted file mode 100644
index a5774fd6b..000000000
--- a/tests/deprecated/test_dataconverter.py
+++ /dev/null
@@ -1,1262 +0,0 @@
-import numpy as np
-import pandas as pd
-import pytest
-import xarray as xr
-
-from flixopt.core import (  # Adjust this import to match your project structure
-    ConversionError,
-    DataConverter,
-    TimeSeriesData,
-)
-
-
-@pytest.fixture
-def time_coords():
-    return pd.date_range('2024-01-01', periods=5, freq='D', name='time')
-
-
-@pytest.fixture
-def scenario_coords():
-    return pd.Index(['baseline', 'high', 'low'], name='scenario')
-
-
-@pytest.fixture
-def region_coords():
-    return pd.Index(['north', 'south', 'east'], name='region')
-
-
-@pytest.fixture
-def standard_coords():
-    """Standard coordinates with unique lengths for easy testing."""
-    return {
-        'time': pd.date_range('2024-01-01', periods=5, freq='D', name='time'),  # length 5
-        'scenario': pd.Index(['A', 'B', 'C'], name='scenario'),  # length 3
-        'region': pd.Index(['north', 'south'], name='region'),  # length 2
-    }
-
-
-class TestScalarConversion:
-    """Test scalar data conversions with different coordinate configurations."""
-
-    def test_scalar_no_coords(self):
-        """Scalar without coordinates should create 0D DataArray."""
-        result = DataConverter.to_dataarray(42)
-        assert result.shape == ()
-        assert result.dims == ()
-        assert result.item() == 42
-
-    def test_scalar_single_coord(self, time_coords):
-        """Scalar with single coordinate should broadcast."""
-        result = DataConverter.to_dataarray(42, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.all(result.values == 42)
-
-    def test_scalar_multiple_coords(self, time_coords, scenario_coords):
-        """Scalar with multiple coordinates should broadcast to all."""
-        result = DataConverter.to_dataarray(42, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-        assert np.all(result.values == 42)
-
-    def test_numpy_scalars(self, time_coords):
-        """Test numpy scalar types."""
-        for scalar in [np.int32(42), np.int64(42), np.float32(42.5), np.float64(42.5)]:
-            result = DataConverter.to_dataarray(scalar, coords={'time': time_coords})
-            assert result.shape == (5,)
-            assert np.all(result.values == scalar.item())
-
-    def test_scalar_many_dimensions(self, standard_coords):
-        """Scalar should broadcast to any number of dimensions."""
-        coords = {**standard_coords, 'technology': pd.Index(['solar', 'wind'], name='technology')}
-
-        result = DataConverter.to_dataarray(42, coords=coords)
-        assert result.shape == (5, 3, 2, 2)
-        assert result.dims == ('time', 'scenario', 'region', 'technology')
-        assert np.all(result.values == 42)
-
-
-class TestOneDimensionalArrayConversion:
-    """Test 1D numpy array and pandas Series conversions."""
-
-    def test_1d_array_no_coords(self):
-        """1D array without coords should fail unless single element."""
-        # Multi-element fails
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(np.array([1, 2, 3]))
-
-        # Single element succeeds
-        result = DataConverter.to_dataarray(np.array([42]))
-        assert result.shape == ()
-        assert result.item() == 42
-
-    def test_1d_array_matching_coord(self, time_coords):
-        """1D array matching coordinate length should work."""
-        arr = np.array([10, 20, 30, 40, 50])
-        result = DataConverter.to_dataarray(arr, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.array_equal(result.values, arr)
-
-    def test_1d_array_mismatched_coord(self, time_coords):
-        """1D array not matching coordinate length should fail."""
-        arr = np.array([10, 20, 30])  # Length 3, time_coords has length 5
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(arr, coords={'time': time_coords})
-
-    def test_1d_array_broadcast_to_multiple_coords(self, time_coords, scenario_coords):
-        """1D array should broadcast to matching dimension."""
-        # Array matching time dimension
-        time_arr = np.array([10, 20, 30, 40, 50])
-        result = DataConverter.to_dataarray(time_arr, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-
-        # Each scenario should have the same time values
-        for scenario in scenario_coords:
-            assert np.array_equal(result.sel(scenario=scenario).values, time_arr)
-
-        # Array matching scenario dimension
-        scenario_arr = np.array([100, 200, 300])
-        result = DataConverter.to_dataarray(scenario_arr, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-
-        # Each time should have the same scenario values
-        for time in time_coords:
-            assert np.array_equal(result.sel(time=time).values, scenario_arr)
-
-    def test_1d_array_ambiguous_length(self):
-        """Array length matching multiple dimensions should fail."""
-        # Both dimensions have length 3
-        coords_3x3 = {
-            'time': pd.date_range('2024-01-01', periods=3, freq='D', name='time'),
-            'scenario': pd.Index(['A', 'B', 'C'], name='scenario'),
-        }
-        arr = np.array([1, 2, 3])
-
-        with pytest.raises(ConversionError, match='matches multiple dimension'):
-            DataConverter.to_dataarray(arr, coords=coords_3x3)
-
-    def test_1d_array_broadcast_to_many_dimensions(self, standard_coords):
-        """1D array should broadcast to many dimensions."""
-        # Array matching time dimension
-        time_arr = np.array([10, 20, 30, 40, 50])
-        result = DataConverter.to_dataarray(time_arr, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-
-        # Check broadcasting - all scenarios and regions should have same time values
-        for scenario in standard_coords['scenario']:
-            for region in standard_coords['region']:
-                assert np.array_equal(result.sel(scenario=scenario, region=region).values, time_arr)
-
-
-class TestSeriesConversion:
-    """Test pandas Series conversions."""
-
-    def test_series_no_coords(self):
-        """Series without coords should fail unless single element."""
-        # Multi-element fails
-        series = pd.Series([1, 2, 3])
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(series)
-
-        # Single element succeeds
-        single_series = pd.Series([42])
-        result = DataConverter.to_dataarray(single_series)
-        assert result.shape == ()
-        assert result.item() == 42
-
-    def test_series_matching_index(self, time_coords, scenario_coords):
-        """Series with matching index should work."""
-        # Time-indexed series
-        time_series = pd.Series([10, 20, 30, 40, 50], index=time_coords)
-        result = DataConverter.to_dataarray(time_series, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.array_equal(result.values, time_series.values)
-
-        # Scenario-indexed series
-        scenario_series = pd.Series([100, 200, 300], index=scenario_coords)
-        result = DataConverter.to_dataarray(scenario_series, coords={'scenario': scenario_coords})
-        assert result.shape == (3,)
-        assert result.dims == ('scenario',)
-        assert np.array_equal(result.values, scenario_series.values)
-
-    def test_series_mismatched_index(self, time_coords):
-        """Series with non-matching index should fail."""
-        wrong_times = pd.date_range('2025-01-01', periods=5, freq='D', name='time')
-        series = pd.Series([10, 20, 30, 40, 50], index=wrong_times)
-
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(series, coords={'time': time_coords})
-
-    def test_series_broadcast_to_multiple_coords(self, time_coords, scenario_coords):
-        """Series should broadcast to non-matching dimensions."""
-        # Time series broadcast to scenarios
-        time_series = pd.Series([10, 20, 30, 40, 50], index=time_coords)
-        result = DataConverter.to_dataarray(time_series, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-
-        for scenario in scenario_coords:
-            assert np.array_equal(result.sel(scenario=scenario).values, time_series.values)
-
-    def test_series_wrong_dimension(self, time_coords, region_coords):
-        """Series indexed by dimension not in coords should fail."""
-        wrong_series = pd.Series([1, 2, 3], index=region_coords)
-
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(wrong_series, coords={'time': time_coords})
-
-    def test_series_broadcast_to_many_dimensions(self, standard_coords):
-        """Series should broadcast to many dimensions."""
-        time_series = pd.Series([100, 200, 300, 400, 500], index=standard_coords['time'])
-        result = DataConverter.to_dataarray(time_series, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-
-        # Check that all non-time dimensions have the same time series values
-        for scenario in standard_coords['scenario']:
-            for region in standard_coords['region']:
-                assert np.array_equal(result.sel(scenario=scenario, region=region).values, time_series.values)
-
-
-class TestDataFrameConversion:
-    """Test pandas DataFrame conversions."""
-
-    def test_single_column_dataframe(self, time_coords):
-        """Single-column DataFrame should work like Series."""
-        df = pd.DataFrame({'value': [10, 20, 30, 40, 50]}, index=time_coords)
-        result = DataConverter.to_dataarray(df, coords={'time': time_coords})
-
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.array_equal(result.values, df['value'].values)
-
-    def test_multi_column_dataframe_accepted(self, time_coords, scenario_coords):
-        """Multi-column DataFrame should now be accepted and converted via numpy array path."""
-        df = pd.DataFrame(
-            {'value1': [10, 20, 30, 40, 50], 'value2': [15, 25, 35, 45, 55], 'value3': [12, 22, 32, 42, 52]},
-            index=time_coords,
-        )
-
-        # Should work by converting to numpy array (5x3) and matching to time x scenario
-        result = DataConverter.to_dataarray(df, coords={'time': time_coords, 'scenario': scenario_coords})
-
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-        assert np.array_equal(result.values, df.to_numpy())
-
-    def test_empty_dataframe_rejected(self, time_coords):
-        """Empty DataFrame should be rejected."""
-        df = pd.DataFrame(index=time_coords)  # No columns
-
-        with pytest.raises(ConversionError, match='DataFrame must have at least one column'):
-            DataConverter.to_dataarray(df, coords={'time': time_coords})
-
-    def test_dataframe_broadcast(self, time_coords, scenario_coords):
-        """Single-column DataFrame should broadcast like Series."""
-        df = pd.DataFrame({'power': [10, 20, 30, 40, 50]}, index=time_coords)
-        result = DataConverter.to_dataarray(df, coords={'time': time_coords, 'scenario': scenario_coords})
-
-        assert result.shape == (5, 3)
-        for scenario in scenario_coords:
-            assert np.array_equal(result.sel(scenario=scenario).values, df['power'].values)
-
-
-class TestMultiDimensionalArrayConversion:
-    """Test multi-dimensional numpy array conversions."""
-
-    def test_2d_array_unique_dimensions(self, standard_coords):
-        """2D array with unique dimension lengths should work."""
-        # 5x3 array should map to time x scenario
-        data_2d = np.random.rand(5, 3)
-        result = DataConverter.to_dataarray(
-            data_2d, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-        assert np.array_equal(result.values, data_2d)
-
-        # 3x5 array should map to scenario x time
-        data_2d_flipped = np.random.rand(3, 5)
-        result_flipped = DataConverter.to_dataarray(
-            data_2d_flipped, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-
-        assert result_flipped.shape == (5, 3)
-        assert result_flipped.dims == ('time', 'scenario')
-        assert np.array_equal(result_flipped.values.transpose(), data_2d_flipped)
-
-    def test_2d_array_broadcast_to_3d(self, standard_coords):
-        """2D array should broadcast to additional dimensions when using partial matching."""
-        # With improved integration, 2D array (5x3) should match time×scenario and broadcast to region
-        data_2d = np.random.rand(5, 3)
-        result = DataConverter.to_dataarray(data_2d, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-
-        # Check that all regions have the same time x scenario data
-        for region in standard_coords['region']:
-            assert np.array_equal(result.sel(region=region).values, data_2d)
-
-    def test_3d_array_unique_dimensions(self, standard_coords):
-        """3D array with unique dimension lengths should work."""
-        # 5x3x2 array should map to time x scenario x region
-        data_3d = np.random.rand(5, 3, 2)
-        result = DataConverter.to_dataarray(data_3d, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-        assert np.array_equal(result.values, data_3d)
-
-    def test_3d_array_different_permutation(self, standard_coords):
-        """3D array with different dimension order should work."""
-        # 2x5x3 array should map to region x time x scenario
-        data_3d = np.random.rand(2, 5, 3)
-        result = DataConverter.to_dataarray(data_3d, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-        assert np.array_equal(result.transpose('region', 'time', 'scenario').values, data_3d)
-
-    def test_4d_array_unique_dimensions(self):
-        """4D array with unique dimension lengths should work."""
-        coords = {
-            'time': pd.date_range('2024-01-01', periods=2, freq='D', name='time'),  # length 2
-            'scenario': pd.Index(['A', 'B', 'C'], name='scenario'),  # length 3
-            'region': pd.Index(['north', 'south', 'east', 'west'], name='region'),  # length 4
-            'technology': pd.Index(['solar', 'wind', 'gas', 'coal', 'hydro'], name='technology'),  # length 5
-        }
-
-        # 3x5x2x4 array should map to scenario x technology x time x region
-        data_4d = np.random.rand(3, 5, 2, 4)
-        result = DataConverter.to_dataarray(data_4d, coords=coords)
-
-        assert result.shape == (2, 3, 4, 5)
-        assert result.dims == ('time', 'scenario', 'region', 'technology')
-        assert np.array_equal(result.transpose('scenario', 'technology', 'time', 'region').values, data_4d)
-
-    def test_2d_array_ambiguous_dimensions_error(self):
-        """2D array with ambiguous dimension lengths should fail."""
-        # Both dimensions have length 3
-        coords_ambiguous = {
-            'scenario': pd.Index(['A', 'B', 'C'], name='scenario'),  # length 3
-            'region': pd.Index(['north', 'south', 'east'], name='region'),  # length 3
-        }
-
-        data_2d = np.random.rand(3, 3)
-        with pytest.raises(ConversionError, match='matches multiple dimension combinations'):
-            DataConverter.to_dataarray(data_2d, coords=coords_ambiguous)
-
-    def test_multid_array_no_coords(self):
-        """Multi-D arrays without coords should fail unless scalar."""
-        # Multi-element fails
-        data_2d = np.random.rand(2, 3)
-        with pytest.raises(ConversionError, match='Cannot convert multi-element array without target dimensions'):
-            DataConverter.to_dataarray(data_2d)
-
-        # Single element succeeds
-        single_element = np.array([[42]])
-        result = DataConverter.to_dataarray(single_element)
-        assert result.shape == ()
-        assert result.item() == 42
-
-    def test_array_no_matching_dimensions_error(self, standard_coords):
-        """Array with no matching dimension lengths should fail."""
-        # 7x8 array - no dimension has length 7 or 8
-        data_2d = np.random.rand(7, 8)
-        coords_2d = {
-            'time': standard_coords['time'],  # length 5
-            'scenario': standard_coords['scenario'],  # length 3
-        }
-
-        with pytest.raises(ConversionError, match='cannot be mapped to any combination'):
-            DataConverter.to_dataarray(data_2d, coords=coords_2d)
-
-    def test_multid_array_special_values(self, standard_coords):
-        """Multi-D arrays should preserve special values."""
-        # Create 2D array with special values
-        data_2d = np.array(
-            [[1.0, np.nan, 3.0], [np.inf, 5.0, -np.inf], [7.0, 8.0, 9.0], [10.0, np.nan, 12.0], [13.0, 14.0, np.inf]]
-        )
-
-        result = DataConverter.to_dataarray(
-            data_2d, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-
-        assert result.shape == (5, 3)
-        assert np.array_equal(np.isnan(result.values), np.isnan(data_2d))
-        assert np.array_equal(np.isinf(result.values), np.isinf(data_2d))
-
-    def test_multid_array_dtype_preservation(self, standard_coords):
-        """Multi-D arrays should preserve data types."""
-        # Integer array
-        int_data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int32)
-
-        result_int = DataConverter.to_dataarray(
-            int_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-
-        assert result_int.dtype == np.int32
-        assert np.array_equal(result_int.values, int_data)
-
-        # Boolean array
-        bool_data = np.array(
-            [[True, False, True], [False, True, False], [True, True, False], [False, False, True], [True, False, True]]
-        )
-
-        result_bool = DataConverter.to_dataarray(
-            bool_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-
-        assert result_bool.dtype == bool
-        assert np.array_equal(result_bool.values, bool_data)
-
-
-class TestDataArrayConversion:
-    """Test xarray DataArray conversions."""
-
-    def test_compatible_dataarray(self, time_coords):
-        """Compatible DataArray should pass through."""
-        original = xr.DataArray([10, 20, 30, 40, 50], coords={'time': time_coords}, dims='time')
-        result = DataConverter.to_dataarray(original, coords={'time': time_coords})
-
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.array_equal(result.values, original.values)
-
-        # Should be a copy
-        result[0] = 999
-        assert original[0].item() == 10
-
-    def test_incompatible_dataarray_coords(self, time_coords):
-        """DataArray with wrong coordinates should fail."""
-        wrong_times = pd.date_range('2025-01-01', periods=5, freq='D', name='time')
-        original = xr.DataArray([10, 20, 30, 40, 50], coords={'time': wrong_times}, dims='time')
-
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(original, coords={'time': time_coords})
-
-    def test_incompatible_dataarray_dims(self, time_coords):
-        """DataArray with wrong dimensions should fail."""
-        original = xr.DataArray([10, 20, 30, 40, 50], coords={'wrong_dim': range(5)}, dims='wrong_dim')
-
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(original, coords={'time': time_coords})
-
-    def test_dataarray_broadcast(self, time_coords, scenario_coords):
-        """DataArray should broadcast to additional dimensions."""
-        # 1D time DataArray to 2D time+scenario
-        original = xr.DataArray([10, 20, 30, 40, 50], coords={'time': time_coords}, dims='time')
-        result = DataConverter.to_dataarray(original, coords={'time': time_coords, 'scenario': scenario_coords})
-
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-
-        for scenario in scenario_coords:
-            assert np.array_equal(result.sel(scenario=scenario).values, original.values)
-
-    def test_scalar_dataarray_broadcast(self, time_coords, scenario_coords):
-        """Scalar DataArray should broadcast to all dimensions."""
-        scalar_da = xr.DataArray(42)
-        result = DataConverter.to_dataarray(scalar_da, coords={'time': time_coords, 'scenario': scenario_coords})
-
-        assert result.shape == (5, 3)
-        assert np.all(result.values == 42)
-
-    def test_2d_dataarray_broadcast_to_more_dimensions(self, standard_coords):
-        """DataArray should broadcast to additional dimensions."""
-        # Start with 2D DataArray
-        original = xr.DataArray(
-            [[10, 20, 30], [40, 50, 60], [70, 80, 90], [100, 110, 120], [130, 140, 150]],
-            coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']},
-            dims=('time', 'scenario'),
-        )
-
-        # Broadcast to 3D
-        result = DataConverter.to_dataarray(original, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-
-        # Check that all regions have the same time+scenario values
-        for region in standard_coords['region']:
-            assert np.array_equal(result.sel(region=region).values, original.values)
-
-
-class TestTimeSeriesDataConversion:
-    """Test TimeSeriesData conversions."""
-
-    def test_timeseries_data_basic(self, time_coords):
-        """TimeSeriesData should work like DataArray."""
-        data_array = xr.DataArray([10, 20, 30, 40, 50], coords={'time': time_coords}, dims='time')
-        ts_data = TimeSeriesData(data_array, clustering_group='test')
-
-        result = DataConverter.to_dataarray(ts_data, coords={'time': time_coords})
-
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.array_equal(result.values, [10, 20, 30, 40, 50])
-
-    def test_timeseries_data_broadcast(self, time_coords, scenario_coords):
-        """TimeSeriesData should broadcast to additional dimensions."""
-        data_array = xr.DataArray([10, 20, 30, 40, 50], coords={'time': time_coords}, dims='time')
-        ts_data = TimeSeriesData(data_array)
-
-        result = DataConverter.to_dataarray(ts_data, coords={'time': time_coords, 'scenario': scenario_coords})
-
-        assert result.shape == (5, 3)
-        for scenario in scenario_coords:
-            assert np.array_equal(result.sel(scenario=scenario).values, [10, 20, 30, 40, 50])
-
-
-class TestAsDataArrayAlias:
-    """Test that to_dataarray works as an alias for to_dataarray."""
-
-    def test_to_dataarray_is_alias(self, time_coords, scenario_coords):
-        """to_dataarray should work identically to to_dataarray."""
-        # Test with scalar
-        result_to = DataConverter.to_dataarray(42, coords={'time': time_coords})
-        result_as = DataConverter.to_dataarray(42, coords={'time': time_coords})
-        assert np.array_equal(result_to.values, result_as.values)
-        assert result_to.dims == result_as.dims
-        assert result_to.shape == result_as.shape
-
-        # Test with array
-        arr = np.array([10, 20, 30, 40, 50])
-        result_to_arr = DataConverter.to_dataarray(arr, coords={'time': time_coords})
-        result_as_arr = DataConverter.to_dataarray(arr, coords={'time': time_coords})
-        assert np.array_equal(result_to_arr.values, result_as_arr.values)
-        assert result_to_arr.dims == result_as_arr.dims
-
-        # Test with Series
-        series = pd.Series([100, 200, 300, 400, 500], index=time_coords)
-        result_to_series = DataConverter.to_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords})
-        result_as_series = DataConverter.to_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert np.array_equal(result_to_series.values, result_as_series.values)
-        assert result_to_series.dims == result_as_series.dims
-
-
-class TestCustomDimensions:
-    """Test with custom dimension names beyond time/scenario."""
-
-    def test_custom_single_dimension(self, region_coords):
-        """Test with custom dimension name."""
-        result = DataConverter.to_dataarray(42, coords={'region': region_coords})
-        assert result.shape == (3,)
-        assert result.dims == ('region',)
-        assert np.all(result.values == 42)
-
-    def test_custom_multiple_dimensions(self):
-        """Test with multiple custom dimensions."""
-        products = pd.Index(['A', 'B'], name='product')
-        technologies = pd.Index(['solar', 'wind', 'gas'], name='technology')
-
-        # Array matching technology dimension
-        arr = np.array([100, 150, 80])
-        result = DataConverter.to_dataarray(arr, coords={'product': products, 'technology': technologies})
-
-        assert result.shape == (2, 3)
-        assert result.dims == ('product', 'technology')
-
-        # Should broadcast across products
-        for product in products:
-            assert np.array_equal(result.sel(product=product).values, arr)
-
-    def test_mixed_dimension_types(self):
-        """Test mixing time dimension with custom dimensions."""
-        time_coords = pd.date_range('2024-01-01', periods=3, freq='D', name='time')
-        regions = pd.Index(['north', 'south'], name='region')
-
-        # Time series should broadcast to regions
-        time_series = pd.Series([10, 20, 30], index=time_coords)
-        result = DataConverter.to_dataarray(time_series, coords={'time': time_coords, 'region': regions})
-
-        assert result.shape == (3, 2)
-        assert result.dims == ('time', 'region')
-
-    def test_custom_dimensions_complex(self):
-        """Test complex scenario with custom dimensions."""
-        coords = {
-            'product': pd.Index(['A', 'B'], name='product'),
-            'factory': pd.Index(['F1', 'F2', 'F3'], name='factory'),
-            'quarter': pd.Index(['Q1', 'Q2', 'Q3', 'Q4'], name='quarter'),
-        }
-
-        # Array matching factory dimension
-        factory_arr = np.array([100, 200, 300])
-        result = DataConverter.to_dataarray(factory_arr, coords=coords)
-
-        assert result.shape == (2, 3, 4)
-        assert result.dims == ('product', 'factory', 'quarter')
-
-        # Check broadcasting
-        for product in coords['product']:
-            for quarter in coords['quarter']:
-                slice_data = result.sel(product=product, quarter=quarter)
-                assert np.array_equal(slice_data.values, factory_arr)
-
-
-class TestValidation:
-    """Test coordinate validation."""
-
-    def test_empty_coords(self):
-        """Empty coordinates should work for scalars."""
-        result = DataConverter.to_dataarray(42, coords={})
-        assert result.shape == ()
-        assert result.item() == 42
-
-    def test_invalid_coord_type(self):
-        """Non-pandas Index coordinates should fail."""
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(42, coords={'time': [1, 2, 3]})
-
-    def test_empty_coord_index(self):
-        """Empty coordinate index should fail."""
-        empty_index = pd.Index([], name='time')
-        with pytest.raises(ConversionError):
-            DataConverter.to_dataarray(42, coords={'time': empty_index})
-
-    def test_time_coord_validation(self):
-        """Time coordinates must be DatetimeIndex."""
-        # Non-datetime index with name 'time' should fail
-        wrong_time = pd.Index([1, 2, 3], name='time')
-        with pytest.raises(ConversionError, match='DatetimeIndex'):
-            DataConverter.to_dataarray(42, coords={'time': wrong_time})
-
-    def test_coord_naming(self, time_coords):
-        """Coordinates should be auto-renamed to match dimension."""
-        # Unnamed time index should be renamed
-        unnamed_time = time_coords.rename(None)
-        result = DataConverter.to_dataarray(42, coords={'time': unnamed_time})
-        assert result.coords['time'].name == 'time'
-
-
-class TestErrorHandling:
-    """Test error handling and edge cases."""
-
-    def test_unsupported_data_types(self, time_coords):
-        """Unsupported data types should fail with clear messages."""
-        unsupported = ['string', object(), None, {'dict': 'value'}, [1, 2, 3]]
-
-        for data in unsupported:
-            with pytest.raises(ConversionError):
-                DataConverter.to_dataarray(data, coords={'time': time_coords})
-
-    def test_dimension_mismatch_messages(self, time_coords, scenario_coords):
-        """Error messages should be informative."""
-        # Array with wrong length
-        wrong_arr = np.array([1, 2])  # Length 2, but no dimension has length 2
-        with pytest.raises(ConversionError, match='does not match any target dimension lengths'):
-            DataConverter.to_dataarray(wrong_arr, coords={'time': time_coords, 'scenario': scenario_coords})
-
-    def test_multidimensional_array_dimension_count_mismatch(self, standard_coords):
-        """Array with wrong number of dimensions should fail with clear error."""
-        # 4D array with 3D coordinates
-        data_4d = np.random.rand(5, 3, 2, 4)
-        with pytest.raises(ConversionError, match='cannot be mapped to any combination'):
-            DataConverter.to_dataarray(data_4d, coords=standard_coords)
-
-    def test_error_message_quality(self, standard_coords):
-        """Error messages should include helpful information."""
-        # Wrong shape array
-        data_2d = np.random.rand(7, 8)
-        coords_2d = {
-            'time': standard_coords['time'],  # length 5
-            'scenario': standard_coords['scenario'],  # length 3
-        }
-
-        try:
-            DataConverter.to_dataarray(data_2d, coords=coords_2d)
-            raise AssertionError('Should have raised ConversionError')
-        except ConversionError as e:
-            error_msg = str(e)
-            assert 'Array shape (7, 8)' in error_msg
-            assert 'target coordinate lengths:' in error_msg
-
-
-class TestDataIntegrity:
-    """Test data copying and integrity."""
-
-    def test_array_copy_independence(self, time_coords):
-        """Converted arrays should be independent copies."""
-        original_arr = np.array([10, 20, 30, 40, 50])
-        result = DataConverter.to_dataarray(original_arr, coords={'time': time_coords})
-
-        # Modify result
-        result[0] = 999
-
-        # Original should be unchanged
-        assert original_arr[0] == 10
-
-    def test_series_copy_independence(self, time_coords):
-        """Converted Series should be independent copies."""
-        original_series = pd.Series([10, 20, 30, 40, 50], index=time_coords)
-        result = DataConverter.to_dataarray(original_series, coords={'time': time_coords})
-
-        # Modify result
-        result[0] = 999
-
-        # Original should be unchanged
-        assert original_series.iloc[0] == 10
-
-    def test_dataframe_copy_independence(self, time_coords):
-        """Converted DataFrames should be independent copies."""
-        original_df = pd.DataFrame({'value': [10, 20, 30, 40, 50]}, index=time_coords)
-        result = DataConverter.to_dataarray(original_df, coords={'time': time_coords})
-
-        # Modify result
-        result[0] = 999
-
-        # Original should be unchanged
-        assert original_df.loc[time_coords[0], 'value'] == 10
-
-    def test_multid_array_copy_independence(self, standard_coords):
-        """Multi-D arrays should be independent copies."""
-        original_data = np.random.rand(5, 3)
-        result = DataConverter.to_dataarray(
-            original_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-
-        # Modify result
-        result[0, 0] = 999
-
-        # Original should be unchanged
-        assert original_data[0, 0] != 999
-
-
-class TestBooleanValues:
-    """Test handling of boolean values and arrays."""
-
-    def test_scalar_boolean_to_dataarray(self, time_coords):
-        """Scalar boolean values should work with to_dataarray."""
-        result_true = DataConverter.to_dataarray(True, coords={'time': time_coords})
-        assert result_true.shape == (5,)
-        assert result_true.dtype == bool
-        assert np.all(result_true.values)
-
-        result_false = DataConverter.to_dataarray(False, coords={'time': time_coords})
-        assert result_false.shape == (5,)
-        assert result_false.dtype == bool
-        assert not np.any(result_false.values)
-
-    def test_numpy_boolean_scalar(self, time_coords):
-        """Numpy boolean scalars should work."""
-        result_np_true = DataConverter.to_dataarray(np.bool_(True), coords={'time': time_coords})
-        assert result_np_true.shape == (5,)
-        assert result_np_true.dtype == bool
-        assert np.all(result_np_true.values)
-
-        result_np_false = DataConverter.to_dataarray(np.bool_(False), coords={'time': time_coords})
-        assert result_np_false.shape == (5,)
-        assert result_np_false.dtype == bool
-        assert not np.any(result_np_false.values)
-
-    def test_boolean_array_to_dataarray(self, time_coords):
-        """Boolean arrays should work with to_dataarray."""
-        bool_arr = np.array([True, False, True, False, True])
-        result = DataConverter.to_dataarray(bool_arr, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert result.dtype == bool
-        assert np.array_equal(result.values, bool_arr)
-
-    def test_boolean_no_coords(self):
-        """Boolean scalar without coordinates should create 0D DataArray."""
-        result = DataConverter.to_dataarray(True)
-        assert result.shape == ()
-        assert result.dims == ()
-        assert result.item()
-
-        result_as = DataConverter.to_dataarray(False)
-        assert result_as.shape == ()
-        assert result_as.dims == ()
-        assert not result_as.item()
-
-    def test_boolean_multidimensional_broadcast(self, standard_coords):
-        """Boolean values should broadcast to multiple dimensions."""
-        result = DataConverter.to_dataarray(True, coords=standard_coords)
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-        assert result.dtype == bool
-        assert np.all(result.values)
-
-        result_as = DataConverter.to_dataarray(False, coords=standard_coords)
-        assert result_as.shape == (5, 3, 2)
-        assert result_as.dims == ('time', 'scenario', 'region')
-        assert result_as.dtype == bool
-        assert not np.any(result_as.values)
-
-    def test_boolean_series(self, time_coords):
-        """Boolean Series should work."""
-        bool_series = pd.Series([True, False, True, False, True], index=time_coords)
-        result = DataConverter.to_dataarray(bool_series, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dtype == bool
-        assert np.array_equal(result.values, bool_series.values)
-
-        result_as = DataConverter.to_dataarray(bool_series, coords={'time': time_coords})
-        assert result_as.shape == (5,)
-        assert result_as.dtype == bool
-        assert np.array_equal(result_as.values, bool_series.values)
-
-    def test_boolean_dataframe(self, time_coords):
-        """Boolean DataFrame should work."""
-        bool_df = pd.DataFrame({'values': [True, False, True, False, True]}, index=time_coords)
-        result = DataConverter.to_dataarray(bool_df, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dtype == bool
-        assert np.array_equal(result.values, bool_df['values'].values)
-
-        result_as = DataConverter.to_dataarray(bool_df, coords={'time': time_coords})
-        assert result_as.shape == (5,)
-        assert result_as.dtype == bool
-        assert np.array_equal(result_as.values, bool_df['values'].values)
-
-    def test_multidimensional_boolean_array(self, standard_coords):
-        """Multi-dimensional boolean arrays should work."""
-        bool_data = np.array(
-            [[True, False, True], [False, True, False], [True, True, False], [False, False, True], [True, False, True]]
-        )
-        result = DataConverter.to_dataarray(
-            bool_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-        assert result.shape == (5, 3)
-        assert result.dtype == bool
-        assert np.array_equal(result.values, bool_data)
-
-        result_as = DataConverter.to_dataarray(
-            bool_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']}
-        )
-        assert result_as.shape == (5, 3)
-        assert result_as.dtype == bool
-        assert np.array_equal(result_as.values, bool_data)
-
-
-class TestSpecialValues:
-    """Test handling of special numeric values."""
-
-    def test_nan_values(self, time_coords):
-        """NaN values should be preserved."""
-        arr_with_nan = np.array([1, np.nan, 3, np.nan, 5])
-        result = DataConverter.to_dataarray(arr_with_nan, coords={'time': time_coords})
-
-        assert np.array_equal(np.isnan(result.values), np.isnan(arr_with_nan))
-        assert np.array_equal(result.values[~np.isnan(result.values)], arr_with_nan[~np.isnan(arr_with_nan)])
-
-    def test_infinite_values(self, time_coords):
-        """Infinite values should be preserved."""
-        arr_with_inf = np.array([1, np.inf, 3, -np.inf, 5])
-        result = DataConverter.to_dataarray(arr_with_inf, coords={'time': time_coords})
-
-        assert np.array_equal(result.values, arr_with_inf)
-
-    def test_boolean_values(self, time_coords):
-        """Boolean values should be preserved."""
-        bool_arr = np.array([True, False, True, False, True])
-        result = DataConverter.to_dataarray(bool_arr, coords={'time': time_coords})
-
-        assert result.dtype == bool
-        assert np.array_equal(result.values, bool_arr)
-
-    def test_mixed_numeric_types(self, time_coords):
-        """Mixed integer/float should become float."""
-        mixed_arr = np.array([1, 2.5, 3, 4.5, 5])
-        result = DataConverter.to_dataarray(mixed_arr, coords={'time': time_coords})
-
-        assert np.issubdtype(result.dtype, np.floating)
-        assert np.array_equal(result.values, mixed_arr)
-
-    def test_special_values_in_multid_arrays(self, standard_coords):
-        """Special values should be preserved in multi-D arrays and broadcasting."""
-        # Array with NaN and inf
-        special_arr = np.array([1, np.nan, np.inf, -np.inf, 5])
-        result = DataConverter.to_dataarray(special_arr, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-
-        # Check that special values are preserved in all broadcasts
-        for scenario in standard_coords['scenario']:
-            for region in standard_coords['region']:
-                slice_data = result.sel(scenario=scenario, region=region)
-                assert np.array_equal(np.isnan(slice_data.values), np.isnan(special_arr))
-                assert np.array_equal(np.isinf(slice_data.values), np.isinf(special_arr))
-
-
-class TestAdvancedBroadcasting:
-    """Test advanced broadcasting scenarios and edge cases."""
-
-    def test_partial_dimension_matching_with_broadcasting(self, standard_coords):
-        """Test that partial dimension matching works with the improved integration."""
-        # 1D array matching one dimension should broadcast to all target dimensions
-        time_arr = np.array([10, 20, 30, 40, 50])  # matches time (length 5)
-        result = DataConverter.to_dataarray(time_arr, coords=standard_coords)
-
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-
-        # Verify broadcasting
-        for scenario in standard_coords['scenario']:
-            for region in standard_coords['region']:
-                assert np.array_equal(result.sel(scenario=scenario, region=region).values, time_arr)
-
-    def test_complex_multid_scenario(self):
-        """Complex real-world scenario with multi-D array and broadcasting."""
-        # Energy system data: time x technology, broadcast to regions
-        coords = {
-            'time': pd.date_range('2024-01-01', periods=24, freq='h', name='time'),  # 24 hours
-            'technology': pd.Index(['solar', 'wind', 'gas', 'coal'], name='technology'),  # 4 technologies
-            'region': pd.Index(['north', 'south', 'east'], name='region'),  # 3 regions
-        }
-
-        # Capacity factors: 24 x 4 (will broadcast to 24 x 4 x 3)
-        capacity_factors = np.random.rand(24, 4)
-
-        result = DataConverter.to_dataarray(capacity_factors, coords=coords)
-
-        assert result.shape == (24, 4, 3)
-        assert result.dims == ('time', 'technology', 'region')
-        assert isinstance(result.indexes['time'], pd.DatetimeIndex)
-
-        # Verify broadcasting: all regions should have same time×technology data
-        for region in coords['region']:
-            assert np.array_equal(result.sel(region=region).values, capacity_factors)
-
-    def test_ambiguous_length_handling(self):
-        """Test handling of ambiguous length scenarios across different data types."""
-        # All dimensions have length 3
-        coords_3x3x3 = {
-            'time': pd.date_range('2024-01-01', periods=3, freq='D', name='time'),
-            'scenario': pd.Index(['A', 'B', 'C'], name='scenario'),
-            'region': pd.Index(['X', 'Y', 'Z'], name='region'),
-        }
-
-        # 1D array - should fail
-        arr_1d = np.array([1, 2, 3])
-        with pytest.raises(ConversionError, match='matches multiple dimension'):
-            DataConverter.to_dataarray(arr_1d, coords=coords_3x3x3)
-
-        # 2D array - should fail
-        arr_2d = np.random.rand(3, 3)
-        with pytest.raises(ConversionError, match='matches multiple dimension'):
-            DataConverter.to_dataarray(arr_2d, coords=coords_3x3x3)
-
-        # 3D array - should fail
-        arr_3d = np.random.rand(3, 3, 3)
-        with pytest.raises(ConversionError, match='matches multiple dimension'):
-            DataConverter.to_dataarray(arr_3d, coords=coords_3x3x3)
-
-    def test_mixed_broadcasting_scenarios(self):
-        """Test various broadcasting scenarios with different input types."""
-        coords = {
-            'time': pd.date_range('2024-01-01', periods=4, freq='D', name='time'),  # length 4
-            'scenario': pd.Index(['A', 'B'], name='scenario'),  # length 2
-            'region': pd.Index(['north', 'south', 'east'], name='region'),  # length 3
-            'product': pd.Index(['X', 'Y', 'Z', 'W', 'V'], name='product'),  # length 5
-        }
-
-        # Scalar to 4D
-        scalar_result = DataConverter.to_dataarray(42, coords=coords)
-        assert scalar_result.shape == (4, 2, 3, 5)
-        assert np.all(scalar_result.values == 42)
-
-        # 1D array (length 4, matches time) to 4D
-        arr_1d = np.array([10, 20, 30, 40])
-        arr_result = DataConverter.to_dataarray(arr_1d, coords=coords)
-        assert 
arr_result.shape == (4, 2, 3, 5) - # Verify broadcasting - for scenario in coords['scenario']: - for region in coords['region']: - for product in coords['product']: - assert np.array_equal( - arr_result.sel(scenario=scenario, region=region, product=product).values, arr_1d - ) - - # 2D array (4x2, matches time×scenario) to 4D - arr_2d = np.random.rand(4, 2) - arr_2d_result = DataConverter.to_dataarray(arr_2d, coords=coords) - assert arr_2d_result.shape == (4, 2, 3, 5) - # Verify broadcasting - for region in coords['region']: - for product in coords['product']: - assert np.array_equal(arr_2d_result.sel(region=region, product=product).values, arr_2d) - - -class TestAmbiguousDimensionLengthHandling: - """Test that DataConverter correctly raises errors when multiple dimensions have the same length.""" - - def test_1d_array_ambiguous_dimensions_simple(self): - """Test 1D array with two dimensions of same length should fail.""" - # Both dimensions have length 3 - coords_ambiguous = { - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['north', 'south', 'east'], name='region'), # length 3 - } - - arr_1d = np.array([1, 2, 3]) # length 3 - matches both dimensions - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_1d, coords=coords_ambiguous) - - def test_1d_array_ambiguous_dimensions_complex(self): - """Test 1D array with multiple dimensions of same length.""" - # Three dimensions have length 4 - coords_4x4x4 = { - 'time': pd.date_range('2024-01-01', periods=4, freq='D', name='time'), # length 4 - 'scenario': pd.Index(['A', 'B', 'C', 'D'], name='scenario'), # length 4 - 'region': pd.Index(['north', 'south', 'east', 'west'], name='region'), # length 4 - 'product': pd.Index(['X', 'Y'], name='product'), # length 2 - unique - } - - # Array matching the ambiguous length - arr_1d = np.array([10, 20, 30, 40]) # length 4 - matches time, scenario, region - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_1d, coords=coords_4x4x4) - - # Array matching the unique length should work - arr_1d_unique = np.array([100, 200]) # length 2 - matches only product - result = DataConverter.to_dataarray(arr_1d_unique, coords=coords_4x4x4) - assert result.shape == (4, 4, 4, 2) # broadcast to all dimensions - assert result.dims == ('time', 'scenario', 'region', 'product') - - def test_2d_array_ambiguous_dimensions_both_same(self): - """Test 2D array where both dimensions have the same ambiguous length.""" - # All dimensions have length 3 - coords_3x3x3 = { - 'time': pd.date_range('2024-01-01', periods=3, freq='D', name='time'), # length 3 - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['X', 'Y', 'Z'], name='region'), # length 3 - } - - # 3x3 array - could be any combination of the three dimensions - arr_2d = np.random.rand(3, 3) - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_2d, coords=coords_3x3x3) - - def test_2d_array_one_dimension_ambiguous(self): - """Test 2D array where only one dimension length is ambiguous.""" - coords_mixed = { - 'time': pd.date_range('2024-01-01', periods=5, freq='D', name='time'), # length 5 - unique - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['X', 'Y', 'Z'], name='region'), # length 3 - same as scenario - 'product': pd.Index(['P1', 'P2'], name='product'), # length 2 - unique - } - - # 5x3 
array - first dimension clearly maps to time (unique length 5) - # but second dimension could be scenario or region (both length 3) - arr_5x3 = np.random.rand(5, 3) - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_5x3, coords=coords_mixed) - - # 5x2 array should work - dimensions are unambiguous - arr_5x2 = np.random.rand(5, 2) - result = DataConverter.to_dataarray( - arr_5x2, coords={'time': coords_mixed['time'], 'product': coords_mixed['product']} - ) - assert result.shape == (5, 2) - assert result.dims == ('time', 'product') - - def test_3d_array_all_dimensions_ambiguous(self): - """Test 3D array where all dimension lengths are ambiguous.""" - # All dimensions have length 2 - coords_2x2x2x2 = { - 'scenario': pd.Index(['A', 'B'], name='scenario'), # length 2 - 'region': pd.Index(['north', 'south'], name='region'), # length 2 - 'technology': pd.Index(['solar', 'wind'], name='technology'), # length 2 - 'product': pd.Index(['X', 'Y'], name='product'), # length 2 - } - - # 2x2x2 array - could be any combination of 3 dimensions from the 4 available - arr_3d = np.random.rand(2, 2, 2) - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_3d, coords=coords_2x2x2x2) - - def test_3d_array_partial_ambiguity(self): - """Test 3D array with partial dimension ambiguity.""" - coords_partial = { - 'time': pd.date_range('2024-01-01', periods=4, freq='D', name='time'), # length 4 - unique - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['X', 'Y', 'Z'], name='region'), # length 3 - same as scenario - 'technology': pd.Index(['solar', 'wind'], name='technology'), # length 2 - unique - } - - # 4x3x2 array - first and third dimensions are unique, middle is ambiguous - # This should still fail because middle dimension (length 3) could be scenario or region - arr_4x3x2 = np.random.rand(4, 3, 2) - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_4x3x2, coords=coords_partial) - - def test_pandas_series_ambiguous_dimensions(self): - """Test pandas Series with ambiguous dimension lengths.""" - coords_ambiguous = { - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['north', 'south', 'east'], name='region'), # length 3 - } - - # Series with length 3 but index that doesn't match either coordinate exactly - generic_series = pd.Series([10, 20, 30], index=[0, 1, 2]) - - # Should fail because length matches multiple dimensions and index doesn't match any - with pytest.raises(ConversionError, match='Series index does not match any target dimension coordinates'): - DataConverter.to_dataarray(generic_series, coords=coords_ambiguous) - - # Series with index that matches one of the ambiguous coordinates should work - scenario_series = pd.Series([10, 20, 30], index=coords_ambiguous['scenario']) - result = DataConverter.to_dataarray(scenario_series, coords=coords_ambiguous) - assert result.shape == (3, 3) # should broadcast to both dimensions - assert result.dims == ('scenario', 'region') - - def test_edge_case_many_same_lengths(self): - """Test edge case with many dimensions having the same length.""" - # Five dimensions all have length 2 - coords_many = { - 'dim1': pd.Index(['A', 'B'], name='dim1'), - 'dim2': pd.Index(['X', 'Y'], name='dim2'), - 'dim3': pd.Index(['P', 'Q'], name='dim3'), - 'dim4': pd.Index(['M', 'N'], name='dim4'), - 'dim5': pd.Index(['U', 'V'], name='dim5'), 
- } - - # 1D array - arr_1d = np.array([1, 2]) - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_1d, coords=coords_many) - - # 2D array - arr_2d = np.random.rand(2, 2) - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_2d, coords=coords_many) - - # 3D array - arr_3d = np.random.rand(2, 2, 2) - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_3d, coords=coords_many) - - def test_mixed_lengths_with_duplicates(self): - """Test mixed scenario with some duplicate and some unique lengths.""" - coords_mixed = { - 'time': pd.date_range('2024-01-01', periods=8, freq='D', name='time'), # length 8 - unique - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['X', 'Y', 'Z'], name='region'), # length 3 - same as scenario - 'technology': pd.Index(['solar'], name='technology'), # length 1 - unique - 'product': pd.Index(['P1', 'P2', 'P3', 'P4', 'P5'], name='product'), # length 5 - unique - } - - # Arrays with unique lengths should work - arr_8 = np.arange(8) - result_8 = DataConverter.to_dataarray(arr_8, coords=coords_mixed) - assert result_8.dims == ('time', 'scenario', 'region', 'technology', 'product') - - arr_1 = np.array([42]) - result_1 = DataConverter.to_dataarray(arr_1, coords={'technology': coords_mixed['technology']}) - assert result_1.shape == (1,) - - arr_5 = np.arange(5) - result_5 = DataConverter.to_dataarray(arr_5, coords={'product': coords_mixed['product']}) - assert result_5.shape == (5,) - - # Arrays with ambiguous length should fail - arr_3 = np.array([1, 2, 3]) # matches both scenario and region - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_3, coords=coords_mixed) - - def test_dataframe_with_ambiguous_dimensions(self): - """Test DataFrame handling with ambiguous dimensions.""" - coords_ambiguous = { - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['X', 'Y', 'Z'], name='region'), # length 3 - } - - # Multi-column DataFrame with ambiguous dimensions - df = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': [7, 8, 9]}) # 3x3 DataFrame - - # Should fail due to ambiguous dimensions - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(df, coords=coords_ambiguous) - - def test_error_message_quality_for_ambiguous_dimensions(self): - """Test that error messages for ambiguous dimensions are helpful.""" - coords_ambiguous = { - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), - 'region': pd.Index(['north', 'south', 'east'], name='region'), - 'technology': pd.Index(['solar', 'wind', 'gas'], name='technology'), - } - - # 1D array case - arr_1d = np.array([1, 2, 3]) - try: - DataConverter.to_dataarray(arr_1d, coords=coords_ambiguous) - raise AssertionError('Should have raised ConversionError') - except ConversionError as e: - error_msg = str(e) - assert 'matches multiple dimension' in error_msg - assert 'scenario' in error_msg - assert 'region' in error_msg - assert 'technology' in error_msg - - # 2D array case - arr_2d = np.random.rand(3, 3) - try: - DataConverter.to_dataarray(arr_2d, coords=coords_ambiguous) - raise AssertionError('Should have raised ConversionError') - except ConversionError as e: - error_msg = str(e) - assert 'matches multiple dimension combinations' in error_msg - assert '(3, 3)' in error_msg - - def 
test_ambiguous_with_broadcasting_target(self): - """Test ambiguous dimensions when target includes broadcasting.""" - coords_ambiguous_plus = { - 'time': pd.date_range('2024-01-01', periods=5, freq='D', name='time'), # length 5 - 'scenario': pd.Index(['A', 'B', 'C'], name='scenario'), # length 3 - 'region': pd.Index(['X', 'Y', 'Z'], name='region'), # length 3 - same as scenario - 'technology': pd.Index(['solar', 'wind'], name='technology'), # length 2 - } - - # 1D array with ambiguous length, but targeting broadcast scenario - arr_3 = np.array([10, 20, 30]) # length 3, matches scenario and region - - # Should fail even though it would broadcast to other dimensions - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_3, coords=coords_ambiguous_plus) - - # 2D array with one ambiguous dimension - arr_5x3 = np.random.rand(5, 3) # 5 is unique (time), 3 is ambiguous (scenario/region) - - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(arr_5x3, coords=coords_ambiguous_plus) - - def test_time_dimension_ambiguity(self): - """Test ambiguity specifically involving time dimension.""" - # Create scenario where time has same length as another dimension - coords_time_ambiguous = { - 'time': pd.date_range('2024-01-01', periods=3, freq='D', name='time'), # length 3 - 'scenario': pd.Index(['base', 'high', 'low'], name='scenario'), # length 3 - same as time - 'region': pd.Index(['north', 'south'], name='region'), # length 2 - unique - } - - # Time-indexed series should work even with ambiguous lengths (index matching takes precedence) - time_series = pd.Series([100, 200, 300], index=coords_time_ambiguous['time']) - result = DataConverter.to_dataarray(time_series, coords=coords_time_ambiguous) - assert result.shape == (3, 3, 2) - assert result.dims == ('time', 'scenario', 'region') - - # But generic array with length 3 should still fail - generic_array = np.array([100, 200, 300]) - with pytest.raises(ConversionError, match='matches multiple dimension'): - DataConverter.to_dataarray(generic_array, coords=coords_time_ambiguous) - - -if __name__ == '__main__': - pytest.main() diff --git a/tests/deprecated/test_flow.py b/tests/deprecated/test_flow.py index 487a96c25..69922482a 100644 --- a/tests/deprecated/test_flow.py +++ b/tests/deprecated/test_flow.py @@ -69,8 +69,9 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config): model.add_variables(lower=10, upper=1000, coords=model.get_coords(['period', 'scenario'])), ) - assert flow.relative_minimum.dims == tuple(model.get_coords()) - assert flow.relative_maximum.dims == tuple(model.get_coords()) + # Data stays in minimal form (not broadcast to all model dimensions) + assert flow.relative_minimum.dims == ('time',) # Only time dimension + assert flow.relative_maximum.dims == ('time',) # Only time dimension assert_var_equal( flow.submodel.flow_rate, @@ -182,8 +183,9 @@ def test_flow_invest(self, basic_flow_system_linopy_coords, coords_config): model.add_variables(lower=20, upper=100, coords=model.get_coords(['period', 'scenario'])), ) - assert flow.relative_minimum.dims == tuple(model.get_coords()) - assert flow.relative_maximum.dims == tuple(model.get_coords()) + # Data stays in minimal form (not broadcast to all model dimensions) + assert flow.relative_minimum.dims == ('time',) # Only time dimension + assert flow.relative_maximum.dims == ('time',) # Only time dimension # flow_rate assert_var_equal( @@ -247,8 +249,9 @@ def 
test_flow_invest_optional(self, basic_flow_system_linopy_coords, coords_conf model.add_variables(binary=True, coords=model.get_coords(['period', 'scenario'])), ) - assert flow.relative_minimum.dims == tuple(model.get_coords()) - assert flow.relative_maximum.dims == tuple(model.get_coords()) + # Data stays in minimal form (not broadcast to all model dimensions) + assert flow.relative_minimum.dims == ('time',) # Only time dimension + assert flow.relative_maximum.dims == ('time',) # Only time dimension # flow_rate assert_var_equal( @@ -322,8 +325,9 @@ def test_flow_invest_optional_wo_min_size(self, basic_flow_system_linopy_coords, model.add_variables(binary=True, coords=model.get_coords(['period', 'scenario'])), ) - assert flow.relative_minimum.dims == tuple(model.get_coords()) - assert flow.relative_maximum.dims == tuple(model.get_coords()) + # Data stays in minimal form (not broadcast to all model dimensions) + assert flow.relative_minimum.dims == ('time',) # Only time dimension + assert flow.relative_maximum.dims == ('time',) # Only time dimension # flow_rate assert_var_equal( @@ -390,8 +394,9 @@ def test_flow_invest_wo_min_size_non_optional(self, basic_flow_system_linopy_coo model.add_variables(lower=1e-5, upper=100, coords=model.get_coords(['period', 'scenario'])), ) - assert flow.relative_minimum.dims == tuple(model.get_coords()) - assert flow.relative_maximum.dims == tuple(model.get_coords()) + # Data stays in minimal form (not broadcast to all model dimensions) + assert flow.relative_minimum.dims == ('time',) # Only time dimension + assert flow.relative_maximum.dims == ('time',) # Only time dimension # flow_rate assert_var_equal( @@ -629,8 +634,9 @@ def test_effects_per_active_hour(self, basic_flow_system_linopy_coords, coords_c costs_per_running_hour = flow.status_parameters.effects_per_active_hour['costs'] co2_per_running_hour = flow.status_parameters.effects_per_active_hour['CO2'] - assert costs_per_running_hour.dims == tuple(model.get_coords()) - assert co2_per_running_hour.dims == tuple(model.get_coords()) + # Data stays in minimal form (1D array stays 1D) + assert costs_per_running_hour.dims == ('time',) + assert co2_per_running_hour.dims == ('time',) assert_conequal( model.constraints['Sink(Wärme)->costs(temporal)'], diff --git a/tests/deprecated/test_linear_converter.py b/tests/deprecated/test_linear_converter.py index d20d104d0..76a45553e 100644 --- a/tests/deprecated/test_linear_converter.py +++ b/tests/deprecated/test_linear_converter.py @@ -282,7 +282,8 @@ def test_edge_case_time_varying_conversion(self, basic_flow_system_linopy_coords factor = converter.conversion_factors[0]['electricity'] - assert factor.dims == tuple(model.get_coords()) + # Data stays in minimal form (1D array stays 1D) + assert factor.dims == ('time',) # Verify the constraint has the time-varying coefficient assert_conequal( diff --git a/tests/test_cluster_reduce_expand.py b/tests/test_cluster_reduce_expand.py index 9c119ee2d..d6c991783 100644 --- a/tests/test_cluster_reduce_expand.py +++ b/tests/test_cluster_reduce_expand.py @@ -1180,6 +1180,51 @@ def test_segmented_timestep_mapping_uses_segment_assignments(self, timesteps_8_d assert mapping.min() >= 0 assert mapping.max() <= max_valid_idx + @pytest.mark.parametrize('freq', ['1h', '2h']) + def test_segmented_total_effects_match_solution(self, solver_fixture, freq): + """Test that total_effects matches solution Cost after expand with segmentation. 
+
+        This is a regression test for the bug where expansion_divisor was computed
+        incorrectly for segmented systems, causing total_effects to not match the
+        solution's objective value.
+        """
+        from tsam.config import SegmentConfig
+
+        # Create system with specified timestep frequency
+        n_timesteps = 72 if freq == '1h' else 36  # 3 days' worth
+        timesteps = pd.date_range('2024-01-01', periods=n_timesteps, freq=freq)
+        fs = fx.FlowSystem(timesteps=timesteps)
+
+        # Minimal components: effect + source + sink with varying demand
+        fs.add_elements(fx.Effect('Cost', unit='EUR', is_objective=True))
+        fs.add_elements(fx.Bus('Heat'))
+        fs.add_elements(
+            fx.Source(
+                'Boiler',
+                outputs=[fx.Flow('Q', bus='Heat', size=100, effects_per_flow_hour={'Cost': 50})],
+            )
+        )
+        demand_profile = np.tile([0.5, 1], n_timesteps // 2)
+        fs.add_elements(
+            fx.Sink('Demand', inputs=[fx.Flow('Q', bus='Heat', size=50, fixed_relative_profile=demand_profile)])
+        )
+
+        # Cluster with segments -> solve -> expand
+        fs_clustered = fs.transform.cluster(
+            n_clusters=2,
+            cluster_duration='1D',
+            segments=SegmentConfig(n_segments=4),
+        )
+        fs_clustered.optimize(solver_fixture)
+        fs_expanded = fs_clustered.transform.expand()
+
+        # Validate: total_effects must match solution objective
+        computed = fs_expanded.statistics.total_effects['Cost'].sum('contributor')
+        expected = fs_expanded.solution['Cost']
+        assert np.allclose(computed.values, expected.values, rtol=1e-5), (
+            f'total_effects mismatch: computed={float(computed):.2f}, expected={float(expected):.2f}'
+        )
+
 
 class TestSegmentationWithStorage:
     """Tests for segmentation combined with storage components."""
@@ -1393,3 +1438,86 @@ def test_segmented_expand_after_load(self, solver_fixture, timesteps_8_days, tmp
         fs_segmented.solution['objective'].item(),
         rtol=1e-6,
     )
+
+
+class TestCombineSlices:
+    """Tests for the combine_slices utility function."""
+
+    def test_single_dim(self):
+        """Test combining slices with a single extra dimension."""
+        from flixopt.clustering.base import combine_slices
+
+        slices = {
+            ('A',): np.array([1.0, 2.0, 3.0]),
+            ('B',): np.array([4.0, 5.0, 6.0]),
+        }
+        result = combine_slices(
+            slices,
+            extra_dims=['x'],
+            dim_coords={'x': ['A', 'B']},
+            output_dim='time',
+            output_coord=[0, 1, 2],
+        )
+
+        assert result.dims == ('time', 'x')
+        assert result.shape == (3, 2)
+        assert result.sel(x='A').values.tolist() == [1.0, 2.0, 3.0]
+        assert result.sel(x='B').values.tolist() == [4.0, 5.0, 6.0]
+
+    def test_two_dims(self):
+        """Test combining slices with two extra dimensions."""
+        from flixopt.clustering.base import combine_slices
+
+        slices = {
+            ('P1', 'base'): np.array([1.0, 2.0]),
+            ('P1', 'high'): np.array([3.0, 4.0]),
+            ('P2', 'base'): np.array([5.0, 6.0]),
+            ('P2', 'high'): np.array([7.0, 8.0]),
+        }
+        result = combine_slices(
+            slices,
+            extra_dims=['period', 'scenario'],
+            dim_coords={'period': ['P1', 'P2'], 'scenario': ['base', 'high']},
+            output_dim='time',
+            output_coord=[0, 1],
+        )
+
+        assert result.dims == ('time', 'period', 'scenario')
+        assert result.shape == (2, 2, 2)
+        assert result.sel(period='P1', scenario='base').values.tolist() == [1.0, 2.0]
+        assert result.sel(period='P2', scenario='high').values.tolist() == [7.0, 8.0]
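+
+    def test_error_paths(self):
+        """Exercise the documented error paths of combine_slices.
+
+        A sketch added for coverage of the function's documented Raises section
+        (ValueError for an empty slices dict, KeyError for a missing key); the
+        exact match strings are assumptions based on that documentation.
+        """
+        from flixopt.clustering.base import combine_slices
+
+        # Empty input is rejected before any shape inference happens
+        with pytest.raises(ValueError, match='cannot be empty'):
+            combine_slices({}, extra_dims=[], dim_coords={}, output_dim='time', output_coord=[])
+
+        # A combination present in dim_coords but absent from slices raises KeyError
+        slices = {('A',): np.array([1.0, 2.0])}
+        with pytest.raises(KeyError, match='Missing slice'):
+            combine_slices(
+                slices,
+                extra_dims=['x'],
+                dim_coords={'x': ['A', 'B']},  # no slice provided for 'B'
+                output_dim='time',
+                output_coord=[0, 1],
+            )
+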
+    def test_attrs_propagation(self):
+        """Test that attrs are propagated to the result."""
+        from flixopt.clustering.base import combine_slices
+
+        slices = {('A',): np.array([1.0, 2.0])}
+        result = combine_slices(
+            slices,
+            extra_dims=['x'],
+            dim_coords={'x': ['A']},
+            output_dim='time',
+            output_coord=[0, 1],
+            attrs={'units': 'kW', 'description': 'power'},
+        )
+
+        assert result.attrs['units'] == 'kW'
+        assert result.attrs['description'] == 'power'
+
+    def test_datetime_coords(self):
+        """Test with pandas DatetimeIndex as output coordinates."""
+        from flixopt.clustering.base import combine_slices
+
+        time_index = pd.date_range('2020-01-01', periods=3, freq='h')
+        slices = {('A',): np.array([1.0, 2.0, 3.0])}
+        result = combine_slices(
+            slices,
+            extra_dims=['x'],
+            dim_coords={'x': ['A']},
+            output_dim='time',
+            output_coord=time_index,
+        )
+
+        assert result.dims == ('time', 'x')
+        assert len(result.coords['time']) == 3
+        assert result.coords['time'][0].values == time_index[0]
diff --git a/tests/test_component.py b/tests/test_component.py
index 626a64272..c5ebd34a3 100644
--- a/tests/test_component.py
+++ b/tests/test_component.py
@@ -7,6 +7,7 @@
 from .conftest import (
     assert_almost_equal_numeric,
     assert_conequal,
+    assert_dims_compatible,
     assert_sets_equal,
     assert_var_equal,
     create_linopy_model,
@@ -131,7 +132,7 @@ def test_on_with_multiple_flows(self, basic_flow_system_linopy_coords, coords_co
 
         upper_bound_flow_rate = outputs[1].relative_maximum
 
-        assert upper_bound_flow_rate.dims == tuple(model.get_coords())
+        assert_dims_compatible(upper_bound_flow_rate, tuple(model.get_coords()))
 
         assert_var_equal(
             model['TestComponent(Out2)|flow_rate'],
@@ -311,7 +312,7 @@ def test_previous_states_with_multiple_flows(self, basic_flow_system_linopy_coor
 
         upper_bound_flow_rate = outputs[1].relative_maximum
 
-        assert upper_bound_flow_rate.dims == tuple(model.get_coords())
+        assert_dims_compatible(upper_bound_flow_rate, tuple(model.get_coords()))
 
         assert_var_equal(
             model['TestComponent(Out2)|flow_rate'],
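# Background for the dimension assertions above and the test_dataconverter.py
# changes below: xarray broadcasts by dimension name when arrays are combined,
# so data kept in minimal form still aligns with the full model coordinates at
# the linopy interface. A self-contained illustration with toy data (an aside,
# not part of this diff):
import numpy as np
import xarray as xr

profile = xr.DataArray(np.arange(3.0), dims=['time'])  # minimal form: ('time',)
bound = xr.DataArray([[1.0, 2.0]], dims=['period', 'scenario'])  # ('period', 'scenario')

# Arithmetic broadcasts automatically over the union of dimension names
product = profile * bound
assert product.dims == ('time', 'period', 'scenario')
assert product.shape == (3, 1, 2)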
diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py
index a5774fd6b..f9f2df889 100644
--- a/tests/test_dataconverter.py
+++ b/tests/test_dataconverter.py
@@ -46,34 +46,38 @@ def test_scalar_no_coords(self):
         assert result.item() == 42
 
     def test_scalar_single_coord(self, time_coords):
-        """Scalar with single coordinate should broadcast."""
+        """Scalar with single coordinate stays as scalar (no broadcasting at conversion time)."""
         result = DataConverter.to_dataarray(42, coords={'time': time_coords})
-        assert result.shape == (5,)
-        assert result.dims == ('time',)
-        assert np.all(result.values == 42)
+        # Scalars stay as scalars - broadcasting happens at linopy interface
+        assert result.shape == ()
+        assert result.dims == ()
+        assert result.item() == 42
 
     def test_scalar_multiple_coords(self, time_coords, scenario_coords):
-        """Scalar with multiple coordinates should broadcast to all."""
+        """Scalar with multiple coordinates stays as scalar (no broadcasting at conversion time)."""
         result = DataConverter.to_dataarray(42, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-        assert np.all(result.values == 42)
+        # Scalars stay as scalars - broadcasting happens at linopy interface
+        assert result.shape == ()
+        assert result.dims == ()
+        assert result.item() == 42
 
     def test_numpy_scalars(self, time_coords):
-        """Test numpy scalar types."""
+        """Test numpy scalar types stay as scalars."""
         for scalar in [np.int32(42), np.int64(42), np.float32(42.5), np.float64(42.5)]:
             result = DataConverter.to_dataarray(scalar, coords={'time': time_coords})
-            assert result.shape == (5,)
-            assert np.all(result.values == scalar.item())
+            # Scalars stay as scalars - broadcasting happens at linopy interface
+            assert result.shape == ()
+            assert result.item() == scalar.item()
 
     def test_scalar_many_dimensions(self, standard_coords):
-        """Scalar should broadcast to any number of dimensions."""
+        """Scalar stays as scalar regardless of target dimensions."""
         coords = {**standard_coords, 'technology': pd.Index(['solar', 'wind'], name='technology')}
         result = DataConverter.to_dataarray(42, coords=coords)
-        assert result.shape == (5, 3, 2, 2)
-        assert result.dims == ('time', 'scenario', 'region', 'technology')
-        assert np.all(result.values == 42)
+        # Scalars stay as scalars - broadcasting happens at linopy interface
+        assert result.shape == ()
+        assert result.dims == ()
+        assert result.item() == 42
 
 
 class TestOneDimensionalArrayConversion:
@@ -105,26 +109,20 @@ def test_1d_array_mismatched_coord(self, time_coords):
             DataConverter.to_dataarray(arr, coords={'time': time_coords})
 
     def test_1d_array_broadcast_to_multiple_coords(self, time_coords, scenario_coords):
-        """1D array should broadcast to matching dimension."""
-        # Array matching time dimension
+        """1D array stays in minimal form, matching only one dimension (no broadcast at conversion)."""
+        # Array matching time dimension - stays as 1D with time dim only
         time_arr = np.array([10, 20, 30, 40, 50])
         result = DataConverter.to_dataarray(time_arr, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-
-        # Each scenario should have the same time values
-        for scenario in scenario_coords:
-            assert np.array_equal(result.sel(scenario=scenario).values, time_arr)
+        assert result.shape == (5,)
+        assert result.dims == ('time',)
+        assert np.array_equal(result.values, time_arr)
 
-        # Array matching scenario dimension
+        # Array matching scenario dimension - stays as 1D with scenario dim only
         scenario_arr = np.array([100, 200, 300])
         result = DataConverter.to_dataarray(scenario_arr, coords={'time': time_coords, 'scenario': scenario_coords})
-        assert result.shape == (5, 3)
-        assert result.dims == ('time', 'scenario')
-
-        # Each time should have the same scenario values
-        for time in time_coords:
-            assert np.array_equal(result.sel(time=time).values, scenario_arr)
+        assert result.shape == (3,)
+        assert result.dims == ('scenario',)
+        assert np.array_equal(result.values, scenario_arr)
 
     def test_1d_array_ambiguous_length(self):
         """Array length matching multiple dimensions should fail."""
@@ -139,18 +137,14 @@
             DataConverter.to_dataarray(arr, coords=coords_3x3)
 
     def test_1d_array_broadcast_to_many_dimensions(self, standard_coords):
-        """1D array should broadcast to many dimensions."""
-        # Array matching time dimension
+        """1D array stays in minimal form with matched dimension only (no broadcast at conversion)."""
+        # Array matching time dimension - stays as 1D with time dim only
         time_arr = np.array([10, 20, 30, 40, 50])
        result = DataConverter.to_dataarray(time_arr, coords=standard_coords)
-        assert result.shape == (5, 3, 2)
-        assert result.dims == ('time', 'scenario', 'region')
-
-        # Check broadcasting - all scenarios and regions should have same time values
-        for scenario in standard_coords['scenario']:
-            for region in standard_coords['region']:
-                assert np.array_equal(result.sel(scenario=scenario, region=region).values, time_arr)
+        assert result.shape == (5,)
+        assert result.dims == ('time',)
+        assert np.array_equal(result.values, time_arr)
 
 
 class TestSeriesConversion:
@@ -194,14 +188,13 @@ def test_series_mismatched_index(self, time_coords):
             DataConverter.to_dataarray(series, coords={'time':
time_coords}) def test_series_broadcast_to_multiple_coords(self, time_coords, scenario_coords): - """Series should broadcast to non-matching dimensions.""" - # Time series broadcast to scenarios + """Series stays in minimal form with matched dimension only (no broadcast at conversion).""" + # Time series stays as 1D with time dim only time_series = pd.Series([10, 20, 30, 40, 50], index=time_coords) result = DataConverter.to_dataarray(time_series, coords={'time': time_coords, 'scenario': scenario_coords}) - assert result.shape == (5, 3) - - for scenario in scenario_coords: - assert np.array_equal(result.sel(scenario=scenario).values, time_series.values) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, time_series.values) def test_series_wrong_dimension(self, time_coords, region_coords): """Series indexed by dimension not in coords should fail.""" @@ -211,17 +204,13 @@ def test_series_wrong_dimension(self, time_coords, region_coords): DataConverter.to_dataarray(wrong_series, coords={'time': time_coords}) def test_series_broadcast_to_many_dimensions(self, standard_coords): - """Series should broadcast to many dimensions.""" + """Series stays in minimal form with matched dimension only (no broadcast at conversion).""" time_series = pd.Series([100, 200, 300, 400, 500], index=standard_coords['time']) result = DataConverter.to_dataarray(time_series, coords=standard_coords) - assert result.shape == (5, 3, 2) - assert result.dims == ('time', 'scenario', 'region') - - # Check that all non-time dimensions have the same time series values - for scenario in standard_coords['scenario']: - for region in standard_coords['region']: - assert np.array_equal(result.sel(scenario=scenario, region=region).values, time_series.values) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, time_series.values) class TestDataFrameConversion: @@ -258,13 +247,14 @@ def test_empty_dataframe_rejected(self, time_coords): DataConverter.to_dataarray(df, coords={'time': time_coords}) def test_dataframe_broadcast(self, time_coords, scenario_coords): - """Single-column DataFrame should broadcast like Series.""" + """Single-column DataFrame stays 1D (no broadcasting at conversion time).""" df = pd.DataFrame({'power': [10, 20, 30, 40, 50]}, index=time_coords) result = DataConverter.to_dataarray(df, coords={'time': time_coords, 'scenario': scenario_coords}) - assert result.shape == (5, 3) - for scenario in scenario_coords: - assert np.array_equal(result.sel(scenario=scenario).values, df['power'].values) + # Data stays in minimal form - 1D along time + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, df['power'].values) class TestMultiDimensionalArrayConversion: @@ -282,7 +272,7 @@ def test_2d_array_unique_dimensions(self, standard_coords): assert result.dims == ('time', 'scenario') assert np.array_equal(result.values, data_2d) - # 3x5 array should map to scenario x time + # 3x5 array should map to scenario x time, then transpose to canonical (time, scenario) order data_2d_flipped = np.random.rand(3, 5) result_flipped = DataConverter.to_dataarray( data_2d_flipped, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']} @@ -292,18 +282,15 @@ def test_2d_array_unique_dimensions(self, standard_coords): assert result_flipped.dims == ('time', 'scenario') assert np.array_equal(result_flipped.values.transpose(), data_2d_flipped) - def 
test_2d_array_broadcast_to_3d(self, standard_coords): - """2D array should broadcast to additional dimensions when using partial matching.""" - # With improved integration, 2D array (5x3) should match time×scenario and broadcast to region + def test_2d_array_stays_2d(self, standard_coords): + """2D array stays 2D even when more coords are provided (no broadcasting).""" + # 2D array (5x3) matches time×scenario, stays 2D (no broadcast to region) data_2d = np.random.rand(5, 3) result = DataConverter.to_dataarray(data_2d, coords=standard_coords) - assert result.shape == (5, 3, 2) - assert result.dims == ('time', 'scenario', 'region') - - # Check that all regions have the same time x scenario data - for region in standard_coords['region']: - assert np.array_equal(result.sel(region=region).values, data_2d) + assert result.shape == (5, 3) + assert result.dims == ('time', 'scenario') + assert np.array_equal(result.values, data_2d) def test_3d_array_unique_dimensions(self, standard_coords): """3D array with unique dimension lengths should work.""" @@ -316,8 +303,8 @@ def test_3d_array_unique_dimensions(self, standard_coords): assert np.array_equal(result.values, data_3d) def test_3d_array_different_permutation(self, standard_coords): - """3D array with different dimension order should work.""" - # 2x5x3 array should map to region x time x scenario + """3D array with different dimension order should be transposed to canonical order.""" + # 2x5x3 array should map to region x time x scenario, then transpose to canonical order data_3d = np.random.rand(2, 5, 3) result = DataConverter.to_dataarray(data_3d, coords=standard_coords) @@ -450,28 +437,26 @@ def test_incompatible_dataarray_dims(self, time_coords): with pytest.raises(ConversionError): DataConverter.to_dataarray(original, coords={'time': time_coords}) - def test_dataarray_broadcast(self, time_coords, scenario_coords): - """DataArray should broadcast to additional dimensions.""" - # 1D time DataArray to 2D time+scenario + def test_dataarray_no_broadcast(self, time_coords, scenario_coords): + """DataArray stays in minimal form (no broadcasting at conversion time).""" + # 1D time DataArray stays 1D even with additional coords original = xr.DataArray([10, 20, 30, 40, 50], coords={'time': time_coords}, dims='time') result = DataConverter.to_dataarray(original, coords={'time': time_coords, 'scenario': scenario_coords}) - assert result.shape == (5, 3) - assert result.dims == ('time', 'scenario') - - for scenario in scenario_coords: - assert np.array_equal(result.sel(scenario=scenario).values, original.values) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, original.values) - def test_scalar_dataarray_broadcast(self, time_coords, scenario_coords): - """Scalar DataArray should broadcast to all dimensions.""" + def test_scalar_dataarray_stays_scalar(self, time_coords, scenario_coords): + """Scalar DataArray stays scalar (no broadcasting at conversion time).""" scalar_da = xr.DataArray(42) result = DataConverter.to_dataarray(scalar_da, coords={'time': time_coords, 'scenario': scenario_coords}) - assert result.shape == (5, 3) - assert np.all(result.values == 42) + assert result.shape == () + assert result.item() == 42 - def test_2d_dataarray_broadcast_to_more_dimensions(self, standard_coords): - """DataArray should broadcast to additional dimensions.""" + def test_2d_dataarray_stays_2d(self, standard_coords): + """DataArray stays in minimal form (no broadcasting at conversion time).""" # Start with 2D 
DataArray original = xr.DataArray( [[10, 20, 30], [40, 50, 60], [70, 80, 90], [100, 110, 120], [130, 140, 150]], @@ -479,15 +464,12 @@ def test_2d_dataarray_broadcast_to_more_dimensions(self, standard_coords): dims=('time', 'scenario'), ) - # Broadcast to 3D + # Stays 2D (no broadcast to 3D) result = DataConverter.to_dataarray(original, coords=standard_coords) - assert result.shape == (5, 3, 2) - assert result.dims == ('time', 'scenario', 'region') - - # Check that all regions have the same time+scenario values - for region in standard_coords['region']: - assert np.array_equal(result.sel(region=region).values, original.values) + assert result.shape == (5, 3) + assert result.dims == ('time', 'scenario') + assert np.array_equal(result.values, original.values) class TestTimeSeriesDataConversion: @@ -504,16 +486,17 @@ def test_timeseries_data_basic(self, time_coords): assert result.dims == ('time',) assert np.array_equal(result.values, [10, 20, 30, 40, 50]) - def test_timeseries_data_broadcast(self, time_coords, scenario_coords): - """TimeSeriesData should broadcast to additional dimensions.""" + def test_timeseries_data_stays_minimal(self, time_coords, scenario_coords): + """TimeSeriesData stays in minimal form (no broadcasting at conversion time).""" data_array = xr.DataArray([10, 20, 30, 40, 50], coords={'time': time_coords}, dims='time') ts_data = TimeSeriesData(data_array) result = DataConverter.to_dataarray(ts_data, coords={'time': time_coords, 'scenario': scenario_coords}) - assert result.shape == (5, 3) - for scenario in scenario_coords: - assert np.array_equal(result.sel(scenario=scenario).values, [10, 20, 30, 40, 50]) + # Stays 1D (time only) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, [10, 20, 30, 40, 50]) class TestAsDataArrayAlias: @@ -547,60 +530,52 @@ class TestCustomDimensions: """Test with custom dimension names beyond time/scenario.""" def test_custom_single_dimension(self, region_coords): - """Test with custom dimension name.""" + """Scalar stays scalar even with custom dimension.""" result = DataConverter.to_dataarray(42, coords={'region': region_coords}) - assert result.shape == (3,) - assert result.dims == ('region',) - assert np.all(result.values == 42) + assert result.shape == () + assert result.dims == () + assert result.item() == 42 def test_custom_multiple_dimensions(self): - """Test with multiple custom dimensions.""" + """Array with matching dimension stays 1D (no broadcasting).""" products = pd.Index(['A', 'B'], name='product') technologies = pd.Index(['solar', 'wind', 'gas'], name='technology') - # Array matching technology dimension + # Array matching technology dimension stays 1D arr = np.array([100, 150, 80]) result = DataConverter.to_dataarray(arr, coords={'product': products, 'technology': technologies}) - assert result.shape == (2, 3) - assert result.dims == ('product', 'technology') - - # Should broadcast across products - for product in products: - assert np.array_equal(result.sel(product=product).values, arr) + assert result.shape == (3,) + assert result.dims == ('technology',) + assert np.array_equal(result.values, arr) def test_mixed_dimension_types(self): - """Test mixing time dimension with custom dimensions.""" + """Time series stays 1D (no broadcasting to regions).""" time_coords = pd.date_range('2024-01-01', periods=3, freq='D', name='time') regions = pd.Index(['north', 'south'], name='region') - # Time series should broadcast to regions time_series = pd.Series([10, 20, 30], 
index=time_coords) result = DataConverter.to_dataarray(time_series, coords={'time': time_coords, 'region': regions}) - assert result.shape == (3, 2) - assert result.dims == ('time', 'region') + # Stays 1D along time + assert result.shape == (3,) + assert result.dims == ('time',) def test_custom_dimensions_complex(self): - """Test complex scenario with custom dimensions.""" + """Array with matching dimension stays 1D (no broadcasting).""" coords = { 'product': pd.Index(['A', 'B'], name='product'), 'factory': pd.Index(['F1', 'F2', 'F3'], name='factory'), 'quarter': pd.Index(['Q1', 'Q2', 'Q3', 'Q4'], name='quarter'), } - # Array matching factory dimension + # Array matching factory dimension stays 1D factory_arr = np.array([100, 200, 300]) result = DataConverter.to_dataarray(factory_arr, coords=coords) - assert result.shape == (2, 3, 4) - assert result.dims == ('product', 'factory', 'quarter') - - # Check broadcasting - for product in coords['product']: - for quarter in coords['quarter']: - slice_data = result.sel(product=product, quarter=quarter) - assert np.array_equal(slice_data.values, factory_arr) + assert result.shape == (3,) + assert result.dims == ('factory',) + assert np.array_equal(result.values, factory_arr) class TestValidation: @@ -631,11 +606,11 @@ def test_time_coord_validation(self): DataConverter.to_dataarray(42, coords={'time': wrong_time}) def test_coord_naming(self, time_coords): - """Coordinates should be auto-renamed to match dimension.""" - # Unnamed time index should be renamed - unnamed_time = time_coords.rename(None) - result = DataConverter.to_dataarray(42, coords={'time': unnamed_time}) - assert result.coords['time'].name == 'time' + """Scalar with coords stays scalar but validates coords.""" + # Scalar stays scalar regardless of coords + result = DataConverter.to_dataarray(42, coords={'time': time_coords}) + assert result.shape == () + assert result.item() == 42 class TestErrorHandling: @@ -735,28 +710,28 @@ class TestBooleanValues: """Test handling of boolean values and arrays.""" def test_scalar_boolean_to_dataarray(self, time_coords): - """Scalar boolean values should work with to_dataarray.""" + """Scalar boolean values stay scalar (no broadcasting).""" result_true = DataConverter.to_dataarray(True, coords={'time': time_coords}) - assert result_true.shape == (5,) + assert result_true.shape == () assert result_true.dtype == bool - assert np.all(result_true.values) + assert result_true.item() is True result_false = DataConverter.to_dataarray(False, coords={'time': time_coords}) - assert result_false.shape == (5,) + assert result_false.shape == () assert result_false.dtype == bool - assert not np.any(result_false.values) + assert result_false.item() is False def test_numpy_boolean_scalar(self, time_coords): - """Numpy boolean scalars should work.""" + """Numpy boolean scalars stay scalar (no broadcasting).""" result_np_true = DataConverter.to_dataarray(np.bool_(True), coords={'time': time_coords}) - assert result_np_true.shape == (5,) + assert result_np_true.shape == () assert result_np_true.dtype == bool - assert np.all(result_np_true.values) + assert result_np_true.item() is True result_np_false = DataConverter.to_dataarray(np.bool_(False), coords={'time': time_coords}) - assert result_np_false.shape == (5,) + assert result_np_false.shape == () assert result_np_false.dtype == bool - assert not np.any(result_np_false.values) + assert result_np_false.item() is False def test_boolean_array_to_dataarray(self, time_coords): """Boolean arrays should work with 
to_dataarray.""" @@ -779,19 +754,17 @@ def test_boolean_no_coords(self): assert result_as.dims == () assert not result_as.item() - def test_boolean_multidimensional_broadcast(self, standard_coords): - """Boolean values should broadcast to multiple dimensions.""" + def test_boolean_scalar_stays_scalar(self, standard_coords): + """Boolean scalars stay scalar (no broadcasting).""" result = DataConverter.to_dataarray(True, coords=standard_coords) - assert result.shape == (5, 3, 2) - assert result.dims == ('time', 'scenario', 'region') + assert result.shape == () assert result.dtype == bool - assert np.all(result.values) + assert result.item() is True result_as = DataConverter.to_dataarray(False, coords=standard_coords) - assert result_as.shape == (5, 3, 2) - assert result_as.dims == ('time', 'scenario', 'region') + assert result_as.shape == () assert result_as.dtype == bool - assert not np.any(result_as.values) + assert result_as.item() is False def test_boolean_series(self, time_coords): """Boolean Series should work.""" @@ -873,60 +846,51 @@ def test_mixed_numeric_types(self, time_coords): assert np.issubdtype(result.dtype, np.floating) assert np.array_equal(result.values, mixed_arr) - def test_special_values_in_multid_arrays(self, standard_coords): - """Special values should be preserved in multi-D arrays and broadcasting.""" + def test_special_values_in_1d_arrays(self, standard_coords): + """Special values should be preserved in arrays (no broadcasting).""" # Array with NaN and inf special_arr = np.array([1, np.nan, np.inf, -np.inf, 5]) result = DataConverter.to_dataarray(special_arr, coords=standard_coords) - assert result.shape == (5, 3, 2) + # Stays 1D (no broadcasting) + assert result.shape == (5,) + assert result.dims == ('time',) - # Check that special values are preserved in all broadcasts - for scenario in standard_coords['scenario']: - for region in standard_coords['region']: - slice_data = result.sel(scenario=scenario, region=region) - assert np.array_equal(np.isnan(slice_data.values), np.isnan(special_arr)) - assert np.array_equal(np.isinf(slice_data.values), np.isinf(special_arr)) + # Check that special values are preserved + assert np.array_equal(np.isnan(result.values), np.isnan(special_arr)) + assert np.array_equal(np.isinf(result.values), np.isinf(special_arr)) class TestAdvancedBroadcasting: - """Test advanced broadcasting scenarios and edge cases.""" + """Test advanced scenarios (no broadcasting - data stays minimal).""" - def test_partial_dimension_matching_with_broadcasting(self, standard_coords): - """Test that partial dimension matching works with the improved integration.""" - # 1D array matching one dimension should broadcast to all target dimensions + def test_partial_dimension_matching_stays_1d(self, standard_coords): + """1D array stays 1D (no broadcasting to additional dimensions).""" time_arr = np.array([10, 20, 30, 40, 50]) # matches time (length 5) result = DataConverter.to_dataarray(time_arr, coords=standard_coords) - assert result.shape == (5, 3, 2) - assert result.dims == ('time', 'scenario', 'region') - - # Verify broadcasting - for scenario in standard_coords['scenario']: - for region in standard_coords['region']: - assert np.array_equal(result.sel(scenario=scenario, region=region).values, time_arr) + # Stays 1D + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, time_arr) def test_complex_multid_scenario(self): - """Complex real-world scenario with multi-D array and broadcasting.""" - # Energy system 
data: time x technology, broadcast to regions + """2D array stays 2D (no broadcasting to additional dimensions).""" coords = { 'time': pd.date_range('2024-01-01', periods=24, freq='h', name='time'), # 24 hours 'technology': pd.Index(['solar', 'wind', 'gas', 'coal'], name='technology'), # 4 technologies 'region': pd.Index(['north', 'south', 'east'], name='region'), # 3 regions } - # Capacity factors: 24 x 4 (will broadcast to 24 x 4 x 3) + # Capacity factors: 24 x 4 stays 2D (no broadcast to 24 x 4 x 3) capacity_factors = np.random.rand(24, 4) result = DataConverter.to_dataarray(capacity_factors, coords=coords) - assert result.shape == (24, 4, 3) - assert result.dims == ('time', 'technology', 'region') + assert result.shape == (24, 4) + assert result.dims == ('time', 'technology') assert isinstance(result.indexes['time'], pd.DatetimeIndex) - - # Verify broadcasting: all regions should have same time×technology data - for region in coords['region']: - assert np.array_equal(result.sel(region=region).values, capacity_factors) + assert np.array_equal(result.values, capacity_factors) def test_ambiguous_length_handling(self): """Test handling of ambiguous length scenarios across different data types.""" @@ -952,8 +916,8 @@ def test_ambiguous_length_handling(self): with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_3d, coords=coords_3x3x3) - def test_mixed_broadcasting_scenarios(self): - """Test various broadcasting scenarios with different input types.""" + def test_no_broadcasting_scenarios(self): + """Data stays in minimal form (no broadcasting to additional dimensions).""" coords = { 'time': pd.date_range('2024-01-01', periods=4, freq='D', name='time'), # length 4 'scenario': pd.Index(['A', 'B'], name='scenario'), # length 2 @@ -961,31 +925,24 @@ def test_mixed_broadcasting_scenarios(self): 'product': pd.Index(['X', 'Y', 'Z', 'W', 'V'], name='product'), # length 5 } - # Scalar to 4D + # Scalar stays scalar scalar_result = DataConverter.to_dataarray(42, coords=coords) - assert scalar_result.shape == (4, 2, 3, 5) - assert np.all(scalar_result.values == 42) + assert scalar_result.shape == () + assert scalar_result.item() == 42 - # 1D array (length 4, matches time) to 4D + # 1D array (length 4, matches time) stays 1D arr_1d = np.array([10, 20, 30, 40]) arr_result = DataConverter.to_dataarray(arr_1d, coords=coords) - assert arr_result.shape == (4, 2, 3, 5) - # Verify broadcasting - for scenario in coords['scenario']: - for region in coords['region']: - for product in coords['product']: - assert np.array_equal( - arr_result.sel(scenario=scenario, region=region, product=product).values, arr_1d - ) - - # 2D array (4x2, matches time×scenario) to 4D + assert arr_result.shape == (4,) + assert arr_result.dims == ('time',) + assert np.array_equal(arr_result.values, arr_1d) + + # 2D array (4x2, matches time×scenario) stays 2D arr_2d = np.random.rand(4, 2) arr_2d_result = DataConverter.to_dataarray(arr_2d, coords=coords) - assert arr_2d_result.shape == (4, 2, 3, 5) - # Verify broadcasting - for region in coords['region']: - for product in coords['product']: - assert np.array_equal(arr_2d_result.sel(region=region, product=product).values, arr_2d) + assert arr_2d_result.shape == (4, 2) + assert arr_2d_result.dims == ('time', 'scenario') + assert np.array_equal(arr_2d_result.values, arr_2d) class TestAmbiguousDimensionLengthHandling: @@ -1020,11 +977,11 @@ def test_1d_array_ambiguous_dimensions_complex(self): with pytest.raises(ConversionError, match='matches 
multiple dimension'): DataConverter.to_dataarray(arr_1d, coords=coords_4x4x4) - # Array matching the unique length should work + # Array matching the unique length should work (stays 1D, no broadcasting) arr_1d_unique = np.array([100, 200]) # length 2 - matches only product result = DataConverter.to_dataarray(arr_1d_unique, coords=coords_4x4x4) - assert result.shape == (4, 4, 4, 2) # broadcast to all dimensions - assert result.dims == ('time', 'scenario', 'region', 'product') + assert result.shape == (2,) # stays 1D + assert result.dims == ('product',) def test_2d_array_ambiguous_dimensions_both_same(self): """Test 2D array where both dimensions have the same ambiguous length.""" @@ -1111,11 +1068,11 @@ def test_pandas_series_ambiguous_dimensions(self): with pytest.raises(ConversionError, match='Series index does not match any target dimension coordinates'): DataConverter.to_dataarray(generic_series, coords=coords_ambiguous) - # Series with index that matches one of the ambiguous coordinates should work + # Series with index that matches one of the ambiguous coordinates should work (stays 1D) scenario_series = pd.Series([10, 20, 30], index=coords_ambiguous['scenario']) result = DataConverter.to_dataarray(scenario_series, coords=coords_ambiguous) - assert result.shape == (3, 3) # should broadcast to both dimensions - assert result.dims == ('scenario', 'region') + assert result.shape == (3,) # stays 1D + assert result.dims == ('scenario',) def test_edge_case_many_same_lengths(self): """Test edge case with many dimensions having the same length.""" @@ -1153,10 +1110,10 @@ def test_mixed_lengths_with_duplicates(self): 'product': pd.Index(['P1', 'P2', 'P3', 'P4', 'P5'], name='product'), # length 5 - unique } - # Arrays with unique lengths should work + # Arrays with unique lengths should work (stays minimal, no broadcasting) arr_8 = np.arange(8) result_8 = DataConverter.to_dataarray(arr_8, coords=coords_mixed) - assert result_8.dims == ('time', 'scenario', 'region', 'technology', 'product') + assert result_8.dims == ('time',) # Stays 1D, matches time dimension arr_1 = np.array([42]) result_1 = DataConverter.to_dataarray(arr_1, coords={'technology': coords_mixed['technology']}) @@ -1247,10 +1204,11 @@ def test_time_dimension_ambiguity(self): } # Time-indexed series should work even with ambiguous lengths (index matching takes precedence) + # Stays minimal - no broadcasting to other dimensions time_series = pd.Series([100, 200, 300], index=coords_time_ambiguous['time']) result = DataConverter.to_dataarray(time_series, coords=coords_time_ambiguous) - assert result.shape == (3, 3, 2) - assert result.dims == ('time', 'scenario', 'region') + assert result.shape == (3,) # Stays 1D + assert result.dims == ('time',) # Matches time via index # But generic array with length 3 should still fail generic_array = np.array([100, 200, 300]) diff --git a/tests/test_flow.py b/tests/test_flow.py index 275963348..aa75b3c66 100644 --- a/tests/test_flow.py +++ b/tests/test_flow.py @@ -4,7 +4,7 @@ import flixopt as fx -from .conftest import assert_conequal, assert_sets_equal, assert_var_equal, create_linopy_model +from .conftest import assert_conequal, assert_dims_compatible, assert_sets_equal, assert_var_equal, create_linopy_model class TestFlowModel: @@ -69,8 +69,8 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config): model.add_variables(lower=10, upper=1000, coords=model.get_coords(['period', 'scenario'])), ) - assert flow.relative_minimum.dims == tuple(model.get_coords()) - assert 
diff --git a/tests/test_flow.py b/tests/test_flow.py
index 275963348..aa75b3c66 100644
--- a/tests/test_flow.py
+++ b/tests/test_flow.py
@@ -4,7 +4,7 @@

 import flixopt as fx

-from .conftest import assert_conequal, assert_sets_equal, assert_var_equal, create_linopy_model
+from .conftest import assert_conequal, assert_dims_compatible, assert_sets_equal, assert_var_equal, create_linopy_model


 class TestFlowModel:
@@ -69,8 +69,8 @@ def test_flow(self, basic_flow_system_linopy_coords, coords_config):
             model.add_variables(lower=10, upper=1000, coords=model.get_coords(['period', 'scenario'])),
         )

-        assert flow.relative_minimum.dims == tuple(model.get_coords())
-        assert flow.relative_maximum.dims == tuple(model.get_coords())
+        assert_dims_compatible(flow.relative_minimum, tuple(model.get_coords()))
+        assert_dims_compatible(flow.relative_maximum, tuple(model.get_coords()))

         assert_var_equal(
             flow.submodel.flow_rate,
@@ -182,8 +182,8 @@ def test_flow_invest(self, basic_flow_system_linopy_coords, coords_config):
             model.add_variables(lower=20, upper=100, coords=model.get_coords(['period', 'scenario'])),
         )

-        assert flow.relative_minimum.dims == tuple(model.get_coords())
-        assert flow.relative_maximum.dims == tuple(model.get_coords())
+        assert_dims_compatible(flow.relative_minimum, tuple(model.get_coords()))
+        assert_dims_compatible(flow.relative_maximum, tuple(model.get_coords()))

         # flow_rate
         assert_var_equal(
@@ -247,8 +247,8 @@ def test_flow_invest_optional(self, basic_flow_system_linopy_coords, coords_config):
             model.add_variables(binary=True, coords=model.get_coords(['period', 'scenario'])),
         )

-        assert flow.relative_minimum.dims == tuple(model.get_coords())
-        assert flow.relative_maximum.dims == tuple(model.get_coords())
+        assert_dims_compatible(flow.relative_minimum, tuple(model.get_coords()))
+        assert_dims_compatible(flow.relative_maximum, tuple(model.get_coords()))

         # flow_rate
         assert_var_equal(
@@ -322,8 +322,8 @@ def test_flow_invest_optional_wo_min_size(self, basic_flow_system_linopy_coords, coords_config):
             model.add_variables(binary=True, coords=model.get_coords(['period', 'scenario'])),
         )

-        assert flow.relative_minimum.dims == tuple(model.get_coords())
-        assert flow.relative_maximum.dims == tuple(model.get_coords())
+        assert_dims_compatible(flow.relative_minimum, tuple(model.get_coords()))
+        assert_dims_compatible(flow.relative_maximum, tuple(model.get_coords()))

         # flow_rate
         assert_var_equal(
@@ -390,8 +390,8 @@ def test_flow_invest_wo_min_size_non_optional(self, basic_flow_system_linopy_coords, coords_config):
             model.add_variables(lower=1e-5, upper=100, coords=model.get_coords(['period', 'scenario'])),
         )

-        assert flow.relative_minimum.dims == tuple(model.get_coords())
-        assert flow.relative_maximum.dims == tuple(model.get_coords())
+        assert_dims_compatible(flow.relative_minimum, tuple(model.get_coords()))
+        assert_dims_compatible(flow.relative_maximum, tuple(model.get_coords()))

         # flow_rate
         assert_var_equal(
@@ -629,8 +629,8 @@ def test_effects_per_active_hour(self, basic_flow_system_linopy_coords, coords_config):
         costs_per_running_hour = flow.status_parameters.effects_per_active_hour['costs']
         co2_per_running_hour = flow.status_parameters.effects_per_active_hour['CO2']

-        assert costs_per_running_hour.dims == tuple(model.get_coords())
-        assert co2_per_running_hour.dims == tuple(model.get_coords())
+        assert_dims_compatible(costs_per_running_hour, tuple(model.get_coords()))
+        assert_dims_compatible(co2_per_running_hour, tuple(model.get_coords()))

         assert_conequal(
             model.constraints['Sink(Wärme)->costs(temporal)'],
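Note that `tests/conftest.py` gains an `assert_dims_compatible` helper that this diff imports but never shows. Since minimal-form parameters carry only the dimensions their input data had, exact dims equality is now too strict; a subset check in model order suffices. A hypothetical sketch of such a helper (the actual conftest implementation may differ):

```python
def assert_dims_compatible(da, expected_dims: tuple) -> None:
    """Assert `da` uses only known model dims, preserving the model's dim order."""
    assert set(da.dims) <= set(expected_dims), f'{da.dims} not within {expected_dims}'
    present = [d for d in expected_dims if d in da.dims]
    assert list(da.dims) == present, f'{da.dims} out of order relative to {expected_dims}'
```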
diff --git a/tests/test_io_conversion.py b/tests/test_io_conversion.py
index 7775f987a..dffba1dfc 100644
--- a/tests/test_io_conversion.py
+++ b/tests/test_io_conversion.py
@@ -204,12 +204,12 @@ def test_custom_renames(self):
         assert 'custom_new' in result.attrs
         assert 'custom_old' not in result.attrs

-    def test_returns_same_object(self):
-        """Test that the function modifies and returns the same dataset object."""
+    def test_returns_equivalent_dataset(self):
+        """Test that the function converts and returns an equivalent dataset."""
         ds = xr.Dataset(attrs={'minimum_operation': 100})
         result = convert_old_dataset(ds)
-        # Note: attrs are modified in place, so the object should be the same
-        assert result is ds
+        # Check that attrs were converted to the new names
+        assert result.attrs == {'minimum_temporal': 100}


 class TestConvertOldNetcdf:
diff --git a/tests/test_linear_converter.py b/tests/test_linear_converter.py
index d20d104d0..c8fc3fb52 100644
--- a/tests/test_linear_converter.py
+++ b/tests/test_linear_converter.py
@@ -4,7 +4,7 @@

 import flixopt as fx

-from .conftest import assert_conequal, assert_var_equal, create_linopy_model
+from .conftest import assert_conequal, assert_dims_compatible, assert_var_equal, create_linopy_model


 class TestLinearConverterModel:
@@ -282,7 +282,7 @@ def test_edge_case_time_varying_conversion(self, basic_flow_system_linopy_coords
         factor = converter.conversion_factors[0]['electricity']

-        assert factor.dims == tuple(model.get_coords())
+        assert_dims_compatible(factor, tuple(model.get_coords()))

         # Verify the constraint has the time-varying coefficient
         assert_conequal(