Coverage for src/sensai/util/sequences.py: 65%

92 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-29 18:29 +0000

1from bisect import bisect_right, bisect_left 

2from typing import Optional, TypeVar, Sequence, Any, List 

3 

4T = TypeVar("T") 

5 

6 

7def concat_sequences(seqs: Sequence[Sequence[T]]) -> List[T]: 

8 result = [] 

9 for s in seqs: 

10 result.extend(s) 

11 return result 

12 

13 

14def get_first_duplicate(seq: Sequence[T]) -> Optional[T]: 

15 """ 

16 Returns the first duplicate in a sequence or None 

17 

18 :param seq: a sequence of hashable elements 

19 :return: 

20 """ 

21 set_of_elems = set() 

22 for elem in seq: 

23 if elem in set_of_elems: 

24 return elem 

25 set_of_elems.add(elem) 

26 

27 

28def floor_index(arr, value) -> Optional[int]: 

29 """ 

30 Finds the rightmost/largest index in a sorted array where the value is less than or equal to the given value 

31 

32 :param arr: the sorted array of values 

33 :param value: the value to search for 

34 :return: the index or None if there is no such index 

35 """ 

36 idx = bisect_right(arr, value) 

37 if idx: 

38 return idx - 1 

39 return None 

40 

41 

42def ceil_index(arr, value) -> Optional[int]: 

43 """ 

44 finds the leftmost/lowest index in a sorted array where the value is greater than or equal to the given value 

45 

46 :param arr: the sorted array of values 

47 :param value: the value to search for 

48 :return: the index or None if there is no such index 

49 """ 

50 idx = bisect_left(arr, value) 

51 if idx != len(arr): 

52 return idx 

53 return None 

54 

55 

56def closest_index(arr, value) -> Optional[int]: 

57 """ 

58 Finds the index in the given array where the value is closest to the given value. 

59 If two subsequent values have the same distance, the smaller index is returned. 

60 

61 :param arr: the array to search in 

62 :param value: the value to search for 

63 :return: the index or None if the array is empty 

64 """ 

65 length = len(arr) 

66 if length == 0: 

67 return None 

68 floor_idx = floor_index(arr, value) 

69 if floor_idx is None: 

70 return 0 

71 ceil_idx = floor_idx + 1 

72 if ceil_idx >= length: 

73 return floor_idx 

74 floor_val = arr[floor_idx] 

75 ceil_val = arr[ceil_idx] 

76 floor_dist = abs(floor_val - value) 

77 ceil_dist = abs(ceil_val - value) 

78 if floor_dist <= ceil_dist: 

79 return floor_idx 

80 else: 

81 return ceil_idx 

82 

83 

84def ceil_value(keys, key_value, values=None) -> Optional[Any]: 

85 """ 

86 For a sorted array of keys (and a an array of corresponding values), 

87 returns the corresponding value (values[i]) for the lowest index i where keys[i] >= keyValue 

88 

89 :param keys: the sorted array of keys for in which to search for the value closest to `keyValue` 

90 :param key_value: the key to search for 

91 :param values: the array from which to retrieve the value; if None, use `keys` 

92 :return: the value or None if no such value exists 

93 """ 

94 idx = ceil_index(keys, key_value) 

95 if idx is None: 

96 return None 

97 values = values if values is not None else keys 

98 return values[idx] 

99 

100 

101def floor_value(keys, key_value, values=None, fallback_first=False) -> Optional[Any]: 

102 """ 

103 For a sorted array of keys (and an array of corresponding values), 

104 returns the corresponding value (values[i]) for the largest index i in keys where keys[i] <= keyValue. 

105 

106 :param keys: the sorted array of keys in which to perform the lookup 

107 :param key_value: the key for which to perform the lookup 

108 :param values: the sorted array of values; if None, use keys as values 

109 :param fallback_first: if True, then return the first value instead of None for the case where no floor value exists 

110 :return: the floor value 

111 """ 

112 idx = floor_index(keys, key_value) 

113 values = values if values is not None else keys 

114 if idx is None: 

115 if fallback_first: 

116 return values[0] 

117 return None 

118 return values[idx] 

119 

120 

121def closest_value(keys, key_value, values=None) -> Optional[Any]: 

122 """ 

123 For a sorted array of keys (and an array of corresponding values), 

124 finds the value at the index where the key is closest to the given key value. 

125 If two subsequent values are equally close, the value at the smaller index is returned. 

126 """ 

127 idx = closest_index(keys, key_value) 

128 if idx is None: 

129 return None 

130 if values is None: 

131 values = keys 

132 return values[idx] 

133 

134 

135def value_slice_inner(keys, lower_bound_key, upper_bound_key, values=None): 

136 """ 

137 For a sorted array of keys (and an array of corresponding values), 

138 finds indices i, j such that i is the lowest key where keys[i] >= lowerBoundKey and 

139 j is the largest key where keys[j] <= upperBoundKey, 

140 and returns the corresponding slice of values values[i:j+1], 

141 i.e. the slice will not include the bounds keys if they are not present in the keys array. 

142 

143 :param keys: the sorted array of key values 

144 :param lower_bound_key: the key value defining the lower bound 

145 :param upper_bound_key: the key value defining the upper bound 

146 :param values: the sorted array of values; if None, use keys 

147 :return: the corresponding slice of `values` 

148 """ 

149 if values is None: 

150 values = keys 

151 elif len(values) != len(keys): 

152 raise ValueError("Length of values must match length of keys") 

153 if lower_bound_key > upper_bound_key: 

154 raise ValueError(f"Invalid bounds: {lower_bound_key} to {upper_bound_key}") 

155 first_idx = ceil_index(keys, lower_bound_key) 

156 last_idx = floor_index(keys, upper_bound_key) 

157 if first_idx is None or last_idx is None: 

158 return values[0:0] # empty slice 

159 return values[first_idx:last_idx+1] 

160 

161 

162def value_slice_outer(keys, lower_bound_key, upper_bound_key, values=None, fallback_bounds=False): 

163 """ 

164 For a sorted array of keys and an array of corresponding values, 

165 finds indices i, j such that i is the largest key where keys[i] <= lowerBoundKey and 

166 j is the lowest key where keys[j] <= upperBoundKey, 

167 and returns the corresponding slice of values values[i:j+1]. 

168 If such indices do not exists and fallbackBounds==True, the array bounds are used (i.e. 0 or len-1). 

169 If such indices do not exists and fallbackBounds==False, an exception is raised. 

170 This returned slice is an outer slice, which is the smallest slice that definitely contains two given bounds 

171 (for fallbackBounds==False). 

172 

173 :param keys: the sorted array of key values 

174 :param lower_bound_key: the key value defining the lower bound 

175 :param upper_bound_key: the key value defining the upper bound 

176 :param values: the sorted array of values; if None, use keys 

177 :param fallback_bounds: whether to use the smallest/largest index (i.e. 0 or len-1) as a fallback in case no matching bounds exist 

178 :return: the corresponding slice of `values` 

179 """ 

180 if values is None: 

181 values = keys 

182 elif len(values) != len(keys): 

183 raise ValueError("Length of values must match length of keys") 

184 if lower_bound_key > upper_bound_key: 

185 raise ValueError(f"Invalid bounds: {lower_bound_key} to {upper_bound_key}") 

186 first_idx = floor_index(keys, lower_bound_key) 

187 last_idx = ceil_index(keys, upper_bound_key) 

188 if first_idx is None: 

189 if fallback_bounds: 

190 first_idx = 0 

191 else: 

192 raise Exception(f"Bounds would exceed start of array (lowerBoundKey={lower_bound_key}, lowest key={keys[0]})") 

193 if last_idx is None: 

194 if fallback_bounds: 

195 last_idx = len(keys) - 1 

196 else: 

197 raise Exception(f"Bounds would exceed end of array (upperBoundKey={upper_bound_key}, largest key={keys[-1]})") 

198 return values[first_idx:last_idx+1]