Coverage for src/sensai/util/string.py: 60%

215 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-13 22:17 +0000

1import functools 

2import logging 

3import re 

4import sys 

5import types 

6from abc import ABC, abstractmethod 

7from typing import Union, List, Dict, Any, Sequence, Iterable, Optional, Mapping, Callable 

8 

# pattern matching a comma together with the whitespace that follows it (the whitespace may contain line breaks)
reCommaWhitespacePotentiallyBreaks = re.compile(r",\s+")

# logger of this module
log = logging.getLogger(__name__)

12 

13 

class StringConverter(ABC):
    """
    Interface for mechanisms that convert objects to strings
    """
    @abstractmethod
    def to_string(self, x) -> str:
        """
        :param x: the object to convert
        :return: the string representation of the given object
        """

21 

22 

def dict_string(d: Mapping, brackets: Optional[str] = None, converter: StringConverter = None):
    """
    Creates a string of the form "<key>=<value>, <key>=<value>, ..." from the given mapping; the result can
    optionally be wrapped in a pair of brackets.

    :param d: the dictionary
    :param brackets: a two-character string whose first character is used as the opening bracket and whose last
        character is used as the closing bracket, e.g. ``"{}"``; if None, no enclosing brackets are added
    :param converter: the string converter to use for values
    :return: the string representation
    """
    entries = (f'{key}={to_string(value, converter=converter, context=key)}' for key, value in d.items())
    joined = ', '.join(entries)
    if brackets is None:
        return joined
    return brackets[:1] + joined + brackets[-1:]

39 

40 

def list_string(l: Iterable[Any], brackets="[]", quote: Optional[str] = None, converter: StringConverter = None):
    """
    Creates a string of the form "[<value>, <value>, ...]" from the given list (or any other iterable); different
    brackets (or none at all) can be used, and the individual values can optionally be quoted.

    :param l: the list
    :param brackets: a two-character string whose first character is used as the opening bracket and whose last
        character is used as the closing bracket, e.g. ``"[]"``; if None, no enclosing brackets are added
    :param quote: a 1-character string defining the quote to put around each value, e.g. ``"'"``;
        if None, values are not quoted
    :param converter: the string converter to use for values
    :return: the string representation
    """
    rendered = (to_string(element, converter=converter, context="list") for element in l)
    if quote is not None:
        rendered = (quote + text + quote for text in rendered)
    joined = ", ".join(rendered)
    if brackets is None:
        return joined
    return brackets[:1] + joined + brackets[-1:]

64 

65 

def to_string(x, converter: StringConverter = None, apply_converter_to_non_complex_objects=True, context=None):
    """
    Creates a string representation of the given object, handling lists, tuples, dictionaries and bound methods
    specifically and optionally delegating the conversion of all other objects to a converter.
    For these other objects, any whitespace following a comma (including line breaks, as present, in particular,
    in sklearn's string representations) is replaced by a single space.

    :param x: the object to convert
    :param converter: the converter with which to convert objects to strings
    :param apply_converter_to_non_complex_objects: whether the converter (if any) shall also be applied directly to
        objects that are not lists, tuples or dictionaries; if False, such objects are converted via ``str``.
        Disabling this allows a converter to implement its own conversion via this function without causing
        an infinite recursion.
    :param context: context in which the object is being converted (e.g. the dictionary key if x is the corresponding
        dictionary value); used for debugging purposes only (it is reported in the log message upon a recursion error)
    :return: the string representation
    """
    try:
        x_type = type(x)
        if x_type == list:
            return list_string(x, converter=converter)
        if x_type == tuple:
            return list_string(x, brackets="()", converter=converter)
        if x_type == dict:
            return dict_string(x, brackets="{}", converter=converter)
        if x_type == types.MethodType:
            # could be a bound method of a ToStringMixin instance; printing it would print the repr of the
            # instance, which can potentially cause an infinite recursion
            return f"Method[{x.__name__}]"

        use_converter = converter and apply_converter_to_non_complex_objects
        text = converter.to_string(x) if use_converter else str(x)

        # remove any unwanted line breaks and indentation after commas (as generated, for example, by sklearn objects)
        return reCommaWhitespacePotentiallyBreaks.sub(", ", text)
    except RecursionError:
        log.error(f"Recursion in string conversion detected; context={context}")
        raise

104 

105 

def object_repr(obj, member_names_or_dict: Union[List[str], Dict[str, Any]]):
    """
    Creates a string of the form "<class name>[<name>=<value>, ...]" for the given object.

    :param obj: the object whose class name is used (and from which member values are read if names are given)
    :param member_names_or_dict: either a dictionary containing the entries to show or a collection of names of
        attributes of ``obj`` whose values shall be shown
    :return: the string representation
    """
    if type(member_names_or_dict) == dict:
        members = member_names_or_dict
    else:
        members = {}
        for name in member_names_or_dict:
            members[name] = to_string(getattr(obj, name))
    return f"{obj.__class__.__name__}[{dict_string(members)}]"

112 

113 

def or_regex_group(allowed_names: Sequence[str]):
    """
    Builds a regular expression group which matches any one of the given names literally.

    :param allowed_names: strings to include as literals in the regex (special characters are escaped)
    :return: a regular expression string of the form (<name1>|...|<nameN>), which matches any of the given names
    """
    alternatives = "|".join(map(re.escape, allowed_names))
    return f"({alternatives})"

122 

123 

def function_name(x: Callable) -> str:
    """
    Determines a name for the given callable.

    :param x: the callable; ``functools.partial`` objects are unwrapped to the function they wrap
    :return: the ``__name__`` of the (unwrapped) callable if it has one, otherwise its ``str`` representation
    """
    target = x
    # peel off (potentially nested) partial applications to reach the underlying callable
    while isinstance(target, functools.partial):
        target = target.func
    if hasattr(target, "__name__"):
        return target.__name__
    return str(target)

131 

132 

class ToStringMixin:
    """
    Provides implementations for ``__str__`` and ``__repr__`` which are based on the format ``"<class name>[<object info>]"`` and
    ``"<class name>[id=<object id>, <object info>]"`` respectively, where ``<object info>`` is usually a list of entries of the
    form ``"<name>=<value>, ..."``.

    By default, ``<class name>`` will be the qualified name of the class, and ``<object info>`` will include all properties
    of the class, including private ones starting with an underscore (though leading and trailing underscores will be
    stripped from the names in the string representation).

        * To exclude private properties, override :meth:`_tostring_exclude_private` to return True. If there are exceptions
          (and some private properties shall be retained), additionally override :meth:`_tostring_exclude_exceptions`.
        * To exclude a particular set of properties, override :meth:`_tostring_excludes`.
        * To include only select properties (introducing inclusion semantics), override :meth:`_tostring_includes`.
        * To add values to the properties list that aren't actually properties of the object (i.e. derived properties),
          override :meth:`_tostring_additional_entries`.
        * To define a fully custom representation for ``<object info>`` which is not based on the above principles, override
          :meth:`_tostring_object_info`.

    For well-defined string conversions within a class hierarchy, it can be a good practice to define additional
    inclusions/exclusions by overriding the respective method once more and basing the return value on an extended
    version of the value returned by superclass.
    In some cases, the requirements of a subclass can be at odds with the definitions in the superclass: The superclass
    may make use of exclusion semantics, but the subclass may want to use inclusion semantics (and include
    only some of the many properties it adds). In this case, if the subclass used :meth:`_tostring_includes`, the exclusion semantics
    of the superclass would be void and none of its properties would actually be included.
    In such cases, override :meth:`_tostring_includes_forced` to add inclusions regardless of the semantics otherwise used along
    the class hierarchy.

    .. document private functions
    .. automethod:: _tostring_class_name
    .. automethod:: _tostring_object_info
    .. automethod:: _tostring_excludes
    .. automethod:: _tostring_exclude_exceptions
    .. automethod:: _tostring_includes
    .. automethod:: _tostring_includes_forced
    .. automethod:: _tostring_additional_entries
    .. automethod:: _tostring_exclude_private
    """
    # marker element which, if it is the only element of the list of inclusions, stands for "all attributes"
    _TOSTRING_INCLUDE_ALL = "__all__"

    def _tostring_class_name(self):
        """
        :return: the string to use for ``<class name>`` in the string representation ``"<class name>[<object info>]"``
        """
        return type(self).__qualname__

    def _tostring_properties(self,
            exclude: Optional[Union[str, Iterable[str]]] = None,
            include: Optional[Union[str, Iterable[str]]] = None,
            exclude_exceptions: Optional[List[str]] = None,
            include_forced: Optional[List[str]] = None,
            additional_entries: Dict[str, Any] = None,
            converter: StringConverter = None) -> str:
        """
        Creates a string of the class attributes, with optional exclusions/inclusions/additions.
        Exclusions take precedence over regular inclusions, but attributes named in ``include_forced`` or
        ``exclude_exceptions`` are never excluded.

        :param exclude: attributes to be excluded
        :param include: attributes to be included; unless it consists solely of the "include all" marker,
            only the specified attributes will be printed (bar the ones excluded by ``exclude``)
        :param exclude_exceptions: attributes which shall not be excluded even though ``exclude`` or the exclusion of
            private attributes would otherwise exclude them
        :param include_forced: additional attributes to be included
        :param additional_entries: additional key-value entries to be added
        :param converter: the string converter to use; if None, use default (which avoids infinite recursions)
        :return: a string containing entry/property names and values
        """
        def mklist(x) -> List[str]:
            if x is None:
                return []
            if isinstance(x, str):
                return [x]
            # materialise as a list such that any iterable (tuple, set, generator, ...) supports the
            # length check, indexing and concatenation applied below
            return list(x)

        exclude = mklist(exclude)
        include = mklist(include)
        include_forced = mklist(include_forced)
        exclude_exceptions = mklist(exclude_exceptions)

        def is_excluded(k):
            if k in include_forced or k in exclude_exceptions:
                return False
            if k in exclude:
                return True
            if self._tostring_exclude_private():
                is_private = k.startswith("_")
                return is_private
            else:
                return False

        # determine relevant attribute dictionary
        if len(include) == 1 and include[0] == self._TOSTRING_INCLUDE_ALL:  # exclude semantics (include everything by default)
            attribute_dict = self.__dict__
        else:  # include semantics (include only inclusions)
            # remove duplicates while retaining the order in which the names were given, such that the order
            # of the entries in the resulting string is deterministic (the iteration order of a set of strings is not)
            names = dict.fromkeys(include + include_forced)
            attribute_dict = {k: getattr(self, k) for k in names
                if hasattr(self, k) and k != self._TOSTRING_INCLUDE_ALL}

        # apply exclusions and remove underscores from attribute names
        d = {k.strip("_"): v for k, v in attribute_dict.items() if not is_excluded(k)}

        if additional_entries is not None:
            d.update(additional_entries)

        if converter is None:
            # the object itself is registered as already handled, such that a nested reference to it is abbreviated
            converter = self._StringConverterAvoidToStringMixinRecursion(self)
        return dict_string(d, converter=converter)

    def _tostring_object_info(self) -> str:
        """
        Override this method to use a fully custom definition of the ``<object info>`` part in the full string
        representation ``"<class name>[<object info>]"`` to be generated.
        As soon as this method is overridden, any property-based exclusions, inclusions, etc. will have no effect
        (unless the implementation is specifically designed to make use of them - as is the default
        implementation).
        NOTE: Overrides must not internally use super() because of a technical limitation in the proxy
        object that is used for nested object structures.

        :return: a string containing the string to use for ``<object info>``
        """
        return self._tostring_properties(exclude=self._tostring_excludes(), include=self._tostring_includes(),
            exclude_exceptions=self._tostring_exclude_exceptions(), include_forced=self._tostring_includes_forced(),
            additional_entries=self._tostring_additional_entries())

    def _tostring_excludes(self) -> List[str]:
        """
        Makes the string representation exclude the returned attributes.
        This method can be conveniently overridden by subclasses which can call super and extend the list returned.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names
        """
        return []

    def _tostring_includes(self) -> List[str]:
        """
        Makes the string representation include only the returned attributes (i.e. introduces inclusion semantics);
        By default, the list contains only a marker element, which is interpreted as "all attributes included".

        This method can be conveniently overridden by sub-classes which can call super and extend the list returned.
        Note that it is not a problem for a list containing the aforementioned marker element (which stands for all attributes)
        to be extended; the marker element will be ignored and only the user-added elements will be considered as included.

        Note: To add an included attribute in a sub-class, regardless of any super-classes using exclusion or inclusion semantics,
        use :meth:`_tostring_includes_forced` instead.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names to be included in the string representation
        """
        return [self._TOSTRING_INCLUDE_ALL]

    # noinspection PyMethodMayBeStatic
    def _tostring_includes_forced(self) -> List[str]:
        """
        Defines a list of attribute names that are required to be present in the string representation, regardless of the
        instance using include semantics or exclude semantics, thus facilitating added inclusions in sub-classes.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names
        """
        return []

    def _tostring_additional_entries(self) -> Dict[str, Any]:
        """
        :return: a dictionary of entries to be included in the ``<object info>`` part of the string representation
        """
        return {}

    def _tostring_exclude_private(self) -> bool:
        """
        :return: whether to exclude properties that are private (start with an underscore); explicitly included attributes
            will still be considered - as will properties exempt from the rule via :meth:`_tostring_exclude_exceptions`.
        """
        return False

    def _tostring_exclude_exceptions(self) -> List[str]:
        """
        Defines attribute names which should not be excluded even though other rules (particularly the exclusion of private members
        via :meth:`_tostring_exclude_private`) would otherwise exclude them.

        :return: a list of attribute names
        """
        return []

    def __str__(self):
        return f"{self._tostring_class_name()}[{self._tostring_object_info()}]"

    def __repr__(self):
        info = f"id={id(self)}"
        property_info = self._tostring_object_info()
        if len(property_info) > 0:
            info += ", " + property_info
        return f"{self._tostring_class_name()}[{info}]"

    def pprint(self, file=None):
        """
        Prints a prettily formatted string representation of the object (with line breaks and indentations)
        to ``stdout`` or the given file.

        :param file: the file to print to; if None, ``sys.stdout`` is used, which is looked up at the time of
            the call (rather than at the time the class was defined), such that redirections of ``sys.stdout``
            are respected
        """
        print(self.pprints(), file=file)

    def pprints(self) -> str:
        """
        :return: a prettily formatted string representation with line breaks and indentations
        """
        return pretty_string_repr(self)

    class _StringConverterAvoidToStringMixinRecursion(StringConverter):
        """
        Avoids recursions when converting objects implementing :class:`ToStringMixin` which may contain themselves to strings.
        Use of this object prevents infinite recursions caused by a :class:`ToStringMixin` instance recursively containing itself in
        either a property of another :class:`ToStringMixin`, a list or a tuple.
        It handles all :class:`ToStringMixin` instances recursively encountered.

        A previously handled instance is converted to a string of the form "<class name>[<<]".
        """
        def __init__(self, *handled_objects: "ToStringMixin"):
            """
            :param handled_objects: objects which are initially assumed to have been handled already
            """
            # objects are tracked by identity, such that no requirements are imposed on equality/hashing
            self._handled_to_string_mixin_ids = {id(o) for o in handled_objects}

        def to_string(self, x) -> str:
            if isinstance(x, ToStringMixin):
                oid = id(x)
                if oid in self._handled_to_string_mixin_ids:
                    # already handled: abbreviate instead of descending into the object again
                    return f"{x._tostring_class_name()}[<<]"
                self._handled_to_string_mixin_ids.add(oid)
                return str(self._ToStringMixinProxy(x, self))
            else:
                # the converter is passed on for the elements of lists, tuples and dictionaries only;
                # not applying it to x itself prevents this method from being called with the same x again
                return to_string(x, converter=self, apply_converter_to_non_complex_objects=False, context=x.__class__)

        class _ToStringMixinProxy:
            """
            A proxy object which wraps a ToStringMixin to ensure that the converter is applied when creating the properties string.
            The proxy is to achieve that all ToStringMixin methods that aren't explicitly overwritten are bound to this proxy
            (rather than the original object), such that the transitive call to `_tostring_properties` will call the new
            implementation.
            """

            # methods where we assume that they could transitively call `_tostring_properties` (others are assumed not to)
            TOSTRING_METHODS_TRANSITIVELY_CALLING_TOSTRINGPROPERTIES = {"_tostring_object_info"}

            def __init__(self, x: "ToStringMixin", converter):
                """
                :param x: the object to wrap
                :param converter: the converter to inject into calls of the wrapped object's ``_tostring_properties``
                """
                self.x = x
                self.converter = converter

            def _tostring_properties(self, *args, **kwargs):
                return self.x._tostring_properties(*args, **kwargs, converter=self.converter)

            def _tostring_class_name(self):
                return self.x._tostring_class_name()

            def __getattr__(self, attr: str):
                if attr.startswith("_tostring"):  # ToStringMixin method which we may bind to use this proxy to ensure correct transitive call
                    # use the function as defined in the wrapped object's class in order to be able to choose
                    # the object it is applied to
                    method = getattr(self.x.__class__, attr)
                    obj = self if attr in self.TOSTRING_METHODS_TRANSITIVELY_CALLING_TOSTRINGPROPERTIES else self.x
                    return lambda *args, **kwargs: method(obj, *args, **kwargs)
                else:
                    return getattr(self.x, attr)

            def __str__(self: "ToStringMixin"):
                # apply the mixin's implementation to the proxy, such that the proxy's methods are used
                return ToStringMixin.__str__(self)

399 

400 

def pretty_string_repr(s: Any, initial_indentation_level=0, indentation_string=" "):
    """
    Creates a pretty string representation (using indentations) from the given object/string representation (as generated,
    for example, via ToStringMixin). An indentation level (and a line break) is added for every opening bracket ``[`` or ``(``
    whose bracketed content contains both ``=`` and ``,``; other bracketed content is kept on a single line. Content enclosed
    in single quotes is copied verbatim, and a line break is added after every ``", "`` outside of such verbatim content.

    A single quote which has no closing counterpart (e.g. an apostrophe) is copied as a regular character.

    :param s: an object or object string representation; anything that is not exactly of type ``str`` is converted via ``str``
    :param initial_indentation_level: the initial indentation level
    :param indentation_string: the string which corresponds to a single indentation level
    :return: a reformatted version of the input string with added indentations and line breaks
    """
    if type(s) is not str:
        s = str(s)

    quote = "'"
    closing_for = {"[": "]", "(": ")", quote: quote}
    length = len(s)
    indent = initial_indentation_level
    parts = [indentation_string * indent]  # pieces of the result, joined at the end

    def newline():
        parts.append("\n" + indentation_string * indent)

    def find_matching(start):
        """Returns the index of the character closing the bracket/quote at ``start`` or None if there is none"""
        opener = s[start]
        closer = closing_for[opener]
        if opener == closer:
            # quotes do not nest: the next occurrence closes the quotation
            pos = s.find(closer, start + 1)
            return pos if pos >= 0 else None
        depth = 0
        for pos in range(start, length):
            c = s[pos]
            if c == opener:
                depth += 1
            elif c == closer:
                depth -= 1
                if depth == 0:
                    return pos
        return None

    i = 0
    while i < length:
        ch = s[i]
        if ch in closing_for:
            end = find_matching(i)
            segment = None
            if end is not None:
                candidate = s[i:end + 1]
                # quoted content is always kept as is; bracketed content only if it is not a list of assignments
                if ch == quote or not ("=" in candidate and "," in candidate):
                    segment = candidate
            if segment is not None:
                parts.append(segment)
                i = end + 1
            elif ch == quote:
                # unmatched quote (e.g. an apostrophe): an ordinary character, which must not affect the indentation
                parts.append(ch)
                i += 1
            else:
                parts.append(ch)
                i += 1
                indent += 1
                newline()
        elif ch in "])":
            parts.append(ch)
            i += 1
            indent -= 1
        elif s.startswith(", ", i):
            parts.append(", ")
            i += 2
            newline()
        else:
            parts.append(ch)
            i += 1

    return "".join(parts)

470 

471 

class TagBuilder:
    """
    Helper for assembling a string from a sequence of components which are concatenated using a glue string
    """
    def __init__(self, *initial_components: str, glue="_"):
        """
        :param initial_components: the components with which the sequence of components starts
        :param glue: the string that is placed between consecutive components when building the result
        """
        self.components = [*initial_components]
        self.glue = glue

    def with_component(self, component: str):
        """
        Adds the given component

        :param component: the component to add
        :return: the builder
        """
        self.components.append(component)
        return self

    def with_conditional(self, cond: bool, component: str):
        """
        Adds the given component, provided that the condition holds

        :param cond: the condition
        :param component: the component to add if the condition holds
        :return: the builder
        """
        if not cond:
            return self
        self.components.append(component)
        return self

    def with_alternative(self, cond: bool, true_component: str, false_component: str):
        """
        Adds one of two components, depending on a condition

        :param cond: the condition
        :param true_component: the component to add if the condition holds
        :param false_component: the component to add if the condition does not hold
        :return: the builder
        """
        if cond:
            chosen = true_component
        else:
            chosen = false_component
        self.components.append(chosen)
        return self

    def build(self):
        """
        :return: the string consisting of all components, joined via the glue string
        """
        glue = self.glue
        return glue.join(self.components)