Coverage for src/sensai/util/string.py: 60%
215 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-29 18:29 +0000
1import functools
2import logging
3import re
4import sys
5import types
6from abc import ABC, abstractmethod
7from typing import Union, List, Dict, Any, Sequence, Iterable, Optional, Mapping, Callable
# Matches a comma followed by one or more whitespace characters (which includes line breaks);
# used by to_string to collapse such runs to a plain ", "
reCommaWhitespacePotentiallyBreaks = re.compile(r",\s+")

# Module-level logger, named after this module
log = logging.getLogger(__name__)
class StringConverter(ABC):
    """
    Interface for objects that know how to turn an arbitrary object into a string.

    Concrete converters must implement :meth:`to_string`.
    """

    @abstractmethod
    def to_string(self, x) -> str:
        """
        :param x: the object to convert
        :return: the string representation of ``x``
        """
def dict_string(d: Mapping, brackets: Optional[str] = None, converter: StringConverter = None):
    """
    Renders a mapping as ``"<key>=<value>, <key>=<value>, ..."``, optionally wrapped in brackets.

    Each value is converted via :func:`to_string`, with the respective key passed on as the conversion context.

    :param d: the mapping to render
    :param brackets: a two-character string whose first character is used as the opening bracket and whose
        last character is used as the closing bracket, e.g. ``"{}"``; if None, no brackets are added
    :param converter: the string converter to use for values
    :return: the string representation
    """
    entries = []
    for key, value in d.items():
        entries.append(f'{key}={to_string(value, converter=converter, context=key)}')
    joined = ', '.join(entries)
    if brackets is None:
        return joined
    return brackets[:1] + joined + brackets[-1:]
def list_string(l: Iterable[Any], brackets="[]", quote: Optional[str] = None, converter: StringConverter = None):
    """
    Renders a list (or any other iterable) as ``"[<value>, <value>, ...]"``, optionally using different
    brackets (or none) and optionally quoting every value.

    Each item is converted via :func:`to_string`, using the context ``"list"``.

    :param l: the iterable whose items are to be rendered
    :param brackets: a two-character string whose first character is used as the opening bracket and whose
        last character is used as the closing bracket, e.g. ``"[]"``; if None, no brackets are added
    :param quote: a string to place directly before and after each rendered value, e.g. ``"'"``; if None, values are not quoted
    :param converter: the string converter to use for values
    :return: the string representation
    """
    rendered = (to_string(value, converter=converter, context="list") for value in l)
    if quote is not None:
        rendered = (quote + text + quote for text in rendered)
    joined = ", ".join(rendered)
    if brackets is None:
        return joined
    return brackets[:1] + joined + brackets[-1:]
def to_string(x, converter: StringConverter = None, apply_converter_to_non_complex_objects=True, context=None):
    """
    Converts an object to a string, treating lists, tuples and dictionaries specially and optionally
    delegating to a converter. For all other objects, whitespace (including line breaks) following a comma
    is collapsed to a single space (such line breaks appear, for example, in sklearn's string representations).

    Only objects whose type is exactly ``list``, ``tuple`` or ``dict`` receive the container treatment;
    instances of subclasses are handled like any other object.

    :param x: the object to convert
    :param converter: the converter with which to convert objects to strings
    :param apply_converter_to_non_complex_objects: whether to apply the converter (if any) to objects that are
        not handled as containers; converters can pass False in order to use this function for complex
        objects from within their own implementation without causing an infinite recursion
    :param context: the context in which the object is being converted (e.g. the dictionary key if ``x`` is the
        corresponding value); used only in the log message that is emitted when a recursion error occurs
    :return: the string representation
    """
    try:
        kind = type(x)
        if kind is list:
            return list_string(x, converter=converter)
        if kind is tuple:
            return list_string(x, brackets="()", converter=converter)
        if kind is dict:
            return dict_string(x, brackets="{}", converter=converter)
        if kind is types.MethodType:
            # may be a bound method of a ToStringMixin instance; its default representation would include the
            # repr of that instance, which can lead to an infinite recursion
            return f"Method[{x.__name__}]"

        if converter and apply_converter_to_non_complex_objects:
            text = converter.to_string(x)
        else:
            text = str(x)

        # collapse line breaks and indentation following commas (as produced, for example, by sklearn objects)
        return reCommaWhitespacePotentiallyBreaks.sub(", ", text)
    except RecursionError:
        log.error(f"Recursion in string conversion detected; context={context}")
        raise
def object_repr(obj, member_names_or_dict: Union[List[str], Dict[str, Any]]):
    """
    Creates a representation of the form ``"<class name>[<name>=<value>, ...]"`` for the given object.

    :param obj: the object to represent; its class name is used as the prefix
    :param member_names_or_dict: either a dictionary (exactly of type ``dict``) containing the entries to show,
        or a collection of attribute names whose values are read from ``obj``
    :return: the string representation
    """
    if type(member_names_or_dict) is dict:
        members = member_names_or_dict
    else:
        members = {}
        for name in member_names_or_dict:
            members[name] = to_string(getattr(obj, name))
    return f"{obj.__class__.__name__}[{dict_string(members)}]"
def or_regex_group(allowed_names: Sequence[str]):
    """
    Builds a regular expression group of alternatives from literal strings.

    :param allowed_names: strings to include as literals in the regex (they are escaped)
    :return: a regular expression string of the form ``(<name1>|...|<nameN>)``, which matches any of the given names
    """
    alternatives = "|".join(map(re.escape, allowed_names))
    return r"(%s)" % alternatives
def function_name(x: Callable) -> str:
    """
    Determines a name for the given callable.

    :param x: the callable; ``functools.partial`` objects are unwrapped to the function they wrap
    :return: the ``__name__`` attribute of the (unwrapped) callable if it has one, otherwise its ``str`` representation
    """
    target = x
    while isinstance(target, functools.partial):
        target = target.func
    try:
        return target.__name__
    except AttributeError:
        return str(target)
class ToStringMixin:
    """
    Provides implementations for ``__str__`` and ``__repr__`` which are based on the format ``"<class name>[<object info>]"`` and
    ``"<class name>[id=<object id>, <object info>]"`` respectively, where ``<object info>`` is usually a list of entries of the
    form ``"<name>=<value>, ..."``.

    By default, ``<class name>`` will be the qualified name of the class, and ``<object info>`` will include all properties
    of the class, including private ones starting with an underscore (though the underscores will be dropped in the string
    representation).

        * To exclude private properties, override :meth:`_tostring_exclude_private` to return True. If there are exceptions
          (and some private properties shall be retained), additionally override :meth:`_tostring_exclude_exceptions`.
        * To exclude a particular set of properties, override :meth:`_tostring_excludes`.
        * To include only select properties (introducing inclusion semantics), override :meth:`_tostring_includes`.
        * To add values to the properties list that aren't actually properties of the object (i.e. derived properties),
          override :meth:`_tostring_additional_entries`.
        * To define a fully custom representation for ``<object info>`` which is not based on the above principles, override
          :meth:`_tostring_object_info`.

    For well-defined string conversions within a class hierarchy, it can be a good practice to define additional
    inclusions/exclusions by overriding the respective method once more and basing the return value on an extended
    version of the value returned by superclass.
    In some cases, the requirements of a subclass can be at odds with the definitions in the superclass: The superclass
    may make use of exclusion semantics, but the subclass may want to use inclusion semantics (and include
    only some of the many properties it adds). In this case, if the subclass used :meth:`_tostring_includes`, the exclusion semantics
    of the superclass would be void and none of its properties would actually be included.
    In such cases, override :meth:`_tostring_includes_forced` to add inclusions regardless of the semantics otherwise used along
    the class hierarchy.

    .. document private functions
    .. automethod:: _tostring_class_name
    .. automethod:: _tostring_object_info
    .. automethod:: _tostring_excludes
    .. automethod:: _tostring_exclude_exceptions
    .. automethod:: _tostring_includes
    .. automethod:: _tostring_includes_forced
    .. automethod:: _tostring_additional_entries
    .. automethod:: _tostring_exclude_private
    """
    # marker element which, if it is the sole element of the inclusion list, stands for "include all attributes"
    _TOSTRING_INCLUDE_ALL = "__all__"

    def _tostring_class_name(self):
        """
        :return: the string to use for ``<class name>`` in the string representation ``"<class name>[<object info>]"``
        """
        return type(self).__qualname__

    def _tostring_properties(self,
            exclude: Optional[Union[str, Iterable[str]]] = None,
            include: Optional[Union[str, Iterable[str]]] = None,
            exclude_exceptions: Optional[List[str]] = None,
            include_forced: Optional[List[str]] = None,
            additional_entries: Dict[str, Any] = None,
            converter: StringConverter = None) -> str:
        """
        Creates a string of the class attributes, with optional exclusions/inclusions/additions.
        Exclusions take precedence over regular inclusions, but attributes given in ``include_forced`` or
        ``exclude_exceptions`` are never excluded.

        With inclusion semantics, attributes appear in the order in which they are first named in ``include``
        followed by ``include_forced``; with exclusion semantics, they appear in the order of the instance's ``__dict__``.

        :param exclude: attributes to be excluded (a single name or an iterable of names)
        :param include: attributes to be included (a single name or an iterable of names); unless this consists solely of
            the marker :attr:`_TOSTRING_INCLUDE_ALL`, only the specified attributes will be considered (bar the ones
            excluded by ``exclude``)
        :param exclude_exceptions: attributes which shall not be excluded even if they are contained in ``exclude`` or
            are private while private attributes are being excluded
        :param include_forced: additional attributes to be included
        :param additional_entries: additional key-value entries to be added (overriding attributes of the same name)
        :param converter: the string converter to use; if None, use default (which avoids infinite recursions)
        :return: a string containing entry/property names and values
        """
        def mklist(x):
            # normalise None / a single name / an arbitrary iterable of names to a list, such that
            # tuples, sets, etc. returned by overrides can be indexed and concatenated below
            if x is None:
                return []
            if isinstance(x, str):
                return [x]
            return list(x)

        exclude = mklist(exclude)
        include = mklist(include)
        include_forced = mklist(include_forced)
        exclude_exceptions = mklist(exclude_exceptions)

        def is_excluded(k):
            if k in include_forced or k in exclude_exceptions:
                return False
            if k in exclude:
                return True
            if self._tostring_exclude_private():
                return k.startswith("_")
            return False

        # determine relevant attribute dictionary
        if len(include) == 1 and include[0] == self._TOSTRING_INCLUDE_ALL:  # exclude semantics (include everything by default)
            attribute_dict = self.__dict__
        else:  # include semantics (include only inclusions)
            # de-duplicate while preserving order (iterating a set of strings would yield an order that
            # differs between interpreter runs because of hash randomisation)
            included_names = dict.fromkeys(include + include_forced)
            attribute_dict = {k: getattr(self, k) for k in included_names
                if hasattr(self, k) and k != self._TOSTRING_INCLUDE_ALL}

        # apply exclusions and remove (leading and trailing) underscores from attribute names
        d = {k.strip("_"): v for k, v in attribute_dict.items() if not is_excluded(k)}

        if additional_entries is not None:
            d.update(additional_entries)

        if converter is None:
            # register this instance as already handled, such that nested references to it are not expanded again
            converter = self._StringConverterAvoidToStringMixinRecursion(self)
        return dict_string(d, converter=converter)

    def _tostring_object_info(self) -> str:
        """
        Override this method to use a fully custom definition of the ``<object info>`` part in the full string
        representation ``"<class name>[<object info>]"`` to be generated.
        As soon as this method is overridden, any property-based exclusions, inclusions, etc. will have no effect
        (unless the implementation is specifically designed to make use of them - as is the default
        implementation).
        NOTE: Overrides must not internally use super() because of a technical limitation in the proxy
        object that is used for nested object structures.

        :return: a string containing the string to use for ``<object info>``
        """
        return self._tostring_properties(exclude=self._tostring_excludes(), include=self._tostring_includes(),
            exclude_exceptions=self._tostring_exclude_exceptions(), include_forced=self._tostring_includes_forced(),
            additional_entries=self._tostring_additional_entries())

    def _tostring_excludes(self) -> List[str]:
        """
        Makes the string representation exclude the returned attributes.
        This method can be conveniently overridden by subclasses which can call super and extend the list returned.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names
        """
        return []

    def _tostring_includes(self) -> List[str]:
        """
        Makes the string representation include only the returned attributes (i.e. introduces inclusion semantics);
        By default, the list contains only a marker element, which is interpreted as "all attributes included".

        This method can be conveniently overridden by sub-classes which can call super and extend the list returned.
        Note that it is not a problem for a list containing the aforementioned marker element (which stands for all attributes)
        to be extended; the marker element will be ignored and only the user-added elements will be considered as included.

        Note: To add an included attribute in a sub-class, regardless of any super-classes using exclusion or inclusion semantics,
        use :meth:`_tostring_includes_forced` instead.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names to be included in the string representation
        """
        return [self._TOSTRING_INCLUDE_ALL]

    # noinspection PyMethodMayBeStatic
    def _tostring_includes_forced(self) -> List[str]:
        """
        Defines a list of attribute names that are required to be present in the string representation, regardless of the
        instance using include semantics or exclude semantics, thus facilitating added inclusions in sub-classes.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names
        """
        return []

    def _tostring_additional_entries(self) -> Dict[str, Any]:
        """
        :return: a dictionary of entries to be included in the ``<object info>`` part of the string representation
        """
        return {}

    def _tostring_exclude_private(self) -> bool:
        """
        :return: whether to exclude properties that are private (start with an underscore); attributes named via
            :meth:`_tostring_includes_forced` will still be considered - as will properties exempt from the rule via
            :meth:`_tostring_exclude_exceptions`.
        """
        return False

    def _tostring_exclude_exceptions(self) -> List[str]:
        """
        Defines attribute names which should not be excluded even though other rules (particularly the exclusion of private members
        via :meth:`_tostring_exclude_private`) would otherwise exclude them.

        :return: a list of attribute names
        """
        return []

    def __str__(self):
        return f"{self._tostring_class_name()}[{self._tostring_object_info()}]"

    def __repr__(self):
        info = f"id={id(self)}"
        property_info = self._tostring_object_info()
        if len(property_info) > 0:
            info += ", " + property_info
        return f"{self._tostring_class_name()}[{info}]"

    def pprint(self, file=sys.stdout):
        """
        Prints a prettily formatted string representation of the object (with line breaks and indentations)
        to ``stdout`` or the given file.

        :param file: the file to print to
        """
        print(self.pprints(), file=file)

    def pprints(self) -> str:
        """
        :return: a prettily formatted string representation with line breaks and indentations
        """
        return pretty_string_repr(self)

    class _StringConverterAvoidToStringMixinRecursion(StringConverter):
        """
        Avoids recursions when converting objects implementing :class:`ToStringMixin` which may contain themselves to strings.
        Use of this object prevents infinite recursions caused by a :class:`ToStringMixin` instance recursively containing itself in
        either a property of another :class:`ToStringMixin`, a list or a tuple.
        It handles all :class:`ToStringMixin` instances recursively encountered.

        A previously handled instance is converted to a string of the form "<class name>[<<]".
        """
        def __init__(self, *handled_objects: "ToStringMixin"):
            """
            :param handled_objects: objects which are initially assumed to have been handled already
            """
            self._handled_to_string_mixin_ids = {id(o) for o in handled_objects}

        def to_string(self, x) -> str:
            if isinstance(x, ToStringMixin):
                oid = id(x)
                if oid in self._handled_to_string_mixin_ids:
                    return f"{x._tostring_class_name()}[<<]"
                self._handled_to_string_mixin_ids.add(oid)
                return str(self._ToStringMixinProxy(x, self))
            else:
                # the flag prevents to_string from calling back into this converter for the same (non-container) object
                return to_string(x, converter=self, apply_converter_to_non_complex_objects=False, context=x.__class__)

        class _ToStringMixinProxy:
            """
            A proxy object which wraps a ToStringMixin to ensure that the converter is applied when creating the properties string.
            The proxy is to achieve that all ToStringMixin methods that aren't explicitly overwritten are bound to this proxy
            (rather than the original object), such that the transitive call to `_tostring_properties` will call the new
            implementation.
            """

            # methods where we assume that they could transitively call `_tostring_properties` (others are assumed not to)
            TOSTRING_METHODS_TRANSITIVELY_CALLING_TOSTRINGPROPERTIES = {"_tostring_object_info"}

            def __init__(self, x: "ToStringMixin", converter):
                self.x = x
                self.converter = converter

            def _tostring_properties(self, *args, **kwargs):
                # delegate to the wrapped object, injecting the recursion-avoiding converter
                return self.x._tostring_properties(*args, **kwargs, converter=self.converter)

            def _tostring_class_name(self):
                return self.x._tostring_class_name()

            def __getattr__(self, attr: str):
                if attr.startswith("_tostring"):  # ToStringMixin method which we may bind to use this proxy to ensure correct transitive call
                    method = getattr(self.x.__class__, attr)
                    obj = self if attr in self.TOSTRING_METHODS_TRANSITIVELY_CALLING_TOSTRINGPROPERTIES else self.x
                    return lambda *args, **kwargs: method(obj, *args, **kwargs)
                else:
                    return getattr(self.x, attr)

            def __str__(self: "ToStringMixin"):
                return ToStringMixin.__str__(self)
# NOTE(review): the default value of indentation_string is reproduced exactly as it appears in the reviewed
# view of this file, in which runs of whitespace appear to have been collapsed - confirm against the original.
def pretty_string_repr(s: Any, initial_indentation_level=0, indentation_string=" "):
    """
    Creates a pretty string representation (using indentations) from the given object/string representation (as generated, for example, via
    ToStringMixin). An indentation level is added for every opening bracket (``[`` or ``(``) whose bracketed content
    contains both ``=`` and ``,``; other bracketed content and single-quoted content is copied verbatim.
    A line break is added after every ``", "`` that is not part of verbatim content.

    An opening bracket without a matching closing bracket still opens an indentation level, whereas a single quote
    without a matching closing quote (e.g. an apostrophe) is copied as an ordinary character.

    :param s: an object or object string representation
    :param initial_indentation_level: the initial indentation level
    :param indentation_string: the string which corresponds to a single indentation level
    :return: a reformatted version of the input string with added indentations and line breaks
    """
    if not isinstance(s, str):
        s = str(s)

    closing_for = {"[": "]", "(": ")", "'": "'"}
    opening_brackets = "[("
    closing_brackets = "])"
    quotes = "'"

    def find_matching(start: int) -> Optional[int]:
        """
        :return: the index of the character closing the bracket/quote which is opened at index ``start`` or None if there is none
        """
        opener = s[start]
        closer = closing_for[opener]
        nests = closer != opener  # brackets can be nested, quotes cannot
        depth = 0
        for j in range(start, len(s)):
            c = s[j]
            if c == opener and (nests or j == start):
                depth += 1
            elif c == closer:
                depth -= 1
            if depth == 0:
                return j
        return None

    indent = initial_indentation_level
    parts = [indentation_string * indent]  # output fragments, joined once at the end
    i = 0
    length = len(s)
    while i < length:
        ch = s[i]
        if ch in opening_brackets or ch in quotes:
            is_quote = ch in quotes
            end = find_matching(i)
            if end is not None:
                segment = s[i:end + 1]
                if is_quote or not ("=" in segment and "," in segment):
                    # quoted content or simple bracketed content: copy verbatim without any line breaks
                    parts.append(segment)
                    i = end + 1
                    continue
            elif is_quote:
                # stray quote (e.g. an apostrophe): must not open an indentation level that is never closed
                parts.append(ch)
                i += 1
                continue
            # opening bracket of a structured (or unterminated) section: start a new indentation level
            parts.append(ch)
            i += 1
            indent += 1
            parts.append("\n" + indentation_string * indent)
        elif ch in closing_brackets:
            parts.append(ch)
            i += 1
            indent -= 1
        elif s[i:i + 2] == ", ":
            parts.append(", ")
            i += 2
            parts.append("\n" + indentation_string * indent)
        else:
            parts.append(ch)
            i += 1

    return "".join(parts)
class TagBuilder:
    """
    Helper for assembling a string from a sequence of components which are joined by a glue string
    """

    def __init__(self, *initial_components: str, glue="_"):
        """
        :param initial_components: components that shall always appear at the beginning
        :param glue: the string placed between consecutive components
        """
        self.glue = glue
        self.components = [*initial_components]

    def with_component(self, component: str) -> "TagBuilder":
        """
        Unconditionally adds the given component

        :param component: the component to add
        :return: the builder
        """
        self.components.append(component)
        return self

    def with_conditional(self, cond: bool, component: str) -> "TagBuilder":
        """
        Adds the given component only if the condition holds

        :param cond: the condition
        :param component: the component to add if the condition holds
        :return: the builder
        """
        if not cond:
            return self
        self.components.append(component)
        return self

    def with_alternative(self, cond: bool, true_component: str, false_component: str) -> "TagBuilder":
        """
        Adds one of two components, depending on a condition

        :param cond: the condition
        :param true_component: the component to add if the condition holds
        :param false_component: the component to add if the condition does not hold
        :return: the builder
        """
        if cond:
            chosen = true_component
        else:
            chosen = false_component
        self.components.append(chosen)
        return self

    def build(self) -> str:
        """
        :return: the resulting string, i.e. all components joined via the glue string
        """
        glue = self.glue
        return glue.join(self.components)