2016-06-11 12:49:08 +02:00
|
|
|
"""
|
|
|
|
String Utilities:
|
|
|
|
|
|
|
|
This module helps in converting strings from one type to another.
|
|
|
|
|
|
|
|
Currently we have strings of 3 semantic types:
|
|
|
|
|
|
|
|
1. text strings: These strings are used to represent all textual data,
|
|
|
|
like people's names, stream names, content of messages, etc.
|
|
|
|
These strings can contain non-ASCII characters, so their type should be
|
2018-05-10 19:13:36 +02:00
|
|
|
`str` (the Unicode text type in python 3; this was `unicode` in python 2).
|
2016-06-11 12:49:08 +02:00
|
|
|
|
|
|
|
2. binary strings: These strings are used to represent binary data.
|
2017-09-27 10:06:17 +02:00
|
|
|
This should be of type `bytes`
|
2016-06-11 12:49:08 +02:00
|
|
|
|
|
|
|
3. native strings: These strings are for internal use only. Strings of
|
|
|
|
this type are not meant to be stored in the database, displayed to end
|
|
|
|
users, etc. Things like exception names, parameter names, attribute
|
|
|
|
names, etc. should be native strings. These strings should only
|
|
|
|
contain ASCII characters and they should have type `str`.
|
|
|
|
|
|
|
|
There are 3 utility functions provided for converting strings from one type
|
|
|
|
to another - force_text, force_bytes, force_str
|
|
|
|
|
|
|
|
Interconversion between text strings and binary strings can be done by
|
|
|
|
using encode and decode appropriately or by using the utility functions
|
|
|
|
force_text and force_bytes.
|
|
|
|
|
|
|
|
It is recommended to use the utility functions for other string conversions.
|
|
|
|
"""
|
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
from typing import Any, Dict, Mapping, Union, TypeVar
|
2016-06-09 08:37:53 +02:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
# Type variable whose two constraints stand for the "text string" and
# "native string" semantic types from the module docstring; both are `str`,
# so `bytes` is never an acceptable binding.
NonBinaryStr = TypeVar('NonBinaryStr', str, str)
|
2016-06-09 08:37:53 +02:00
|
|
|
# This is used to represent text or native strings
|
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def force_text(s: Union[str, bytes], encoding: str='utf-8') -> str:
    """Return `s` as a text string.

    A `bytes` value is decoded using `encoding`; a `str` value is handed
    back unchanged.

    Raises:
        TypeError: if `s` is neither `str` nor `bytes`.
    """
    # Binary input is the only case that needs any conversion work.
    if isinstance(s, bytes):
        return s.decode(encoding)
    # Already text: nothing to do.
    if isinstance(s, str):
        return s
    raise TypeError("force_text expects a string type")
|
2016-06-09 08:37:53 +02:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def force_str(s: Union[str, bytes], encoding: str='utf-8') -> str:
    """converts a string to a native string

    A `str` value is returned unchanged; a `bytes` value is decoded using
    `encoding`. The result is therefore always a `str`.

    Raises:
        TypeError: if `s` is neither `str` nor `bytes`.
        UnicodeDecodeError: if `s` is `bytes` that cannot be decoded with
            `encoding`.
    """
    if isinstance(s, str):
        return s
    # NOTE: an earlier version had a second `isinstance(s, str)` branch here
    # that encoded to bytes. It could never run (the check above already
    # matches every `str`) and would have violated the `-> str` return type,
    # so it has been removed.
    elif isinstance(s, bytes):
        return s.decode(encoding)
    else:
        raise TypeError("force_str expects a string type")
|