import re
from dataclasses import dataclass
from typing import List, Tuple, Optional
from urllib import parse as urlparse
# Public names exported by this module: the ``urlsplit`` function and the
# ``SplitURL`` class it returns. The underscore-prefixed helpers stay private.
__all__ = [
'urlsplit', 'SplitURL',
]
def _split_params(params: str) -> List[Tuple[str, Optional[str]]]:
    """
    Overview:
        Split an encoded query string into decoded ``(name, value)`` pairs.

    :param params: Raw (still percent-encoded) query string, whose pairs are separated \
by ``&`` or ``;``.
    :return: List of ``(name, value)`` tuples, in order of appearance. ``value`` is ``None`` \
when the pair contains no ``=`` at all, and ``''`` when nothing follows the ``=``.

    .. note::
        Completely empty pairs are skipped, so an empty query string gives ``[]`` and stray \
separators (``a=1&&b=2``, a trailing ``&``) do not produce a phantom ``('', None)`` entry.

    Examples::
        >>> _split_params('q=1&v=kdjf&q=2')
        [('q', '1'), ('v', 'kdjf'), ('q', '2')]
        >>> _split_params('flag&empty=')
        [('flag', None), ('empty', '')]
        >>> _split_params('')
        []
    """
    result = []
    for pair in re.split(r'[&;]', params):
        if not pair:
            # re.split() yields '' for an empty input and between adjacent
            # separators; such a piece carries neither a name nor a value.
            continue

        # 'param'       => ('param', None)
        # 'param='      => ('param', '')
        # 'param=value' => ('param', 'value')
        # Only the first '=' separates name from value ('a=b=c' => ('a', 'b=c')).
        name, separator, value = pair.partition('=')
        result.append((
            urlparse.unquote_plus(name),
            urlparse.unquote_plus(value) if separator else None,
        ))
    return result
def _split_path(path: str) -> List[str]:
    """
    Overview:
        Cut a url path at every ``/`` and percent-decode each piece.

    :param path: Encoded path string.
    :return: Decoded segments in order. A leading ``/`` gives an empty first segment, \
and an empty path gives ``['']``.
    """
    encoded_segments = path.split('/')
    return list(map(urlparse.unquote, encoded_segments))
@dataclass
class SplitURL:
    """
    Overview:
        Split url object, holding the original url string together with its 5 parts \
(scheme, host, path, query and fragment).

    :param url: Original url string, returned unchanged by ``str()``.
    :param scheme: Scheme part of the url.
    :param host: Host part of the url.
    :param path: Path part of the url, still encoded.
    :param query: Query part of the url as a raw string; see :attr:`query_dict` for the \
parsed form.
    :param fragment: Fragment part of the url.
    """
    url: str
    scheme: str
    host: str
    path: str
    query: str
    fragment: str

    @property
    def query_dict(self) -> dict:
        """
        Query dict.

        Maps each decoded parameter name to its value. A name that appears once maps to \
that single value (``None`` when the pair has no ``=``); a name that appears several times \
maps to a list of its values in order of appearance. An empty query gives an empty dict.
        """
        if not self.query:
            # Without this guard the splitter would see '' and report one
            # phantom parameter, which would also leak into ``__repr__``.
            return {}

        retval = {}
        for key, value in _split_params(self.query):
            if key not in retval:
                retval[key] = value
            elif isinstance(retval[key], list):
                retval[key].append(value)
            else:
                # Second occurrence of this name: promote the scalar to a list.
                retval[key] = [retval[key], value]
        return retval

    @property
    def path_segments(self) -> List[str]:
        """
        Separated path segments.

        The path is cut at every ``/`` and each piece is decoded, so a path starting \
with ``/`` has an empty string as its first segment.
        """
        return _split_path(self.path)

    @property
    def filename(self) -> Optional[str]:
        """
        Filename of current url, return ``None`` when path is empty.

        For a non-empty path this is the last path segment, which is an empty string \
when the path ends with ``/``.
        """
        if not self.path:
            # An empty path still splits into [''], so it has to be handled
            # explicitly to honour the documented ``None`` result.
            return None
        return self.path_segments[-1]

    def __str__(self):
        """
        Original url.
        """
        return self.url

    def __repr__(self):
        """
        Repr format of :class:`SplitURL`.

        Only non-empty parts are shown, and the query is shown in its parsed \
(:attr:`query_dict`) form rather than as the raw string.
        """
        content = ', '.join([f'{key}={value!r}' for key, value in [
            ('scheme', self.scheme),
            ('host', self.host),
            ('path', self.path),
            ('query', self.query_dict),
            ('fragment', self.fragment),
        ] if value])
        return f'{self.__class__.__name__}({content})'
def urlsplit(url: str) -> SplitURL:
    """
    Overview:
        Break a url down into its 5 parts (scheme, host, path, query and fragment).

    :param url: Original url string.
    :return: :class:`SplitURL` object carrying the original url and its parts.

    Examples::
        >>> from hbutils.system import urlsplit
        >>>
        >>> sp = urlsplit('https://www.baidu.com/dslkjf/sdfhk/asdasd.png?q=1&v=kdjf&q=2#fff')
        >>> sp
        SplitURL(scheme='https', host='www.baidu.com', path='/dslkjf/sdfhk/asdasd.png', query={'q': ['1', '2'], 'v': 'kdjf'}, fragment='fff')
        >>> repr(sp)
        "SplitURL(scheme='https', host='www.baidu.com', path='/dslkjf/sdfhk/asdasd.png', query={'q': ['1', '2'], 'v': 'kdjf'}, fragment='fff')"
        >>>
        >>> sp.scheme
        'https'
        >>> sp.host
        'www.baidu.com'
        >>> sp.path
        '/dslkjf/sdfhk/asdasd.png'
        >>> sp.query
        'q=1&v=kdjf&q=2'
        >>> sp.fragment
        'fff'
        >>>
        >>> sp.query_dict
        {'q': ['1', '2'], 'v': 'kdjf'}
        >>> sp.path_segments
        ['', 'dslkjf', 'sdfhk', 'asdasd.png']
        >>> sp.filename
        'asdasd.png'
    """
    # The standard-library splitter does the parsing; its ``netloc``
    # component is what gets stored as ``host``.
    parts = urlparse.urlsplit(url)
    return SplitURL(
        url=url,
        scheme=parts.scheme,
        host=parts.netloc,
        path=parts.path,
        query=parts.query,
        fragment=parts.fragment,
    )