# Enable JavaScript in your browser and then refresh this page for a much-enhanced experience.
# First solution in the "Uncategorized" category for "URL Normalization" by _Chico_
import re
import string
def checkio(URL: str) -> str:
    """Return a normalized form of a ``scheme://host[:port][path]`` URL.

    Normalization steps applied:

    1. The scheme and host are lower-cased.
    2. Percent-escapes that stay encoded get upper-case hex digits.
    3. Percent-escapes of unreserved characters (letters, digits and
       ``-._~``) are decoded.
    4. The default port ``:80`` is dropped; any other port is kept.
    5. ``.`` and ``..`` path segments are removed.

    The whole path is lower-cased before escapes are processed, so decoded
    letters always come out in lower case.

    Args:
        URL: The URL to normalize.

    Returns:
        The normalized URL.

    Raises:
        ValueError: If ``URL`` does not look like ``scheme://host...``.
    """
    unreserved = string.ascii_lowercase + string.digits + '-._~'

    def resub(m):
        """Decode one ``%XX`` escape, or re-emit it with upper-case hex."""
        i = int(m.group(1)[1:], 16)
        c = chr(i).lower()
        # Two hex digits are mandatory: without the zero padding an octet
        # below 0x10 (e.g. %0a) would be written as the invalid "%A".
        return c if c in unreserved else '%{:02X}'.format(i)

    def shorten(path):
        """Drop ``.`` segments and resolve ``..`` segments of *path*."""
        result = []
        for part in path.split('/'):
            if part == '..':
                # Keep the first (root) segment: popping it would glue the
                # remaining path directly onto the host, and popping from an
                # empty list would raise IndexError.
                if len(result) > 1:
                    result.pop()
            elif part != '.':
                result.append(part)
        return '/'.join(result)

    m = re.match(
        r'(?P<scheme>\w+)://(?P<host>[^:/]+)(?P<port>:\d+)?(?P<path>.*$)',
        URL,
    )
    if m is None:
        raise ValueError('not a recognizable URL: {!r}'.format(URL))
    scheme, host, port, path = m.group('scheme', 'host', 'port', 'path')
    path = re.sub(r'(%[a-fA-F0-9]{2})', resub, path.lower())
    return scheme.lower() + '://' + host.lower() + \
        ('' if port in [None, ':80'] else port) + shorten(path)
# These asserts are used only for self-checking and are not necessary for
# auto-testing. Each one exercises one normalization rule of checkio().
if __name__ == '__main__':
    assert checkio("Http://Www.Checkio.org") == \
        "http://www.checkio.org", "1st rule"
    assert checkio("http://www.checkio.org/%cc%b1bac") == \
        "http://www.checkio.org/%CC%B1bac", "2nd rule"
    assert checkio("http://www.checkio.org/task%5F%31") == \
        "http://www.checkio.org/task_1", "3rd rule"
    assert checkio("http://www.checkio.org:80/home/") == \
        "http://www.checkio.org/home/", "4th rule"
    assert checkio("http://www.checkio.org:8080/home/") == \
        "http://www.checkio.org:8080/home/", "4th rule again"
    assert checkio("http://www.checkio.org/task/./1/../2/././name") == \
        "http://www.checkio.org/task/2/name", "5th rule"
    print('First set of tests done')
# June 5, 2021