Enable JavaScript in your browser and then refresh this page for a much-enhanced experience.
First solution in Clear category for Web Log Sessions by tom-tom
import re
from datetime import *
def checkio(log_text):
    """Group web-log requests into sessions and report them as text.

    Each log line is expected to look like
    ``YYYY-MM-DD-HH-MM-SS;;username;;protocol://host[/path]``. The whole log
    is lower-cased before parsing, so usernames and hosts compare
    case-insensitively. The "site" of a request is the last two dot-separated
    labels of the host (``admin.checkio.org`` -> ``checkio.org``).

    Requests by the same user to the same site belong to one session as long
    as each request comes no more than 30 minutes after the previous one.
    A session's duration is the number of seconds between its first and last
    request, plus one.

    Args:
        log_text: The complete log, one request per line. Lines that do not
            match the expected layout are ignored. Lines may appear in any
            order; they are sorted by timestamp before being grouped.

    Returns:
        One line per session in the form
        ``username;;site;;duration;;request_count``, joined with newlines and
        sorted by (username, site, duration, request_count). An empty string
        is returned when no line matches.

    Raises:
        ValueError: If a matching line carries a timestamp that is not in
            ``%Y-%m-%d-%H-%M-%S`` format.
    """
    session_gap = timedelta(minutes=30)

    # Newlines are excluded from every character class so that a match can
    # never run past the end of its own line and swallow the following one.
    line_pattern = re.compile(r"""
        ^                               # line start
        (?P<datetime>[^;\n]+)           # request date and time
        ;;                              # separator
        (?P<username>[^;\n]+)           # username
        ;;                              # separator
        \w+://                          # protocol
        (?:[^/.\n]+\.)*                 # third-, fourth-, ... level domains
        (?P<site>[^/.\n]+\.[^/.\n]+?)   # site
        (?:$|/)                         # end of the host part of the URL
        """, re.MULTILINE | re.VERBOSE)

    # Sort by timestamp first: the grouping below relies on seeing the
    # requests of each (username, site) pair in chronological order.
    requests = sorted(
        (
            datetime.strptime(m.group('datetime'), '%Y-%m-%d-%H-%M-%S'),
            m.group('username'),
            m.group('site'),
        )
        for m in line_pattern.finditer(log_text.lower())
    )

    open_sessions = {}  # (username, site) -> (start, last_request, count)
    sessions = []       # finished (username, site, duration, count) tuples

    def add_session(key, start, stop, count):
        """Record a finished session for the given (username, site) key."""
        username, site = key
        duration = int((stop - start).total_seconds()) + 1
        sessions.append((username, site, duration, count))

    for request_dt, username, site in requests:
        key = (username, site)
        current = open_sessions.get(key)
        if current is not None:
            start, stop, count = current
            if request_dt <= stop + session_gap:
                # Still within the gap: extend the running session.
                open_sessions[key] = (start, request_dt, count + 1)
                continue
            # Gap exceeded: the running session is over.
            add_session(key, start, stop, count)
        open_sessions[key] = (request_dt, request_dt, 1)

    # Whatever is still open at the end of the log is a session too.
    for key, (start, stop, count) in open_sessions.items():
        add_session(key, start, stop, count)

    return '\n'.join(';;'.join(map(str, session)) for session in sorted(sessions))
# These asserts are used only for self-checking and are not necessary for
# auto-testing.
if __name__ == '__main__':
    # Sample log: mixed-case usernames, sub-domains, and one line that is out
    # of order relative to the rest of the file.
    example_log = (
        "2013-01-01-01-00-00;;Name;;http://checkio.org/task\n"
        "2013-01-01-01-02-00;;name;;http://checkio.org/task2\n"
        "2013-01-01-01-31-00;;Name;;https://admin.checkio.org\n"
        "2013-01-01-03-00-00;;Name;;http://www.checkio.org/profile\n"
        "2013-01-01-03-00-01;;Name;;http://example.com\n"
        "2013-02-03-04-00-00;;user2;;http://checkio.org/task\n"
        "2013-01-01-03-11-00;;Name;;http://checkio.org/task"
    )
    expected_sessions = (
        "name;;checkio.org;;661;;2\n"
        "name;;checkio.org;;1861;;3\n"
        "name;;example.com;;1;;1\n"
        "user2;;checkio.org;;1;;1"
    )
    assert checkio(example_log) == expected_sessions, "Example"
Aug. 22, 2017