Enable Javascript in your browser and then refresh this page, for a much enhanced experience.
First solution in Clear category for Web Log Sessions by PythonLearner
import re
from datetime import datetime
from collections import namedtuple
from itertools import groupby
from typing import Iterable
# One parsed log line: the moment of the request, the user name and the site.
Request = namedtuple("Request", "timestamp name site")
# One aggregated session; the field order is the order in which
# show_session() writes the fields out.
Session = namedtuple("Session", "name site duration quantity")
def show_session(session: Session, delimiter: str) -> str:
    """Render a session as a single line of text.

    Each field of *session* is converted with ``str`` and the resulting
    pieces are joined with *delimiter*, keeping the field order.
    """
    fields_as_text = [str(field) for field in session]
    return delimiter.join(fields_as_text)
def parse_timestamp(timestamp: str, delimiter: str) -> datetime:
    """Build a ``datetime`` from a delimited string of integer fields.

    The string is split on *delimiter*, every piece is converted with
    ``int`` and the numbers are handed positionally to ``datetime``
    (year, month, day, followed by any time fields that are present).
    """
    numbers = [int(piece) for piece in timestamp.split(delimiter)]
    return datetime(*numbers)
def parse_url(url: str, pattern: str) -> str:
    """Return the first part of *url* that matches the regex *pattern*.

    The whole match is returned (not an individual group).

    Raises:
        AttributeError: if *pattern* is not found anywhere in *url*,
            because ``re.search`` then returns ``None``.
    """
    found = re.search(pattern, url)
    return found.group(0)
def parse_request(request: str, delimiter: str) -> Request:
    """Parse one log line into a ``Request``.

    *request* is split on *delimiter* into exactly three parts: a timestamp
    made of '-'-separated integers, a user name, and a URL. The URL is
    reduced to its ``word.word`` part that is preceded by "://" or "." and
    followed by "/" or the end of the string.

    Raises:
        ValueError: if *request* does not split into exactly three parts.
    """
    timestamp, user, url = request.split(delimiter)
    timestamp_delimiter = '-'
    # Raw string so that \. and \w reach the regex engine untouched; in a
    # normal string literal they are invalid escape sequences and trigger a
    # DeprecationWarning / SyntaxWarning on recent Python versions.
    pattern = r"((?<=://)|(?<=\.))\w+\.\w+((?=/)|(?=$))"
    return Request(
        parse_timestamp(timestamp, timestamp_delimiter),
        user,
        parse_url(url, pattern),
    )
def split_timestamps(timestamps: Iterable[datetime], interval: int) -> Iterable[list]:
    """Cut a sequence of timestamps into runs of closely spaced items.

    Timestamps are consumed in the order given. A timestamp joins the
    current run when it is strictly less than *interval* seconds away from
    the previous timestamp; otherwise the current run is yielded and a new
    one is started with that timestamp.

    Args:
        timestamps: the datetimes to partition.
        interval: gap, in seconds, at which a new run begins.

    Yields:
        Non-empty lists of datetimes, one per run. Nothing is yielded for
        empty input.
    """
    chunk = []
    for timestamp in timestamps:
        # abs() keeps the gap test symmetric even if the input is not sorted.
        if chunk and abs((chunk[-1] - timestamp).total_seconds()) >= interval:
            yield chunk
            chunk = []
        chunk.append(timestamp)
    # Only flush a real run; an empty input must not produce an empty list.
    if chunk:
        yield chunk
def checkio(log_text: str) -> str:
    """Turn a request log into a session log.

    *log_text* holds whitespace-separated entries of the form
    ``timestamp;;user;;url``. The whole text is lower-cased before parsing.
    Requests are grouped by user name and site; within each group the
    timestamps are sorted and cut into sessions by ``split_timestamps``
    using a 30-minute interval.

    Each session becomes one output line ``name;;site;;duration;;quantity``
    where ``quantity`` is the number of requests and ``duration`` is the
    number of whole seconds between the first and last request plus one.
    Lines are ordered by user name, then site, then by quantity within a
    user/site pair (sessions with equal quantity keep chronological order).
    Lines are joined with newlines.
    """
    field_separator = ";;"
    max_gap_seconds = 30 * 60

    parsed = [
        parse_request(entry, field_separator)
        for entry in log_text.lower().split()
    ]
    # groupby() only merges adjacent items, so sort by the grouping key first.
    parsed.sort(key=lambda req: (req.name, req.site))

    report = []
    for (user, domain), visits in groupby(parsed, key=lambda req: (req.name, req.site)):
        moments = sorted(visit.timestamp for visit in visits)
        found = []
        for chunk in split_timestamps(moments, max_gap_seconds):
            # chunk is sorted, so its ends are the earliest and latest request;
            # a single-request session therefore lasts exactly 1 second.
            elapsed = int((chunk[-1] - chunk[0]).total_seconds()) + 1
            found.append(Session(user, domain, elapsed, len(chunk)))
        found.sort(key=lambda item: item.quantity)
        report.extend(found)

    return "\n".join(show_session(item, field_separator) for item in report)
# The assert below is only a local self-check; it is not needed for auto-testing.
if __name__ == '__main__':
    sample_log = """2013-01-01-01-00-00;;Name;;http://checkio.org/task
2013-01-01-01-02-00;;name;;http://checkio.org/task2
2013-01-01-01-31-00;;Name;;https://admin.checkio.org
2013-01-01-03-00-00;;Name;;http://www.checkio.org/profile
2013-01-01-03-00-01;;Name;;http://example.com
2013-02-03-04-00-00;;user2;;http://checkio.org/task
2013-01-01-03-11-00;;Name;;http://checkio.org/task"""
    expected_report = """name;;checkio.org;;661;;2
name;;checkio.org;;1861;;3
name;;example.com;;1;;1
user2;;checkio.org;;1;;1"""
    assert checkio(sample_log) == expected_report, "Example"
    print("OK")
April 13, 2019