vbv/server/vbv_lernwelt/importer/utils.py

112 lines
3.1 KiB
Python

import datetime
import re
from typing import Any, List, Optional, Tuple, Union
from dateutil.parser import parse
from six import string_types
def parse_formats(dt_str, fmt_strs, **parser_kwargs):
    """Parse ``dt_str`` with the first matching ``strptime`` format.

    Args:
        dt_str: The text to parse.
        fmt_strs: ``strptime`` format strings, tried in the given order.
        **parser_kwargs: Forwarded to dateutil's ``parse`` when none of the
            explicit formats match.

    Returns:
        The ``datetime.datetime`` from the first format that matches, or
        otherwise whatever dateutil's ``parse`` returns for ``dt_str``.
    """
    for candidate in fmt_strs:
        try:
            parsed = datetime.datetime.strptime(dt_str, candidate)
        except ValueError:
            # This format does not fit; move on to the next one.
            continue
        return parsed
    # No explicit format matched: let dateutil guess (errors propagate).
    return parse(dt_str, **parser_kwargs)
def try_parse_int(x: Any, default: Optional[Any] = None) -> Tuple[bool, Any]:
    """Attempt to convert ``x`` with ``int()`` without ever raising.

    Args:
        x: The value to convert.
        default: Value reported on failure. When it is ``None`` the
            original ``x`` is handed back instead.

    Returns:
        ``(True, int(x))`` on success, otherwise ``(False, fallback)`` where
        ``fallback`` is ``default`` if one was supplied, else ``x``.
    """
    try:
        converted = int(x)
    except Exception:  # pylint: disable=broad-except
        fallback = x if default is None else default
        return False, fallback
    return True, converted
def try_parse_date(
    value: Union[str, datetime.date],
) -> Tuple[bool, Union[str, datetime.date]]:
    """Attempt to interpret ``value`` as a calendar date without raising.

    Args:
        value: A ``datetime.datetime``, a ``datetime.date``, or a string such
            as ``"2021-05-06"``, ``"2021-05-06T07:08:09"`` or ``"06.05.2021"``.
            Strings matching none of these formats are handed to dateutil
            with ``dayfirst=True``.

    Returns:
        ``(True, date)`` when a date could be derived, otherwise
        ``(False, value)`` with the input returned unchanged.
    """
    # datetime.datetime is a subclass of datetime.date, so it has to be
    # tested first; otherwise the time component would never be stripped.
    if isinstance(value, datetime.datetime):
        return True, value.date()
    if isinstance(value, datetime.date):
        return True, value
    if not isinstance(value, string_types):
        return False, value

    # Plain numbers (optionally with one decimal point) are not dates.
    if value.strip().replace(".", "", 1).isdigit():
        return False, value
    # date needs at least 3 parts
    if len(re.split(r"[.-]", value)) < 3:
        return False, value

    try:
        date_with_time = parse_formats(
            value,
            [
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%d",
                "%d.%m.%Y",
            ],
            dayfirst=True,
        )
    except (ValueError, OverflowError):
        # dateutil documents OverflowError for out-of-range numeric parts;
        # treat it like any other unparseable input.
        return False, value
    return True, date_with_time.date()
def try_parse_datetime(
    value: Union[str, datetime.datetime],
) -> Tuple[bool, Union[str, datetime.datetime]]:
    """Attempt to interpret ``value`` as a datetime without raising.

    Args:
        value: A ``datetime.datetime`` or a string such as
            ``"2021-05-06T07:08:09"``, ``"2021-05-06 07:08"`` or
            ``"06.05.2021, 07:08"``. Strings matching none of the explicit
            formats are handed to dateutil with ``dayfirst=True``.

    Returns:
        ``(True, datetime)`` when a datetime could be derived, otherwise
        ``(False, value)`` with the input returned unchanged.
    """
    if isinstance(value, datetime.datetime):
        return True, value
    if not isinstance(value, string_types):
        return False, value

    # Plain numbers (optionally with one decimal point) are not datetimes.
    if value.strip().replace(".", "", 1).isdigit():
        return False, value
    # date needs at least 3 parts
    if len(re.split(r"[.-]", value)) < 3:
        return False, value

    try:
        date_with_time = parse_formats(
            value,
            [
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%dT%H:%M",
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M",
                "%d.%m.%Y, %H:%M",
                "%d.%m.%Y, %H.%M",
            ],
            dayfirst=True,
        )
    except (ValueError, OverflowError):
        # dateutil documents OverflowError for out-of-range numeric parts;
        # treat it like any other unparseable input.
        return False, value
    return True, date_with_time
def parse_circle_group_string(value: str) -> List[str]:
    """Split ``value`` on top-level commas and trim each piece.

    A comma is skipped when only non-parenthesis characters separate it from
    a following ``)``, i.e. when it sits inside a parenthesised group.

    Args:
        value: The comma-separated text.

    Returns:
        The pieces in their original order, each with surrounding whitespace
        removed.
    """
    pieces = re.split(r",(?![^()]*\))", value)
    return list(map(str.strip, pieces))
def calc_header_tuple_list_from_pyxl_sheet(sheet):
    """Pair every non-empty data row of ``sheet`` with the header row.

    The header is read from the cell values of ``sheet[1]``; data rows come
    from ``sheet.iter_rows(min_row=2, values_only=True)``.

    Args:
        sheet: A worksheet-like object supporting ``sheet[1]`` and
            ``iter_rows`` (presumably an openpyxl worksheet, going by the
            function name).

    Returns:
        One list per data row containing ``(header, value)`` tuples. Rows in
        which every value is ``None`` are left out; ``zip`` stops at the
        shorter of header and row.
    """
    column_names = [cell.value for cell in sheet[1]]
    return [
        list(zip(column_names, values))
        for values in sheet.iter_rows(min_row=2, values_only=True)
        if any(entry is not None for entry in values)
    ]