#!/usr/bin/env python3

import argparse
import collections
import enum
import logging
import os
import os.path
import pprint
import re
import sys


# File extensions (without the leading dot) that process_path() handles.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)
# Character class of the delimiters a title is split on.
SEPARATORS = r"[() .!,_\[\]]"
# Same class with the hyphen added right after the opening bracket.
# NOTE(review): not referenced in the visible part of this file.
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
# Three-letter language codes, reused inside several patterns below.
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"
# Ordered (chunk_type, regex) pairs. guess_part() tries them top to bottom,
# case-insensitively and against the whole chunk, and returns the first
# chunk_type that matches; the trailing catch-all classifies everything
# else as "unknown".
# Every pattern keeps its top-level alternation inside a group so that it
# stays correct when "^" and "$" anchors are placed around it.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|black[-.]chrome[-.]edition"
                r"|extended[-.]edition"
                r"|hq[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    ("subtitles", r"%s?sub" % LANGUAGES),
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    # grouped: a bare "mkv|avi" would become "^mkv" OR "avi$" once anchored
    ("file_extension", r"(mkv|avi)"),
    ("unknown", r".*")
)


# noinspection PyInterpreter
class EnumAction(argparse.Action):
    """
    Argparse action that stores Enum members instead of raw strings.

    The Enum class is passed through the ``type`` keyword of
    ``add_argument``. It is removed from the keyword arguments before they
    reach ``argparse.Action``, so argparse itself keeps handling plain
    strings; unless the caller supplies ``choices``, the Enum's values are
    used as the allowed choices.

    Raises:
        ValueError: if no ``type`` is given.
        TypeError: if ``type`` is not an Enum subclass.
    """
    def __init__(self, **kwargs):
        # Take the Enum class out of kwargs so argparse does not use it
        # as a conversion callable
        enum_type = kwargs.pop("type", None)

        # Ensure an Enum subclass is provided
        if enum_type is None:
            raise ValueError("type must be assigned an Enum when using EnumAction")
        # isinstance check first: issubclass() itself raises a less helpful
        # TypeError when handed something that is not a class
        if not (isinstance(enum_type, type) and issubclass(enum_type, enum.Enum)):
            raise TypeError("type must be an Enum when using EnumAction")

        # Generate choices from the Enum
        kwargs.setdefault("choices", tuple(member.value for member in enum_type))

        super().__init__(**kwargs)

        self._enum = enum_type

    def __call__(self, parser, namespace, values, option_string=None):
        # argparse hands over a list when nargs collects several values;
        # convert each entry, otherwise convert the single value
        if isinstance(values, list):
            converted = [self._enum(item) for item in values]
        else:
            converted = self._enum(values)
        setattr(namespace, self.dest, converted)


class CliAction(enum.Enum):
    """ Actions selectable through the ACTION command-line argument. """
    # print the title of each media file together with its parsed chunks
    parse = "parse"
    # build the normalised file name and log it when it differs from the current one
    rename = "rename"


# logger shared by the functions of this module
_lg = logging.getLogger("spqr.movie-renamer")


def main():
    """ Command-line entry point.

    Parses ACTION, TARGET and the optional -v/--verbose flag, sets the
    logging level (DEBUG when verbose, INFO otherwise) and hands the target
    path to process_path(). Returns 0, which the caller uses as exit status.
    """
    cli = argparse.ArgumentParser(description="Rename media files.")
    cli.add_argument(
        "action",
        metavar="ACTION",
        action=EnumAction,
        type=CliAction,
        help="what to do with media file/directory (%(choices)s)",
    )
    cli.add_argument(
        "target",
        metavar="TARGET",
        type=str,
        help="path to the media file/directory",
    )
    cli.add_argument(
        "-v", "--verbose",
        action="store_true",
        default=False,
        help="verbose output",
    )
    options = cli.parse_args()

    if options.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)

    process_path(options.action, options.target)

    return 0


def process_path(action: CliAction, path):
    """ Apply *action* to a media file, or to every file below a directory.

    Directories are walked recursively in sorted order and are never
    treated as media files themselves. Files whose extension is not listed
    in PROCESSED_FILETYPES (compared case-insensitively) are skipped with a
    debug message.

    For CliAction.parse the parsed title is printed. For CliAction.rename
    the normalised file name is computed and, when it differs from the
    current one, logged as "old -> new"; this function does not touch the
    file system.
    """
    if os.path.isdir(path):
        for child_name in sorted(os.listdir(path)):
            process_path(action, os.path.join(path, child_name))
        # stop here: without this a directory named like "Show.mkv" would
        # fall through and be handled as if it were a media file
        return

    # split file name into title and extension (extension without the dot)
    fname = os.path.basename(path)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    # compare in lower case so "MOVIE.MKV" is accepted as well; the original
    # spelling of the extension is kept for the generated name
    if ext.lower() not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", path)
        return

    parsed_title = parse_title(title)
    if action == CliAction.parse:
        print_parsed_title(title, parsed_title)
    elif action == CliAction.rename:
        pretty_title = "%s.%s" % (generate_pretty_name(parsed_title), ext)
        if pretty_title != fname:
            _lg.warning("%s -> %s", fname, pretty_title)


def print_parsed_title(title, parsed):
    """ Write the raw title and, below it, the pretty-printed parse result to stdout. """
    print(title)
    # pformat + print yields the same text pprint.pprint would write
    print(pprint.pformat(parsed, indent=4))


def generate_pretty_name(parsed_title):
    """ Build a dot-separated file name (without extension) from parsed chunks.

    Chunk types are emitted in a fixed order: "name" first, then the types
    in PATTERNS order, with "episode_name" placed right after "episode".
    Types that are missing or empty in *parsed_title* are left out.
    """
    ordered_types = ["name"]
    ordered_types.extend(pattern_type for pattern_type, _ in PATTERNS)
    ordered_types.insert(ordered_types.index("episode") + 1, "episode_name")

    groups = (parsed_title.get(chunk_type, []) for chunk_type in ordered_types)
    return ".".join(".".join(values) for values in groups if values)


def _get_parsed_title_dict(chunk_list, chunk_map):
    """ Get {chunk_type: [chunk_value_1, ...,  chunk_value_n]} dictionary. """
    p_title = collections.defaultdict(list)
    for idx, chunk in enumerate(chunk_list):
        chunk_type = chunk_map[idx]
        p_title[chunk_type].append(chunk)
    return p_title


def _guess_combined(chunk_values, chunk_map):
    """ Try to merge runs of adjacent unknown chunks into a recognised chunk.

    Starting at each position, two or more neighbouring "unknown" chunks are
    joined with "." and re-classified with guess_part() (so "dd5" followed
    by "1" is retried as "dd5.1"). The shortest run that yields a known type
    is replaced by the single combined chunk, and scanning continues right
    after that run.

    The input lists are not modified.

    Returns:
        (is_changed, chunk_values, chunk_map): whether anything was merged,
        plus new value and type lists of equal length.
    """
    # Work on copies: merged-away slots are temporarily overwritten with
    # None below, and that must not leak into the caller's lists.
    chunk_values = list(chunk_values)
    chunk_map = list(chunk_map)

    is_changed = False
    # a merge needs at least two unknown chunks
    if chunk_map.count("unknown") < 2:
        return is_changed, chunk_values, chunk_map

    total = len(chunk_map)
    # start/stop delimit the candidate run chunk_values[start:stop]
    start = 0
    while start < total - 1:
        stop = start + 2
        # grow the run while it stays inside the list and purely unknown
        while stop <= total and set(chunk_map[start:stop]) == {"unknown"}:
            combined = ".".join(chunk_values[start:stop])
            combined_type = guess_part(combined)
            if combined_type == "unknown":
                stop += 1
                continue

            is_changed = True
            # the first slot takes the merged chunk, the rest are blanked
            chunk_values[start] = combined
            chunk_map[start] = combined_type
            for idx in range(start + 1, stop):
                chunk_values[idx] = None
                chunk_map[idx] = None
            # resume scanning right behind the merged run
            start = stop - 1
            break

        start += 1

    # Drop the blanked slots. Testing for None (not truthiness) keeps both
    # lists aligned whatever the chunk contents are.
    chunk_values = [value for value in chunk_values if value is not None]
    chunk_map = [kind for kind in chunk_map if kind is not None]

    return is_changed, chunk_values, chunk_map


def parse_title(title):
    """ Split a media title into typed components.

    Returns a plain dict {chunk_type: [chunk_value, ...]}. Chunk types are
    the ones listed in PATTERNS plus "name" and "episode_name", which are
    assigned here to unknown chunks based on their position.
    """
    # tokenize on separators; drop empty pieces and pieces without any word
    # character or ampersand (e.g. a lone hyphen)
    chunk_values = [
        piece for piece in re.split(SEPARATORS, title)
        if piece and re.search(r"(\w|&)+", piece)
    ]
    chunk_map = [guess_part(piece) for piece in chunk_values]

    # first pass: merge neighbouring unknown chunks
    _, chunk_values, chunk_map = _guess_combined(chunk_values, chunk_map)

    # second pass: split unknown chunks on hyphens and classify the pieces
    if "unknown" in chunk_map:
        split_values = []
        split_map = []
        split_helped = False
        for value, kind in zip(chunk_values, chunk_map):
            if kind != "unknown" or "-" not in value:
                split_values.append(value)
                split_map.append(kind)
                continue
            for piece in filter(None, value.split("-")):
                piece_kind = guess_part(piece)
                split_helped = split_helped or piece_kind != "unknown"
                split_values.append(piece)
                split_map.append(piece_kind)

        merged, split_values, split_map = _guess_combined(split_values, split_map)
        # adopt the split version only if it recognised something new
        if split_helped or merged:
            chunk_values, chunk_map = split_values, split_map

    # name / episode name are assigned only when at least one chunk was
    # recognised
    if any(kind != "unknown" for kind in chunk_map):
        # the leading run of unknown chunks is the name
        for pos, kind in enumerate(chunk_map):
            if kind != "unknown":
                break
            chunk_map[pos] = "name"
        # unknown chunks right after the first episode number are its name
        if "episode" in chunk_map:
            for pos in range(chunk_map.index("episode") + 1, len(chunk_map)):
                if chunk_map[pos] != "unknown":
                    break
                chunk_map[pos] = "episode_name"

    # finally trim leading/trailing hyphens off the remaining unknown
    # chunks, again only when something was recognised
    if any(kind != "unknown" for kind in chunk_map):
        for pos, kind in enumerate(chunk_map):
            if kind == "unknown":
                chunk_values[pos] = chunk_values[pos].strip("-")

    return dict(_get_parsed_title_dict(chunk_values, chunk_map))


def guess_part(chunk_value):
    """ Return the chunk type for the given chunk value.

    The patterns in PATTERNS are tried in order, case-insensitively, and
    must match the whole chunk; the type of the first matching pattern is
    returned.

    Raises:
        RuntimeError: if no pattern matches at all.
    """
    for chunk_type, pattern in PATTERNS:
        # Group the pattern before anchoring it: "|" binds weaker than the
        # anchors, so an ungrouped "a|b" would turn into "^a" OR "b$".
        full_match_pat = r"^(?:" + pattern + r")$"
        if re.match(full_match_pat, chunk_value, flags=re.I):
            return chunk_type
    raise RuntimeError("unhandled pattern type")


# Run the CLI when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())