#!/usr/bin/env python3

import argparse
import collections
import logging
import os
import os.path
import re
import sys


# File extensions (without the leading dot) that process_file() handles;
# files with any other extension are skipped.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)
# Regex character class of the characters that split a title into chunks
# (see parse_title). The hyphen is deliberately absent so that hyphenated
# tokens such as "web-dl" reach the patterns in one piece.
SEPARATORS = r"[() .!,_\[\]]"
# Same character class with the hyphen added: an opening bracket plus an
# escaped hyphen, followed by everything in SEPARATORS after its own "[".
# Used by parse_title as a second pass over chunks that stayed unrecognised.
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
# Alternation of three-letter language codes, interpolated into the
# "audio", "subtitles" and "language" patterns below.
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"
# Ordered (chunk type, regex) pairs.
#
# Order matters in two places:
#   * guess_part() tries the patterns top to bottom against a whole chunk,
#     case-insensitively, and returns the first type that matches, so more
#     specific patterns must come before more general ones (e.g. "audio" and
#     "subtitles" precede the bare "language" pattern);
#   * process_file() emits the recognised chunks in this same order, after
#     the "name" chunks.
#
# The final "unknown" entry is the catch-all that classifies anything the
# earlier patterns did not recognise.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|extended[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    # optional language prefix followed by an audio format token
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    # optional language prefix followed by the literal "sub"
    ("subtitles", r"%s?sub" % LANGUAGES),
    # language code with an optional "<N>x" prefix
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    ("unknown", r".*")
)

# Module-level logger; its level is set through logging.basicConfig() in main().
_lg = logging.getLogger("spqr.movie-renamer")


def main():
    """Command-line entry point.

    Parses the command line (a target path and an optional ``-v/--verbose``
    flag), configures logging, and processes the target: every entry of a
    directory, or a single file.

    Returns:
        0 on success, 1 when the target path does not exist.
    """
    parser = argparse.ArgumentParser(description="Rename media files.")
    parser.add_argument("target", type=str,
                        help="path to the media file/directory")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="verbose output")
    args = parser.parse_args()

    # DEBUG additionally shows why individual paths were skipped
    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel)

    # A mistyped path would otherwise be reported only at DEBUG level by
    # process_file() and the script would exit with a success status.
    if not os.path.exists(args.target):
        _lg.error("No such file or directory: %s", args.target)
        return 1

    if os.path.isdir(args.target):
        process_dir(args.target)
    else:
        process_file(args.target)

    return 0


def process_dir(dir_path):
    """Run process_file() on every entry directly inside *dir_path*.

    The listing is not recursive; sub-directories are handed to
    process_file() like any other entry.
    """
    entry_paths = (os.path.join(dir_path, entry)
                   for entry in os.listdir(dir_path))
    for entry_path in entry_paths:
        process_file(entry_path)


def process_file(fpath):
    """Compute the normalised name for one media file and log the change.

    Paths that are not regular files, or whose extension is not listed in
    PROCESSED_FILETYPES, are skipped with a DEBUG message. For the rest, the
    title (file name without extension) is parsed with parse_title() and the
    chunks are re-assembled, dot-separated, as: name chunks first, then each
    chunk type in PATTERNS order, then the original extension.

    Nothing is renamed on disk here: when the assembled name differs from
    the current one, the pair is only logged at WARNING level.

    Args:
        fpath: path to the candidate file.
    """
    # process only files
    if not os.path.isfile(fpath):
        _lg.debug("Not a file: %s", fpath)
        return

    # split the file name into title and extension (without the dot)
    fname = os.path.basename(fpath)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    # Chunk patterns are matched case-insensitively, so the extension check
    # is too: "Movie.MKV" must not be skipped.
    if ext.lower() not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", fpath)
        return

    parsed_title = parse_title(title)

    # create file name from parsed chunks: name first, then PATTERNS order
    chunk_order = ["name"] + [pat_type for pat_type, _ in PATTERNS]
    parts = [
        ".".join(parsed_title[chunk_type])
        for chunk_type in chunk_order
        if parsed_title.get(chunk_type)
    ]
    parts.append(ext)
    result = ".".join(parts)

    if result != fname:
        _lg.warning("%s -> %s", fname, result)


def parse_title(title):
    """Split a media title into typed components.

    The title is cut into chunks on SEPARATORS and every chunk is classified
    with guess_part(). Chunks that stay "unknown" get two more chances:

    1. neighbouring unknown chunks are joined with a dot and classified as a
       pair (e.g. "web" + "dl" -> "web.dl");
    2. the remaining unknown chunks are joined, additionally split on
       hyphens, and the resulting title is parsed recursively.

    Finally, the leading run of still-unknown chunks (starting at chunk 0)
    becomes the "name", provided at least one chunk was recognised.

    Args:
        title: file name without extension.

    Returns:
        dict mapping a chunk type (a PATTERNS type, "name" or "unknown") to
        the list of chunks of that type. Only types with at least one chunk
        are present.
    """
    # split on separators; drop empty chunks and chunks without any word
    # character (like single hyphens)
    chunks = [ch for ch in re.split(SEPARATORS, title) if re.search(r"\w", ch)]
    p_title = collections.defaultdict(list)

    # classify each chunk; remember the position of unrecognised ones
    unknown_chunks = {}
    for idx, chunk in enumerate(chunks):
        pat_type = guess_part(chunk)
        if pat_type != "unknown":
            p_title[pat_type].append(chunk)
        else:
            unknown_chunks[idx] = chunk

    # try to combine adjacent unknown chunks in pairs and classify them
    prev_idx = None
    for idx in sorted(unknown_chunks):
        # no usable predecessor: first unknown chunk, or the previous
        # unknown chunk does not border the current one
        if prev_idx is None or prev_idx + 1 != idx:
            prev_idx = idx
            continue

        cmb_chunk = ".".join((unknown_chunks[prev_idx], unknown_chunks[idx]))
        cmb_chunk_type = guess_part(cmb_chunk)

        # pair not recognised: current chunk may still pair with the next
        if cmb_chunk_type == "unknown":
            prev_idx = idx
            continue

        # pair recognised: record it and consume both halves
        p_title[cmb_chunk_type].append(cmb_chunk)
        del unknown_chunks[prev_idx]
        del unknown_chunks[idx]
        prev_idx = None

    # try to parse unknown chunks again, treating hyphens as separators
    if unknown_chunks:
        # dict preserves insertion order, so values() is in chunk order
        u_chunks_str = ".".join(unknown_chunks.values())
        uc_title = ".".join(
            filter(None, re.split(SEPARATORS_HYPHEN, u_chunks_str)))
        # recursion exit condition: re-splitting changed nothing
        if uc_title != title:
            p_uc_title = parse_title(uc_title)
            # use the nested result only if it recognised something
            if list(p_uc_title.keys()) != ["unknown"]:
                # Merge per type instead of dict.update(): update() would
                # replace the chunks already collected for a type with the
                # nested ones (e.g. drop "ukr" when "rus-eng" is re-parsed).
                for chunk_type, values in p_uc_title.items():
                    p_title[chunk_type].extend(values)
                # the nested call has fully accounted for the unknown
                # chunks (including its own "name" and "unknown" entries)
                unknown_chunks = {}

    # cut name from unknown chunks: the name is the first n consecutive
    # chunks, taken only if something other than unknown chunks was found
    if len(unknown_chunks) != len(chunks):
        expected_idx = 0
        for idx in sorted(unknown_chunks):
            if idx != expected_idx:
                break
            p_title["name"].append(unknown_chunks.pop(idx))
            expected_idx += 1

    # whatever is left keeps its original order under "unknown"
    for idx in sorted(unknown_chunks):
        p_title["unknown"].append(unknown_chunks[idx])
    return dict(p_title)


def guess_part(fname_part):
    """Return the chunk type of a single file-name chunk.

    The patterns in PATTERNS are tried in order, case-insensitively, and a
    pattern must match the whole chunk; the first matching type wins.

    Args:
        fname_part: one chunk of a title.

    Returns:
        The type name of the first pattern that matches the whole chunk.

    Raises:
        RuntimeError: if no pattern matches, which means PATTERNS lacks a
            catch-all entry.
    """
    for pat_type, pattern in PATTERNS:
        # fullmatch anchors the pattern as a whole: unlike "^" + pattern + "$"
        # it is correct for patterns with a top-level alternation and does
        # not accept a trailing newline. re.S lets the ".*" catch-all cover
        # chunks that contain a newline instead of falling through to the
        # error below.
        if re.fullmatch(pattern, fname_part, flags=re.I | re.S):
            return pat_type
    raise RuntimeError("unhandled pattern type")


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())