#!/usr/bin/env python3
"""
Parse media file names into typed chunks and propose normalized names.
"""
import argparse
import collections
import enum
import logging
import os
import os.path
import pprint
import re
import sys

# File extensions (without the leading dot) that process_path() handles.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)
# Character class of the characters a title is split on.
SEPARATORS = r"[() .!,_\[\]]"
# Same character class as SEPARATORS with the hyphen added.
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"
# Ordered (chunk_type, regex) pairs. guess_part() returns the type of the
# first pattern that matches a whole chunk, so order matters; the trailing
# "unknown" entry is the catch-all.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|black[-.]chrome[-.]edition"
                r"|extended[-.]edition"
                r"|hq[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    ("subtitles", r"%s?sub" % LANGUAGES),
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    ("file_extension", r"mkv|avi"),
    ("unknown", r".*")
)

# PATTERNS compiled once as anchored, case-insensitive regexes. Each pattern
# is wrapped in a non-capturing group so that a top-level alternation such as
# "mkv|avi" is anchored as a whole ("^(?:mkv|avi)$") and not per alternative
# ("^mkv" or "avi$").
_COMPILED_PATTERNS = tuple(
    (chunk_type, re.compile(r"^(?:" + pattern + r")$", flags=re.I))
    for chunk_type, pattern in PATTERNS
)


# noinspection PyInterpreter
class EnumAction(argparse.Action):
    """
    Argparse action for handling Enums
    """

    def __init__(self, **kwargs):
        """
        Take the Enum class from the ``type`` keyword and derive ``choices``.

        Raises ValueError when ``type`` is missing and TypeError when it is
        not an Enum subclass.
        """
        # Pop off the type value so argparse does not try to apply it itself
        enum_type = kwargs.pop("type", None)

        # Ensure an Enum subclass is provided
        if enum_type is None:
            raise ValueError("type must be assigned an Enum when using EnumAction")
        if not issubclass(enum_type, enum.Enum):
            raise TypeError("type must be an Enum when using EnumAction")

        # Generate choices from the Enum (unless the caller supplied them)
        kwargs.setdefault("choices", tuple(e.value for e in enum_type))

        super().__init__(**kwargs)

        self._enum = enum_type

    def __call__(self, parser, namespace, values, option_string=None):
        """
        Store the Enum member whose value equals the parsed string.
        """
        # Convert value back into an Enum
        value = self._enum(values)
        setattr(namespace, self.dest, value)


class CliAction(enum.Enum):
    """
    Actions selectable on the command line.
    """
    parse = "parse"
    rename = "rename"


_lg = logging.getLogger("spqr.movie-renamer")


def main():
    """
    Parse command-line arguments, configure logging and process the target.

    Returns 0, which is used as the process exit code.
    """
    parser = argparse.ArgumentParser(description="Rename media files.")
    parser.add_argument("action", type=CliAction, action=EnumAction,
                        metavar="ACTION",
                        help="what to do with media file/directory (%(choices)s)")
    parser.add_argument("target", type=str, metavar="TARGET",
                        help="path to the media file/directory")
    parser.add_argument("-v", "--verbose", action="store_true", default=False,
                        help="verbose output")
    args = parser.parse_args()

    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel)

    process_path(args.action, args.target)
    return 0


def process_path(action: CliAction, path):
    """
    Apply ``action`` to ``path``; directories are walked recursively.

    Files whose extension is not listed in PROCESSED_FILETYPES are skipped.
    For ``CliAction.parse`` the parsed title is printed. For
    ``CliAction.rename`` the proposed new name is logged when it differs from
    the current one; no file is renamed by this function.
    """
    # a directory is only a container: recurse into it and stop, so that the
    # directory itself is never handled as a media file
    if os.path.isdir(path):
        for child_path in sorted(os.listdir(path)):
            process_path(action, os.path.join(path, child_path))
        return

    # split file name to title and extension (without the leading dot)
    fname = os.path.basename(path)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    if ext not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", path)
        return

    parsed_title = parse_title(title)

    if action == CliAction.parse:
        print_parsed_title(title, parsed_title)
        return

    if action == CliAction.rename:
        pretty_title = generate_pretty_name(parsed_title)
        pretty_title += ".%s" % ext
        if pretty_title != fname:
            _lg.warning("%s -> %s", fname, pretty_title)
        return


def print_parsed_title(title, parsed):
    """
    Print the raw title followed by its pretty-printed parsed chunks.
    """
    print(title)
    pprint.pprint(parsed, indent=4)


def generate_pretty_name(parsed_title):
    """
    Create file name from parsed chunks.

    Chunks are emitted in the order: "name", then the PATTERNS order with
    "episode_name" placed right after "episode". Values are joined with dots;
    chunk types that are absent or empty are skipped.
    """
    chunk_order = ["name"]
    for chunk_type, _ in PATTERNS:
        chunk_order.append(chunk_type)
        if chunk_type == "episode":
            chunk_order.append("episode_name")

    parts = [
        ".".join(parsed_title[chunk_type])
        for chunk_type in chunk_order
        if parsed_title.get(chunk_type)
    ]
    return ".".join(parts)


def _get_parsed_title_dict(chunk_list, chunk_map):
    """
    Get {chunk_type: [chunk_value_1, ..., chunk_value_n]} dictionary.

    ``chunk_list`` and ``chunk_map`` are parallel lists of values and types.
    The result is a defaultdict(list).
    """
    p_title = collections.defaultdict(list)
    for chunk, chunk_type in zip(chunk_list, chunk_map):
        p_title[chunk_type].append(chunk)
    return p_title


def _guess_combined(chunk_values, chunk_map):
    """
    Try to combine adjacent unknown chunks and parse them.

    Runs of two or more consecutive "unknown" chunks are joined with dots
    (shortest run first) and re-classified; when the joined value is
    recognized, the run is replaced by that single chunk.

    Returns ``(is_changed, chunk_values, chunk_map)``. The input lists are
    not modified; when nothing can be combined they are returned as is.
    """
    is_changed = False
    if chunk_map.count("unknown") < 2:
        return is_changed, chunk_values, chunk_map

    # work on copies so the caller's lists never see the None placeholders
    chunk_values = list(chunk_values)
    chunk_map = list(chunk_map)

    # i - begin of slice, j - end of slice
    i = 0
    # process up to second-to-last element
    while i < len(chunk_map) - 1:
        # we need slice with at least two elements
        j = i + 2
        # we need only unknown elements
        while j <= len(chunk_map) and set(chunk_map[i:j]) == {"unknown"}:
            # create combined chunk
            cmb_chunk = ".".join(chunk_values[i:j])
            cmb_chunk_type = guess_part(cmb_chunk)
            if cmb_chunk_type != "unknown":
                # first subelement gets new chunk, rest - None
                # (will be removed later)
                is_changed = True
                chunk_values[i] = cmb_chunk
                chunk_map[i] = cmb_chunk_type
                for idx in range(i + 1, j):
                    chunk_values[idx] = None
                    chunk_map[idx] = None
                # to start checking next chunks right after the end of slice
                i = j - 1
                break
            # try add more elements to combined chunk
            j += 1
        # start checking next value
        i += 1

    # clean up from None values
    chunk_values = list(filter(None, chunk_values))
    chunk_map = list(filter(None, chunk_map))
    return is_changed, chunk_values, chunk_map


def parse_title(title):
    """
    Split media title to components.

    Returns a plain dict {chunk_type: [chunk values in title order]}. Besides
    the types listed in PATTERNS, the types "name" and "episode_name" can
    appear in the result.
    """
    chunk_values = filter(None, re.split(SEPARATORS, title))
    # remove non-word chunks (like single hyphens), but leave ampersands (&)
    chunk_values = [ch for ch in chunk_values if re.search(r"(\w|&)+", ch)]
    # parse each chunk
    chunk_map = [guess_part(ch_value) for ch_value in chunk_values]

    _, chunk_values, chunk_map = _guess_combined(chunk_values, chunk_map)

    # try to parse unknown chunks, splitting them on hyphens
    if "unknown" in chunk_map:
        is_changed = False
        spl_ch_values = []
        spl_ch_map = []
        for ch_value, ch_type in zip(chunk_values, chunk_map):
            if ch_type == "unknown" and "-" in ch_value:
                for spl_val in ch_value.split("-"):
                    if not spl_val:
                        continue
                    spl_type = guess_part(spl_val)
                    if spl_type != "unknown":
                        is_changed = True
                    spl_ch_values.append(spl_val)
                    spl_ch_map.append(spl_type)
            else:
                spl_ch_values.append(ch_value)
                spl_ch_map.append(ch_type)

        is_combined, spl_ch_values, spl_ch_map = _guess_combined(spl_ch_values,
                                                                 spl_ch_map)
        # keep the split version only if it recognized something new
        if is_changed or is_combined:
            chunk_values = spl_ch_values
            chunk_map = spl_ch_map

    # the steps below run only if there is something except unknown chunks
    if chunk_map.count("unknown") != len(chunk_map):
        # leading unknown chunks are the name
        idx = 0
        while idx < len(chunk_map) and chunk_map[idx] == "unknown":
            chunk_map[idx] = "name"
            idx += 1

        # if episode number is found, next unknown chunks are episode name
        if "episode" in chunk_map:
            idx = chunk_map.index("episode") + 1
            while idx < len(chunk_map) and chunk_map[idx] == "unknown":
                chunk_map[idx] = "episode_name"
                idx += 1

        # at last, strip hyphens from the remaining unknown chunks
        for idx, chunk_type in enumerate(chunk_map):
            if chunk_type == "unknown":
                chunk_values[idx] = chunk_values[idx].strip("-")

    return dict(_get_parsed_title_dict(chunk_values, chunk_map))


def guess_part(chunk_value):
    """
    Return chunk type for given chunk value.

    The type of the first PATTERNS entry that matches the whole value
    (case-insensitively) is returned. Raises RuntimeError when no pattern
    matches.
    """
    for chunk_type, pattern in _COMPILED_PATTERNS:
        if pattern.match(chunk_value):
            return chunk_type
    raise RuntimeError("unhandled pattern type")


if __name__ == "__main__":
    sys.exit(main())