#!/usr/bin/env python3
"""Parse media file names into typed chunks and propose normalised names."""

import argparse
import collections
import logging
import os
import os.path
import re
import string
import sys

# File extensions (without the leading dot) that process_file() handles.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)
# Character class used to split a title into chunks.
SEPARATORS = r"[() .!,_\[\]]"
# Same class as SEPARATORS, with the hyphen added as a separator.
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"
# Ordered (chunk type, regex) pairs; the first pattern that matches a whole
# chunk wins, and the catch-all "unknown" entry must stay last.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    ("subtitles", r"%s?sub" % LANGUAGES),
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    ("unknown", r".*")
)

# PATTERNS anchored to the whole chunk and compiled once at import time.
_COMPILED_PATTERNS = tuple(
    (pat_type, re.compile(r"^" + pattern + r"$", flags=re.I))
    for pat_type, pattern in PATTERNS
)

# Characters that make an unknown chunk worth keeping.
_ALNUM_CHARS = frozenset(string.digits + string.ascii_lowercase)

_lg = logging.getLogger("spqr.movie-renamer")


def main():
    """Parse command-line arguments and process the target file or directory.

    Returns:
        0, used as the process exit status.
    """
    parser = argparse.ArgumentParser(description="Rename media files.")
    parser.add_argument("target", type=str,
                        help="path to the media file/directory")
    parser.add_argument("-v", "--verbose", action="store_true", default=False,
                        help="verbose output")
    args = parser.parse_args()

    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel)

    if os.path.isdir(args.target):
        process_dir(args.target)
    else:
        process_file(args.target)
    return 0


def process_dir(dir_path):
    """Run process_file() on every direct entry of *dir_path*.

    Sub-directories are not descended into; process_file() skips them.
    """
    for fname in os.listdir(dir_path):
        fpath = os.path.join(dir_path, fname)
        process_file(fpath)


def process_file(fpath):
    """Log the normalised name proposed for the media file at *fpath*.

    Paths that are not regular files, or whose extension is not listed in
    PROCESSED_FILETYPES, are skipped with a debug message. This function
    only logs the proposed name; it does not rename anything on disk.
    """
    # process only files
    if not os.path.isfile(fpath):
        _lg.debug("Not a file: %s", fpath)
        return

    # split filepath to dir path, title, and extension
    dir_path, fname = os.path.split(fpath)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    if ext not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", fpath)
        return

    parsed_title = parse_title(title)

    # create file name from parsed chunks: name first, then PATTERNS order
    chunk_order = ["name"] + [k for k, _ in PATTERNS]
    result = []
    for chunk_type in chunk_order:
        if not parsed_title[chunk_type]:
            continue
        result.append(".".join(parsed_title[chunk_type]))
    result.append(ext)
    result = ".".join(result)

    if result != fname:
        _lg.warning("%s -> %s", fname, result)


def _merge_reparsed(p_title, reparsed):
    """Fold the result of re-parsing the unknown chunks into *p_title*.

    Classified chunks are appended to the lists already collected instead of
    replacing them. Chunks the re-parse labelled "name" are adopted as the
    name only when *p_title* has no name yet; otherwise they are just the
    leading unclassified chunks of the re-parse and remain "unknown".
    """
    has_name = bool(p_title.get("name"))
    leftover = []
    for chunk_type, chunks in reparsed.items():
        if chunk_type == "unknown":
            continue
        if chunk_type == "name" and has_name:
            leftover.extend(chunks)
        else:
            p_title[chunk_type].extend(chunks)
    # The old unknown chunks were fully re-parsed, so replace them.
    p_title["unknown"] = leftover + list(reparsed.get("unknown", []))


def parse_title(title):
    """
    Split media title to components.

    Args:
        title: file name without directory and extension.

    Returns:
        A ``collections.defaultdict(list)`` mapping chunk type ("name" or a
        PATTERNS key) to the list of matching chunks, in order of appearance.
        The "unknown" key is always present.
    """
    chunks = list(filter(None, re.split(SEPARATORS, title)))
    p_title = collections.defaultdict(list)

    # parse each chunk
    is_name = True
    for chunk in chunks:
        pat_type = guess_part(chunk)
        # consider chunk as part of the name until meta info is found
        if is_name:
            if pat_type == "unknown":
                pat_type = "name"
            else:
                is_name = False
        p_title[pat_type].append(chunk)

    # if name is the only thing we have, then we parsed nothing
    if is_name:
        p_title["unknown"] = p_title["name"]
        del p_title["name"]

    # remove unknown chunks without alphanumerals (like single hyphens)
    u_chunks = [
        u_chunk for u_chunk in p_title.get("unknown", [])
        if set(u_chunk.lower()) & _ALNUM_CHARS
    ]
    p_title["unknown"] = u_chunks

    # try to combine unknown chunks in pairs and parse them
    i = 0
    while i < len(u_chunks) - 1:
        # create combined chunk
        cmb_chunk = ".".join(u_chunks[i:i + 2])
        cmb_chunk_type = guess_part(cmb_chunk)
        # go to next pair if nothing
        if cmb_chunk_type == "unknown":
            i += 1
            continue
        # if combined chunk matches pattern, add to found type and remove
        # its parts from unknown; i stays put so the next pair is examined
        p_title[cmb_chunk_type].append(cmb_chunk)
        del u_chunks[i:i + 2]

    # try to parse unknown chunks, replacing all hyphens in them with dots
    if u_chunks:
        # create string from u_chunks with dots instead of hyphens
        uc_title = ".".join(
            filter(None, re.split(SEPARATORS_HYPHEN, ".".join(u_chunks))))
        # recursion exit condition
        if uc_title != title:
            p_uc_title = parse_title(uc_title)
            # merge only if the re-parse found something besides "unknown"
            if list(p_uc_title.keys()) != ["unknown"]:
                _merge_reparsed(p_title, p_uc_title)

    return p_title


def guess_part(fname_part):
    """Return the type of the first PATTERNS entry matching all of *fname_part*.

    Matching is case-insensitive.

    Raises:
        RuntimeError: if no pattern matches, including the catch-all one.
    """
    for pat_type, pattern in _COMPILED_PATTERNS:
        if pattern.match(fname_part):
            return pat_type
    raise RuntimeError("unhandled pattern type")


if __name__ == "__main__":
    sys.exit(main())