#!/usr/bin/env python3
"""Propose normalized names for media files.

A file name is split into chunks, every chunk is classified with the regular
expressions in ``PATTERNS`` and the chunks are then re-assembled in a fixed
order (name first, then the order of ``PATTERNS``).  The proposed name is only
logged; nothing on disk is modified.
"""

import argparse
import collections
import logging
import os
import os.path
import re
import sys

# File extensions (lower-case, without the leading dot) that are processed.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)

# Characters that separate chunks inside a title.
SEPARATORS = r"[() .!,_\[\]]"
# Same character class with the hyphen added to it.
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]

LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"

# (chunk type, regex) pairs.  Order matters twice: guess_part() returns the
# first type whose regex matches the whole chunk, and process_file() emits the
# chunk types in this order.  The trailing "unknown" entry matches anything.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|extended[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    ("subtitles", r"%s?sub" % LANGUAGES),
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    ("unknown", r".*")
)

_lg = logging.getLogger("spqr.movie-renamer")


def main():
    """Parse the command line and process the given file or directory.

    Returns:
        0, which is used as the process exit status.
    """
    parser = argparse.ArgumentParser(description="Rename media files.")
    parser.add_argument("target", type=str,
                        help="path to the media file/directory")
    parser.add_argument("-v", "--verbose", action="store_true", default=False,
                        help="verbose output")
    args = parser.parse_args()

    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel)

    if os.path.isdir(args.target):
        process_dir(args.target)
    else:
        process_file(args.target)
    return 0


def process_dir(dir_path):
    """Run process_file() on every direct entry of *dir_path*.

    Sub-directories are not descended into; process_file() ignores them.
    """
    for fname in os.listdir(dir_path):
        process_file(os.path.join(dir_path, fname))


def process_file(fpath):
    """Build the normalized name for one media file and log it.

    Paths that are not regular files, or whose extension is not listed in
    PROCESSED_FILETYPES, are skipped with a debug message.  When the
    normalized name differs from the current one, the pair is logged at
    WARNING level.  The file itself is not renamed.

    Args:
        fpath: path to the candidate file.
    """
    # process only files
    if not os.path.isfile(fpath):
        _lg.debug("Not a file: %s", fpath)
        return

    # split filepath to dir path, title, and extension
    _dir_path, fname = os.path.split(fpath)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    # Compare case-insensitively so that e.g. "Movie.MKV" is not skipped;
    # the original spelling of the extension is kept in the result.
    if ext.lower() not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", fpath)
        return

    parsed_title = parse_title(title)

    # create file name from parsed chunks: name first, then PATTERNS order
    chunk_order = ["name"] + [chunk_type for chunk_type, _ in PATTERNS]
    result = [
        ".".join(parsed_title[chunk_type])
        for chunk_type in chunk_order
        if parsed_title.get(chunk_type)
    ]
    result.append(ext)
    result = ".".join(result)

    if result != fname:
        _lg.warning("%s -> %s", fname, result)


def parse_title(title):
    """Split a media title into typed components.

    Args:
        title: file name without its extension.

    Returns:
        A plain dict mapping a chunk type (a PATTERNS key or "name") to the
        list of chunks of that type, in the order they were found.  Types
        with no chunks are absent.
    """
    chunks = list(filter(None, re.split(SEPARATORS, title)))
    p_title = collections.defaultdict(list)

    # remove non-word chunks (like single hyphens)
    chunks = [chunk for chunk in chunks if re.search(r"\w+", chunk)]

    # parse each chunk; unrecognized ones are remembered by position
    unknown_chunks = {}
    for idx, chunk in enumerate(chunks):
        pat_type = guess_part(chunk)
        if pat_type != "unknown":
            p_title[pat_type].append(chunk)
        else:
            unknown_chunks[idx] = chunk

    # try to combine unknown chunks in pairs and parse them
    if len(unknown_chunks) > 1:
        prev_idx = -1
        # sorted() takes a snapshot, so deleting entries below is safe
        for idx in sorted(unknown_chunks):
            # first unknown chunk, skip
            if prev_idx < 0:
                prev_idx = idx
                continue
            # previous unknown chunk does not border with current, skip
            if (prev_idx + 1) != idx:
                prev_idx = idx
                continue
            # create combined chunk
            cmb_chunk = ".".join([unknown_chunks[prev_idx],
                                  unknown_chunks[idx]])
            cmb_chunk_type = guess_part(cmb_chunk)
            # check next pair if nothing
            if cmb_chunk_type == "unknown":
                prev_idx = idx
                continue
            # if combined chunk matches pattern, add it to found type
            # and remove from unknown chunks its parts
            p_title[cmb_chunk_type].append(cmb_chunk)
            del unknown_chunks[prev_idx]
            del unknown_chunks[idx]
            prev_idx = -1

    # try to parse unknown chunks, replacing all hyphens in them with dots
    if unknown_chunks:
        # create string from unknown_chunks with dots instead of hyphens
        u_chunks_str = ".".join(unknown_chunks.values())
        uc_title = ".".join(
            filter(None, re.split(SEPARATORS_HYPHEN, u_chunks_str)))
        # recursion exit condition
        if uc_title != title:
            p_uc_title = parse_title(uc_title)
            # if parsed uc_title has smth else than "unknown", merge it in
            if list(p_uc_title.keys()) != ["unknown"]:
                # Extend instead of dict.update(): update() would replace
                # the lists of types already found above and lose chunks.
                for chunk_type, found in p_uc_title.items():
                    p_title[chunk_type].extend(found)
                # unknown_chunks should be cleared,
                # because it was processed in nested function call
                unknown_chunks = {}

    # cut name from unknown chunks
    # name is the first n consecutive chunks
    # only if amount of unknown chunks differs from overall amount of chunks
    if len(unknown_chunks) != len(chunks):
        for expected_idx, idx in enumerate(sorted(unknown_chunks)):
            if idx != expected_idx:
                break
            p_title["name"].append(unknown_chunks[idx])
            del unknown_chunks[idx]

    for idx in sorted(unknown_chunks):
        p_title["unknown"].append(unknown_chunks[idx])

    return dict(p_title)


def guess_part(fname_part):
    """Return the type of the first PATTERNS entry matching *fname_part*.

    The match is case-insensitive and must cover the whole string.

    Raises:
        RuntimeError: if no pattern matches at all.
    """
    for pat_type, pattern in PATTERNS:
        full_match_pat = r"^" + pattern + r"$"
        if re.match(full_match_pat, fname_part, flags=re.I):
            return pat_type
    raise RuntimeError("unhandled pattern type")


if __name__ == "__main__":
    sys.exit(main())