#!/usr/bin/env python3
"""Propose normalized names for media files.

A file name is split into chunks on separator characters, every chunk is
classified against ``PATTERNS`` (year, resolution, codec, ...), and the chunks
are re-assembled in a fixed order: name first, then the pattern types in the
order they are listed in ``PATTERNS``, then the extension.

The script only reports the proposed name through the logger; it does not
rename anything on disk.
"""

import argparse
import collections
import logging
import os
import os.path
import re
import string
import sys


# File extensions (without the leading dot) that are processed.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)
# Character class of the characters that separate chunks in a title.
SEPARATORS = r"[() .!,_\[\]]"
# Same character class, with the hyphen added as a separator.
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"
# (chunk type, regex) pairs. Order matters twice: guess_part() returns the
# first type whose regex matches the whole chunk, and process_file() emits the
# chunk types in this order. "unknown" matches anything and must stay last.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    ("subtitles", r"%s?sub" % LANGUAGES),
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    ("unknown", r".*")
)
# PATTERNS compiled once as anchored, case-insensitive whole-chunk matchers.
# The non-capturing group keeps the anchors bound to the whole pattern even
# if a pattern has a top-level alternation.
_COMPILED_PATTERNS = tuple(
    (pat_type, re.compile(r"^(?:" + pattern + r")$", flags=re.I))
    for pat_type, pattern in PATTERNS
)

_lg = logging.getLogger("spqr.movie-renamer")


def main():
    """Parse command-line arguments and process the target path.

    The target is processed as a directory when it is one, otherwise as a
    single file.

    Returns:
        0, used as the process exit code.
    """
    parser = argparse.ArgumentParser(description="Rename media files.")
    parser.add_argument("target", type=str,
                        help="path to the media file/directory")
    parser.add_argument("-v", "--verbose", action="store_true", default=False,
                        help="verbose output")
    args = parser.parse_args()

    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel)

    if os.path.isdir(args.target):
        process_dir(args.target)
    else:
        process_file(args.target)

    return 0


def process_dir(dir_path):
    """Run process_file() on every entry directly inside *dir_path*.

    Sub-directories are not descended into; process_file() skips anything
    that is not a regular file.
    """
    for fname in os.listdir(dir_path):
        process_file(os.path.join(dir_path, fname))


def _build_filename(parsed_title, ext):
    """Join parsed chunks into a dot-separated file name ending in *ext*."""
    chunk_order = ["name"] + [pat_type for pat_type, _ in PATTERNS]
    parts = [
        ".".join(parsed_title[chunk_type])
        for chunk_type in chunk_order
        # .get() so that probing a defaultdict does not create empty keys
        if parsed_title.get(chunk_type)
    ]
    parts.append(ext)
    return ".".join(parts)


def process_file(fpath):
    """Log the normalized name proposed for the media file at *fpath*.

    Paths that are not regular files, and files whose extension is not in
    PROCESSED_FILETYPES, are skipped with a debug message. When the proposed
    name differs from the current one, it is logged at warning level.
    """
    # process only files
    if not os.path.isfile(fpath):
        _lg.debug("Not a file: %s", fpath)
        return

    # split the file name into title and extension (without the dot)
    fname = os.path.basename(fpath)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    if ext not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", fpath)
        return

    result = _build_filename(parse_title(title), ext)

    if result != fname:
        _lg.warning("%s -> %s", fname, result)


def _merge_reparsed(p_title, reparsed):
    """Fold the re-parse of the leftover unknown chunks into *p_title*.

    *reparsed* was produced from the unknown chunks of *p_title* only, so its
    leftovers replace the "unknown" list, while every recognized type is
    appended to what *p_title* already holds instead of overwriting it.
    """
    leading = reparsed.get("name", [])
    leftovers = reparsed.get("unknown", [])
    if p_title.get("name"):
        # The real name is already known: the chunks leading the re-parsed
        # leftovers are ordinary unknown chunks, not a second name.
        p_title["unknown"] = leading + leftovers
    else:
        if leading:
            p_title["name"] = leading
        p_title["unknown"] = leftovers
    for chunk_type, chunks in reparsed.items():
        if chunk_type not in ("name", "unknown") and chunks:
            p_title[chunk_type].extend(chunks)


def parse_title(title):
    """Split a media title into typed components.

    Args:
        title: file name without its extension.

    Returns:
        A ``collections.defaultdict(list)`` mapping a chunk type ("name" or
        one of the PATTERNS types) to the chunks of that type, in the order
        they were found. The "unknown" key is always present.
    """
    chunks = [chunk for chunk in re.split(SEPARATORS, title) if chunk]
    p_title = collections.defaultdict(list)

    # classify each chunk; everything before the first recognized piece of
    # meta info is considered part of the name
    is_name = True
    for chunk in chunks:
        pat_type = guess_part(chunk)
        if is_name:
            if pat_type == "unknown":
                pat_type = "name"
            else:
                is_name = False
        p_title[pat_type].append(chunk)

    # if name is the only thing we have, then we parsed nothing
    if is_name:
        p_title["unknown"] = p_title.pop("name", [])

    # drop unknown chunks without any letter or digit (like single hyphens);
    # str.isalnum() keeps non-ASCII chunks, which an ASCII-only test would
    # silently discard
    u_chunks = [
        chunk for chunk in p_title.get("unknown", [])
        if any(char.isalnum() for char in chunk)
    ]
    p_title["unknown"] = u_chunks

    # try to combine neighbouring unknown chunks in pairs and parse them
    # (a value such as "DD5.1" is split in two by the "." separator)
    i = 0
    while i < len(u_chunks) - 1:
        cmb_chunk = ".".join(u_chunks[i:i + 2])
        cmb_chunk_type = guess_part(cmb_chunk)

        # go to next pair if nothing
        if cmb_chunk_type == "unknown":
            i += 1
            continue

        # the pair matched: file it under its type and drop both halves from
        # unknown; i is not advanced so the next pair starts at the same index
        p_title[cmb_chunk_type].append(cmb_chunk)
        del u_chunks[i:i + 2]

    # try to parse what is still unknown again, this time treating hyphens
    # as separators too
    if u_chunks:
        uc_title = ".".join(
            filter(None, re.split(SEPARATORS_HYPHEN, ".".join(u_chunks))))
        # recursion exit condition
        if uc_title != title:
            p_uc_title = parse_title(uc_title)
            recognized = any(
                found for chunk_type, found in p_uc_title.items()
                if chunk_type not in ("name", "unknown")
            )
            # keep the original (hyphenated) unknown chunks untouched unless
            # the re-parse actually recognized something
            if recognized:
                _merge_reparsed(p_title, p_uc_title)

    return p_title


def guess_part(fname_part):
    """Return the type of the first pattern that matches all of *fname_part*.

    Matching is case-insensitive and follows the order of PATTERNS, whose
    last entry ("unknown") is a catch-all.

    Raises:
        RuntimeError: if no pattern matches.
    """
    for pat_type, matcher in _COMPILED_PATTERNS:
        if matcher.match(fname_part):
            return pat_type
    raise RuntimeError("unhandled pattern type")


if __name__ == "__main__":
    sys.exit(main())

# NOTE: the patch that introduced this module also adds requirements-dev.txt
# with a single development dependency: pylint