movie-renamer/renamer.py

177 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import collections
import logging
import os
import os.path
import re
import string
import sys
# File extensions (without the leading dot) that process_file() accepts;
# files with any other extension are skipped.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)
# Regex character class of the characters a title is split on.
SEPARATORS = r"[() .!,_\[\]]"
# The same class with the hyphen added: the opening "[" of SEPARATORS is
# dropped and replaced by "[\-".
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
# Three-letter language codes, as one regex group; interpolated into the
# "audio", "subtitles" and "language" patterns below.
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"
# (chunk type, regex) pairs describing the metadata recognised in a title.
# guess_part() tries them top to bottom, anchored at both ends and
# case-insensitively, and returns the first type that matches; the
# catch-all "unknown" entry therefore has to stay last.
# process_file() also emits the chunk groups in this order (after "name").
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    # optional language prefix followed by an audio format
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    # optional language prefix followed by "sub"
    ("subtitles", r"%s?sub" % LANGUAGES),
    # language code, optionally preceded by a "<1-2 digits>x" prefix
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    # catch-all: anything not matched above
    ("unknown", r".*")
)
# Module-level logger; main() configures the root logging level.
_lg = logging.getLogger("spqr.movie-renamer")
def main():
    """Command-line entry point.

    Parses the ``target`` path and the ``-v/--verbose`` switch, sets the
    logging level (DEBUG when verbose, INFO otherwise) and hands the
    target to process_dir() if it is a directory, or to process_file()
    in every other case.

    Returns:
        0, intended to be used as the process exit status.
    """
    arg_parser = argparse.ArgumentParser(description="Rename media files.")
    arg_parser.add_argument(
        "target", type=str,
        help="path to the media file/directory")
    arg_parser.add_argument(
        "-v", "--verbose", action="store_true", default=False,
        help="verbose output")
    options = arg_parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO)

    # pick the handler by target kind, then run it on the target
    handler = process_dir if os.path.isdir(options.target) else process_file
    handler(options.target)
    return 0
def process_dir(dir_path):
    """Run process_file() on every entry directly inside ``dir_path``.

    The directory is listed once and is not descended into; each entry
    name is joined with ``dir_path`` before being passed on.

    Args:
        dir_path: Path to the directory to scan.
    """
    entry_paths = (
        os.path.join(dir_path, entry_name)
        for entry_name in os.listdir(dir_path)
    )
    for entry_path in entry_paths:
        process_file(entry_path)
def process_file(fpath):
    """Work out the normalised name for one media file and log it.

    Paths that are not regular files, or whose extension is not listed in
    PROCESSED_FILETYPES, are skipped with a debug message. Otherwise the
    title (file name without extension) is parsed with parse_title() and
    reassembled as the "name" chunks followed by the metadata groups in
    PATTERNS order and the extension, all joined with dots. If the result
    differs from the current file name, the pair is logged as a warning;
    this function does not touch the file itself.

    Args:
        fpath: Path to the candidate file.
    """
    # process only files
    if not os.path.isfile(fpath):
        _lg.debug("Not a file: %s", fpath)
        return

    # split the file name into title and extension (without the dot)
    fname = os.path.basename(fpath)
    title, dotted_ext = os.path.splitext(fname)
    ext = dotted_ext[1:]
    # compare case-insensitively so that e.g. "MOVIE.MKV" is not skipped;
    # the extension keeps its original spelling in the generated name
    if ext.lower() not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", fpath)
        return

    parsed_title = parse_title(title)

    # create file name from parsed chunks: name first, then PATTERNS order;
    # groups with no chunks are left out
    chunk_order = ["name"] + [pat_type for pat_type, _ in PATTERNS]
    parts = [
        ".".join(parsed_title[chunk_type])
        for chunk_type in chunk_order
        if parsed_title.get(chunk_type)
    ]
    parts.append(ext)
    new_fname = ".".join(parts)

    if new_fname != fname:
        _lg.warning("%s -> %s", fname, new_fname)
def parse_title(title):
    """Split a media title into typed groups of chunks.

    The title is split on SEPARATORS and every chunk is classified with
    guess_part(). Leading chunks that match no metadata pattern form the
    "name"; after the first metadata chunk, unmatched chunks are filed
    under "unknown". Unknown chunks then get two more chances:

    * adjacent pairs are re-joined with a dot and classified again
      (e.g. "DD5" + "1" -> "DD5.1");
    * what is still unknown is re-parsed recursively with hyphens treated
      as separators, and the outcome is merged into the result.

    Args:
        title: File name without directory and extension.

    Returns:
        A collections.defaultdict(list) mapping a chunk type ("name" or a
        PATTERNS key) to the list of chunks of that type. The "unknown"
        key is always present; when no metadata was recognised at all it
        is the only key.
    """
    parsed = collections.defaultdict(list)

    # classify each chunk; consider chunks part of the name until the
    # first piece of meta info is found
    in_name = True
    for chunk in filter(None, re.split(SEPARATORS, title)):
        chunk_type = guess_part(chunk)
        if in_name:
            if chunk_type == "unknown":
                chunk_type = "name"
            else:
                in_name = False
        parsed[chunk_type].append(chunk)

    # if name is the only thing we have, then we parsed nothing
    if in_name:
        parsed["unknown"] = parsed.pop("name", [])

    # remove unknown chunks without any letter or digit (like single
    # hyphens); str.isalnum() keeps non-ASCII words, which an ASCII-only
    # check would silently discard
    unknown = [
        chunk for chunk in parsed.get("unknown", [])
        if any(char.isalnum() for char in chunk)
    ]
    parsed["unknown"] = unknown

    # try to combine adjacent unknown chunks in pairs and parse them
    idx = 0
    while idx < len(unknown) - 1:
        pair = ".".join(unknown[idx:idx + 2])
        pair_type = guess_part(pair)
        # go to next pair if nothing
        if pair_type == "unknown":
            idx += 1
            continue
        # the pair matches a pattern: file it under that type and drop
        # both halves; idx stays put so the next pair starts at this slot
        parsed[pair_type].append(pair)
        del unknown[idx:idx + 2]

    # try to parse what is still unknown, treating hyphens as separators
    if unknown:
        dotted = ".".join(
            filter(None, re.split(SEPARATORS_HYPHEN, ".".join(unknown))))
        # recursion exit condition: re-splitting changed nothing
        if dotted != title:
            reparsed = parse_title(dotted)
            # merge only if the re-parse recognised something
            if list(reparsed) != ["unknown"]:
                leftover = reparsed.pop("unknown", [])
                leading = reparsed.pop("name", [])
                if parsed.get("name"):
                    # the name is already known, so the leading chunks of
                    # the re-parse are merely unrecognised leftovers
                    leftover = leading + leftover
                elif leading:
                    parsed["name"] = leading
                # extend instead of overwriting, so that metadata found
                # before the re-parse is not lost
                for chunk_type, found in reparsed.items():
                    parsed[chunk_type].extend(found)
                parsed["unknown"] = leftover

    return parsed
def guess_part(fname_part):
    """Classify a single chunk of a file name.

    Every PATTERNS regex is anchored with ``^``/``$`` and tried in order,
    ignoring case; the type of the first one that matches is returned.

    Args:
        fname_part: The chunk to classify.

    Returns:
        The chunk type (first element of the matching PATTERNS entry).

    Raises:
        RuntimeError: If no pattern at all matches the chunk.
    """
    matching_types = (
        candidate_type
        for candidate_type, candidate_re in PATTERNS
        if re.match(r"^" + candidate_re + r"$", fname_part, flags=re.I)
    )
    first_hit = next(matching_types, None)
    if first_hit is None:
        raise RuntimeError("unhandled pattern type")
    return first_hit
# Run main() only when executed as a script and use its return value as
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())