2022-01-03 07:56:15 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import collections
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import os.path
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Media file extensions that the renamer will process; every other
# extension is skipped with a debug message.
PROCESSED_FILETYPES = (
    "mkv",
    "avi",
    "ts",
)

# Character class of separators that may delimit chunks in a file name.
SEPARATORS = r"[() .!,_\[\]]"
# Same class with a hyphen included, built by re-opening the class
# (drops the leading "[" of SEPARATORS and prepends "[\-").
SEPARATORS_HYPHEN = r"[\-" + SEPARATORS[1:]
# Three-letter language codes recognized in audio/subtitle/language tags.
LANGUAGES = r"(rus|eng|ukr|jap|ita|chi|kor|ger|fre|spa|pol)"

# Ordered (chunk type, regex) pairs used to classify file-name chunks.
# Order matters: guess_part() returns the FIRST matching type, and the
# final catch-all "unknown" pattern matches anything, so more specific
# patterns must come earlier.
PATTERNS = (
    ("episode", r"s\d{1,2}(e\d{1,2})?"),
    ("year", r"(19|20)\d{2}"),
    ("edition", r"((theatrical|director'*s|extended|un)[-.]?cut"
                r"|imax[-.]edition"
                r"|noir[-.]edition"
                r"|black[-.]chrome[-.]edition"
                r"|extended[-.]edition"
                r"|theatrical)"),
    ("restrictions", r"(unrated)"),
    ("resolution", r"[0-9]{3,4}[pi]"),
    ("quality", r"((blu[-.]?ray|bd)[-.]?remux"
                r"|(blu[-.]?ray|bd|uhd|hd(dvd|tv)?|web([-.]?dl)?|dvd)[-.]?rip"
                r"|web[-.]?dl|blu[-.]?ray|hdtv|hddvd|dvd(9)?|f-hd|uhd|remastered"
                r"|amzn)"),
    ("codec", r"([hx]\.?26[45]|(mpeg4-)?avc|hevc(10)?|xvid|divx)"),
    ("hdr", r"(hdr(10)?|10bit)"),
    ("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
    ("subtitles", r"%s?sub" % LANGUAGES),
    ("language", r"(\d{1,2}x)?%s" % LANGUAGES),
    ("unknown", r".*")
)

# Module-level logger for this tool.
_lg = logging.getLogger("spqr.movie-renamer")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, configure logging, process target.

    Returns 0 so the caller can pass it straight to sys.exit().
    """
    arg_parser = argparse.ArgumentParser(description="Rename media files.")
    arg_parser.add_argument("target", type=str,
                            help="path to the media file/directory")
    arg_parser.add_argument("-v", "--verbose", action="store_true", default=False,
                            help="verbose output")
    opts = arg_parser.parse_args()

    # -v switches the root logger from INFO to DEBUG
    logging.basicConfig(level=logging.DEBUG if opts.verbose else logging.INFO)

    # directories are walked one level deep, single files processed directly
    handler = process_dir if os.path.isdir(opts.target) else process_file
    handler(opts.target)

    return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_dir(dir_path):
    """Run process_file() on every entry directly inside *dir_path*.

    Non-files inside the directory are filtered out by process_file()
    itself, so no type checking is done here.
    """
    for entry in os.listdir(dir_path):
        process_file(os.path.join(dir_path, entry))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_file(fpath):
    """Parse a media file's name and log the normalized rename suggestion.

    Skips non-files and unsupported extensions.  NOTE: this only LOGS the
    proposed new name (dry-run behavior as written); nothing is renamed
    on disk.
    """
    # process only files
    if not os.path.isfile(fpath):
        _lg.debug("Not a file: %s", fpath)
        return

    # split the file name into title and extension (without the dot);
    # os.path.basename replaces the original os.path.split whose dir part
    # was an unused local
    fname = os.path.basename(fpath)
    title, ext = os.path.splitext(fname)
    ext = ext[1:]
    if ext not in PROCESSED_FILETYPES:
        _lg.debug("Extension is not supported: %s", fpath)
        return

    parsed_title = parse_title(title)

    # create file name from parsed chunks: "name" first, then the chunk
    # types in PATTERNS order, all joined with dots
    chunk_order = ["name"] + [k for k, _ in PATTERNS]
    result = []
    for chunk_type in chunk_order:
        if not parsed_title.get(chunk_type, []):
            # this chunk type was not found in the title
            continue
        result.append(".".join(parsed_title[chunk_type]))
    result.append(ext)
    result = ".".join(result)

    # report only when the normalized name actually differs
    if result != fname:
        _lg.warning("%s -> %s", fname, result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_title(title):
    """Split a media title into typed components.

    Returns a plain dict mapping chunk type (a PATTERNS key or "name")
    to the list of chunks of that type, in encounter order.  Chunks that
    never match any pattern end up under "unknown".
    """

    # split on separators and drop the empty strings re.split produces
    chunks = list(filter(None, re.split(SEPARATORS, title)))
    p_title = collections.defaultdict(list)

    # remove non-word chunks (like single hyphens)
    chunks = list(filter(lambda ch: re.search(r"\w+", ch), chunks))

    # first pass: classify each chunk on its own, remembering the
    # positions of unclassified chunks so adjacency can be used below
    unknown_chunks = {}
    for idx, chunk in enumerate(chunks):
        pat_type = guess_part(chunk)
        if pat_type != "unknown":
            p_title[pat_type].append(chunk)
        else:
            unknown_chunks[idx] = chunk

    # second pass: try to combine ADJACENT unknown chunks in pairs
    # (e.g. "blu" + "ray") and parse the combinations
    if len(unknown_chunks) > 1:
        prev_idx = -1
        for idx in sorted(unknown_chunks.keys()):

            # first unknown chunk, skip
            if prev_idx < 0:
                prev_idx = idx
                continue

            # previous unknown chunk does not border with current, skip
            if (prev_idx + 1) != idx:
                prev_idx = idx
                continue

            # create combined chunk, joined with a dot so the [-.]
            # alternatives in PATTERNS can match across the pair
            cmb_chunk = ".".join([unknown_chunks[prev_idx], unknown_chunks[idx]])
            cmb_chunk_type = guess_part(cmb_chunk)

            # check next pair if nothing
            if cmb_chunk_type == "unknown":
                prev_idx = idx
                continue

            # if combined chunk matches pattern, add it to found type
            # and remove from unknown chunks its parts
            p_title[cmb_chunk_type].append(cmb_chunk)
            del unknown_chunks[prev_idx]
            del unknown_chunks[idx]
            prev_idx = -1

    # third pass: try to parse unknown chunks, replacing all hyphens
    # in them with dots, and recurse on the result
    if unknown_chunks:
        # create string from unknown_chunks with dots instead of hyphens
        u_chunks_str = ".".join(unknown_chunks.values())
        uc_title = ".".join(filter(None, re.split(SEPARATORS_HYPHEN, u_chunks_str)))

        # recursion exit condition: stop when re-splitting changed nothing
        if uc_title != title:
            p_uc_title = parse_title(uc_title)

            # if parsed uc_title has smth else than "unknown", update p_title
            # NOTE(review): dict.update() REPLACES whole lists here, so chunks
            # of the same type collected in the earlier passes are discarded
            # in favour of the recursive result -- confirm this is intended
            if list(p_uc_title.keys()) != ["unknown"]:
                p_title.update(p_uc_title)
                # unknown_chunks should be cleared,
                # because it was processed in nested function call
                unknown_chunks = {}

    # cut name from unknown chunks:
    # name is the first n consequent chunks,
    # only if amount of unknown chunks differs from overall amount of chunks
    if len(unknown_chunks) != len(chunks):
        i = 0
        for idx in sorted(unknown_chunks.keys()):
            # stop at the first gap: name chunks must be consecutive from 0
            if idx != i:
                break
            p_title["name"].append(unknown_chunks[idx])
            del unknown_chunks[idx]
            i += 1

    # whatever is still unclassified goes under "unknown"
    for idx in sorted(unknown_chunks.keys()):
        p_title["unknown"].append(unknown_chunks[idx])

    return dict(p_title)
|
2022-01-03 07:56:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def guess_part(fname_part):
    """Classify a single file-name chunk against PATTERNS.

    Returns the type of the first pattern that fully matches
    (case-insensitively).  Raises RuntimeError if nothing matches,
    which is unreachable in practice because the final "unknown"
    pattern (.*) matches any chunk.
    """
    for chunk_kind, regex in PATTERNS:
        anchored = r"^" + regex + r"$"
        if re.match(anchored, fname_part, flags=re.I):
            return chunk_kind

    raise RuntimeError("unhandled pattern type")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and propagate its return code as the process exit status.
    sys.exit(main())
|