Refactoring

This commit is contained in:
Maks Snegov 2022-02-08 23:45:55 +03:00
parent fe0d345aa2
commit 2204a3a2c3

View File

@ -65,11 +65,32 @@ def main():
def process_dir(dir_path):
    """
    Process every entry found directly inside a directory with media files.

    Each name returned by os.listdir() is joined with dir_path and passed to
    process_file(). The listing is not recursive.

    NOTE: no season-level grouping happens here; every entry is processed
    individually.
    """
    for entry_name in os.listdir(dir_path):
        process_file(os.path.join(dir_path, entry_name))
def generate_pretty_name(parsed_title):
    """
    Build a dot-separated file name from parsed title chunks.

    Chunk types are emitted in a fixed order: "name" first, then the chunk
    types in the order PATTERNS lists them, with "episode_name" placed
    directly after "episode". Chunk types that are absent from parsed_title
    or have no values are skipped. Values of one chunk type are joined with
    dots, and the resulting groups are joined with dots as well.
    """
    ordered_types = ["name", *(kind for kind, _ in PATTERNS)]
    # the episode title, when present, goes right after the episode number
    ordered_types.insert(ordered_types.index("episode") + 1, "episode_name")

    groups = [
        ".".join(parsed_title[kind])
        for kind in ordered_types
        if parsed_title.get(kind)
    ]
    return ".".join(groups)
def process_file(fpath): def process_file(fpath):
# process only files # process only files
if not os.path.isfile(fpath): if not os.path.isfile(fpath):
@ -85,25 +106,15 @@ def process_file(fpath):
return return
parsed_title = parse_title(title) parsed_title = parse_title(title)
pretty_title = generate_pretty_name(parsed_title)
pretty_title += ".%s" % ext
# create file name from parsed chunks if pretty_title != fname:
chunk_order = [k for k, _ in PATTERNS] _lg.warning("%s -> %s", fname, pretty_title)
chunk_order = ["name"] + chunk_order
episode_idx = chunk_order.index("episode") + 1
chunk_order = chunk_order[:episode_idx] + ["episode_name"] + chunk_order[episode_idx:]
result = []
for chunk_type in chunk_order:
if not parsed_title.get(chunk_type, []):
continue
result.append(".".join(parsed_title[chunk_type]))
result.append(ext)
result = ".".join(result)
if result != fname:
_lg.warning("%s -> %s", fname, result)
def _get_parsed_title_dict(chunk_list, chunk_map): def _get_parsed_title_dict(chunk_list, chunk_map):
""" Get {chunk_type: [chunk_value_1, ..., chunk_value_n]} dictionary. """
p_title = collections.defaultdict(list) p_title = collections.defaultdict(list)
for idx, chunk in enumerate(chunk_list): for idx, chunk in enumerate(chunk_list):
chunk_type = chunk_map[idx] chunk_type = chunk_map[idx]
@ -112,7 +123,7 @@ def _get_parsed_title_dict(chunk_list, chunk_map):
def _guess_combined(chunk_values, chunk_map): def _guess_combined(chunk_values, chunk_map):
""" Try to combine unknown chunks in pairs and parse them """ """ Try to combine unknown chunks in pairs and parse them. """
is_changed = False is_changed = False
p_title = _get_parsed_title_dict(chunk_values, chunk_map) p_title = _get_parsed_title_dict(chunk_values, chunk_map)
if len(p_title["unknown"]) < 2: if len(p_title["unknown"]) < 2:
@ -165,14 +176,14 @@ def parse_title(title):
# remove non-word chunks (like single hyphens), but leave ampersands (&) # remove non-word chunks (like single hyphens), but leave ampersands (&)
chunk_values = list(filter(lambda ch: re.search(r"(\w|&)+", ch), chunk_values)) chunk_values = list(filter(lambda ch: re.search(r"(\w|&)+", ch), chunk_values))
chunk_map = [] # list of chunk_types
# parse each chunk # parse each chunk
chunk_map = []
for ch_value in chunk_values: for ch_value in chunk_values:
chunk_map.append(guess_part(ch_value)) chunk_map.append(guess_part(ch_value))
_, chunk_values, chunk_map = _guess_combined(chunk_values, chunk_map) _, chunk_values, chunk_map = _guess_combined(chunk_values, chunk_map)
# # try to parse unknown chunks, replacing all hyphens in them with dots # try to parse unknown chunks, replacing all hyphens in them with dots
p_title = _get_parsed_title_dict(chunk_values, chunk_map) p_title = _get_parsed_title_dict(chunk_values, chunk_map)
is_changed = False is_changed = False
if p_title.get("unknown"): if p_title.get("unknown"):
@ -230,11 +241,12 @@ def parse_title(title):
return dict(p_title) return dict(p_title)
def guess_part(chunk_value):
    """
    Return chunk type for given chunk value.

    PATTERNS is scanned in order and the type of the first pattern that
    matches the whole chunk (case-insensitively) is returned.

    Raises RuntimeError if no pattern matches the chunk.
    """
    for chunk_type, pattern in PATTERNS:
        # re.fullmatch anchors the pattern as a whole; gluing "^" and "$"
        # onto the pattern text would mis-anchor a top-level alternation
        # ("a|b" -> "^a|b$") and let a mere prefix match.
        if re.fullmatch(pattern, chunk_value, flags=re.I):
            return chunk_type
    raise RuntimeError("unhandled pattern type")