Add support for episode names
This commit is contained in:
parent
58b1606a62
commit
463e5b4c9f
179
renamer.py
179
renamer.py
@ -25,6 +25,7 @@ PATTERNS = (
|
||||
r"|noir[-.]edition"
|
||||
r"|black[-.]chrome[-.]edition"
|
||||
r"|extended[-.]edition"
|
||||
r"|hq[-.]edition"
|
||||
r"|theatrical)"),
|
||||
("restrictions", r"(unrated)"),
|
||||
("resolution", r"[0-9]{3,4}[pi]"),
|
||||
@ -37,6 +38,7 @@ PATTERNS = (
|
||||
("audio", r"%s?(dts(-es)?|ac3|flac|dd5\.1|aac2\.0|dub-line)" % LANGUAGES),
|
||||
("subtitles", r"%s?sub" % LANGUAGES),
|
||||
("language", r"(\d{1,2}x)?%s" % LANGUAGES),
|
||||
("file_extension", r"mkv|avi"),
|
||||
("unknown", r".*")
|
||||
)
|
||||
|
||||
@ -99,83 +101,130 @@ def process_file(fpath):
|
||||
_lg.warning("%s -> %s", fname, result)
|
||||
|
||||
|
||||
def _get_parsed_title_dict(chunk_list, chunk_map):
|
||||
p_title = collections.defaultdict(list)
|
||||
for idx, chunk in enumerate(chunk_list):
|
||||
chunk_type = chunk_map[idx]
|
||||
p_title[chunk_type].append(chunk)
|
||||
return p_title
|
||||
|
||||
|
||||
def _guess_combined(chunk_values, chunk_map):
|
||||
""" Try to combine unknown chunks in pairs and parse them """
|
||||
is_changed = False
|
||||
p_title = _get_parsed_title_dict(chunk_values, chunk_map)
|
||||
if len(p_title["unknown"]) < 2:
|
||||
return is_changed, chunk_values, chunk_map
|
||||
|
||||
# i - begin of slice, j - end of slice
|
||||
i = 0
|
||||
# process up to second-to-last element
|
||||
while i < len(chunk_map) - 1:
|
||||
# we need slice with at least two elements
|
||||
j = i + 2
|
||||
# we need only unknown elements
|
||||
while set(chunk_map[i:j]) == {"unknown"} and j <= len(chunk_map):
|
||||
# create combined chunk
|
||||
cmb_chunk = ".".join(chunk_values[i:j])
|
||||
cmb_chunk_type = guess_part(cmb_chunk)
|
||||
|
||||
# add new combined chunk in lists
|
||||
# first subelement gets new chunk, rest - None
|
||||
# (will be removed later)
|
||||
if cmb_chunk_type != "unknown":
|
||||
is_changed = True
|
||||
chunk_values[i] = cmb_chunk
|
||||
chunk_map[i] = cmb_chunk_type
|
||||
for idx in range(i+1, j):
|
||||
chunk_values[idx] = None
|
||||
chunk_map[idx] = None
|
||||
# to start checking next chunks right after the end of slice
|
||||
i = idx
|
||||
break
|
||||
# try add more elements to combined chunk
|
||||
else:
|
||||
j += 1
|
||||
|
||||
# start checking next value
|
||||
i += 1
|
||||
|
||||
# clean up from None values
|
||||
chunk_values = list(filter(None, chunk_values))
|
||||
chunk_map = list(filter(None, chunk_map))
|
||||
|
||||
return is_changed, chunk_values, chunk_map
|
||||
|
||||
|
||||
def parse_title(title):
|
||||
""" Split media title to components. """
|
||||
|
||||
chunks = list(filter(None, re.split(SEPARATORS, title)))
|
||||
p_title = collections.defaultdict(list)
|
||||
chunk_values = filter(None, re.split(SEPARATORS, title))
|
||||
|
||||
# remove non-word chunks (like single hyphens)
|
||||
chunks = list(filter(lambda ch: re.search(r"\w+", ch), chunks))
|
||||
chunk_values = list(filter(lambda ch: re.search(r"\w+", ch), chunk_values))
|
||||
|
||||
# parse each chunk
|
||||
unknown_chunks = {}
|
||||
for idx, chunk in enumerate(chunks):
|
||||
pat_type = guess_part(chunk)
|
||||
if pat_type != "unknown":
|
||||
p_title[pat_type].append(chunk)
|
||||
else:
|
||||
unknown_chunks[idx] = chunk
|
||||
chunk_map = []
|
||||
for ch_value in chunk_values:
|
||||
chunk_map.append(guess_part(ch_value))
|
||||
|
||||
# try to combine unknown chunks in pairs and parse them
|
||||
if len(unknown_chunks) > 1:
|
||||
prev_idx = -1
|
||||
for idx in sorted(unknown_chunks.keys()):
|
||||
_, chunk_values, chunk_map = _guess_combined(chunk_values, chunk_map)
|
||||
|
||||
# first unknown chunk, skip
|
||||
if prev_idx < 0:
|
||||
prev_idx = idx
|
||||
# try to parse unknown chunks by splitting them on hyphens
|
||||
p_title = _get_parsed_title_dict(chunk_values, chunk_map)
|
||||
is_changed = False
|
||||
if p_title.get("unknown"):
|
||||
spl_ch_values = []
|
||||
spl_ch_map = []
|
||||
for idx, ch_value in enumerate(chunk_values):
|
||||
ch_type = chunk_map[idx]
|
||||
if ch_type == "unknown" and "-" in ch_value:
|
||||
spl_values = ch_value.split("-")
|
||||
for spl_val in spl_values:
|
||||
if not spl_val:
|
||||
continue
|
||||
spl_type = guess_part(spl_val)
|
||||
if spl_type != "unknown":
|
||||
is_changed = True
|
||||
spl_ch_values.append(spl_val)
|
||||
spl_ch_map.append(spl_type)
|
||||
else:
|
||||
spl_ch_values.append(ch_value)
|
||||
spl_ch_map.append(ch_type)
|
||||
|
||||
is_combined, spl_ch_values, spl_ch_map = _guess_combined(spl_ch_values, spl_ch_map)
|
||||
if is_changed or is_combined:
|
||||
chunk_values = spl_ch_values
|
||||
chunk_map = spl_ch_map
|
||||
|
||||
# parse name and episode name
|
||||
# only if there is something except unknown chunks
|
||||
p_title = _get_parsed_title_dict(chunk_values, chunk_map)
|
||||
if len(p_title["unknown"]) != len(chunk_values):
|
||||
idx = 0
|
||||
while idx < len(chunk_map) and chunk_map[idx] == "unknown":
|
||||
chunk_map[idx] = "name"
|
||||
idx += 1
|
||||
# if episode number is found, next unknown chunks are episode name
|
||||
if p_title.get("episode"):
|
||||
idx = chunk_map.index("episode") + 1
|
||||
while idx < len(chunk_map) and chunk_map[idx] == "unknown":
|
||||
chunk_map[idx] = "episode_name"
|
||||
idx += 1
|
||||
|
||||
# at last, strip hyphens from unknown chunks
|
||||
# only if there is something except unknown chunks
|
||||
p_title = _get_parsed_title_dict(chunk_values, chunk_map)
|
||||
if len(p_title["unknown"]) != len(chunk_values):
|
||||
for idx, chunk_type in enumerate(chunk_map):
|
||||
if chunk_type != "unknown":
|
||||
continue
|
||||
# previous unknown chunk does not border with current, skip
|
||||
if (prev_idx + 1) != idx:
|
||||
prev_idx = idx
|
||||
chunk_value = chunk_values[idx]
|
||||
if chunk_value[0] != "-" and chunk_value[-1] != "-":
|
||||
continue
|
||||
chunk_values[idx] = chunk_value.strip("-")
|
||||
|
||||
# create combined chunk
|
||||
cmb_chunk = ".".join([unknown_chunks[prev_idx], unknown_chunks[idx]])
|
||||
cmb_chunk_type = guess_part(cmb_chunk)
|
||||
|
||||
# check next pair if nothing
|
||||
if cmb_chunk_type == "unknown":
|
||||
prev_idx = idx
|
||||
continue
|
||||
|
||||
# if combined chunk matches pattern, add it to found type
|
||||
# and remove from unknown chunks its parts
|
||||
p_title[cmb_chunk_type].append(cmb_chunk)
|
||||
del unknown_chunks[prev_idx]
|
||||
del unknown_chunks[idx]
|
||||
prev_idx = -1
|
||||
|
||||
# try to parse unknown chunks, replacing all hyphens in them with dots
|
||||
if unknown_chunks:
|
||||
# create string from unknown_chunks with dots instead of hyphens
|
||||
u_chunks_str = ".".join(unknown_chunks.values())
|
||||
uc_title = ".".join(filter(None, re.split(SEPARATORS_HYPHEN, u_chunks_str)))
|
||||
# recursion exit condition
|
||||
if uc_title != title:
|
||||
p_uc_title = parse_title(uc_title)
|
||||
# if parsed uc_title has smth else than "unknown", update p_title
|
||||
if list(p_uc_title.keys()) != ["unknown"]:
|
||||
p_title.update(p_uc_title)
|
||||
# unknown_chunks should be cleared,
|
||||
# because it was processed in nested function call
|
||||
unknown_chunks = {}
|
||||
|
||||
# cut name from unknown chunks
|
||||
# name is the first n consequent chunks
|
||||
# only if amount of unknown chunks differs from overall amount of chunks
|
||||
if len(unknown_chunks) != len(chunks):
|
||||
i = 0
|
||||
for idx in sorted(unknown_chunks.keys()):
|
||||
if idx != i:
|
||||
break
|
||||
p_title["name"].append(unknown_chunks[idx])
|
||||
del unknown_chunks[idx]
|
||||
i += 1
|
||||
|
||||
for idx in sorted(unknown_chunks.keys()):
|
||||
p_title["unknown"].append(unknown_chunks[idx])
|
||||
p_title = _get_parsed_title_dict(chunk_values, chunk_map)
|
||||
return dict(p_title)
|
||||
|
||||
|
||||
|
||||
@ -2,7 +2,71 @@ import unittest
|
||||
from renamer import parse_title
|
||||
|
||||
|
||||
class TestParser(unittest.TestCase):
|
||||
class TestParserParts(unittest.TestCase):
    # Each test feeds one title to parse_title and checks both a single
    # component of interest and the complete parsed dictionary.

    def test_episode_name(self):
        # unknown chunks following the episode number are the episode name
        parsed = parse_title(
            "The.Walking.Dead.S04E06.Live.Bait.1080p.WEB-DL.Rus.Eng.HDCLUB"
        )
        expected = {
            "episode": ["S04E06"],
            "resolution": ["1080p"],
            "quality": ["WEB-DL"],
            "episode_name": ["Live", "Bait"],
            "language": ["Rus", "Eng"],
            "name": ["The", "Walking", "Dead"],
            "unknown": ["HDCLUB"],
        }
        self.assertEqual(["Live", "Bait"], parsed.get("episode_name"))
        self.assertEqual(parsed, expected)

    def test_episode_number(self):
        # an SxxEyy chunk is reported under "episode"
        parsed = parse_title("Vikings.S01E01.720p.BluRay.4xRus.Eng.HDCLUB")
        expected = {
            "episode": ["S01E01"],
            "resolution": ["720p"],
            "quality": ["BluRay"],
            "language": ["4xRus", "Eng"],
            "name": ["Vikings"],
            "unknown": ["HDCLUB"],
        }
        self.assertIn("S01E01", parsed.get("episode", []))
        self.assertEqual(parsed, expected)

    def test_subtitles(self):
        # a language followed by "Sub" is reported under "subtitles"
        parsed = parse_title(
            "Lives.of.Others.Blu-RayRip.720p.RusDTS.GerAC3.EngSub"
        )
        expected = {
            "quality": ["Blu-RayRip"],
            "resolution": ["720p"],
            "audio": ["RusDTS", "GerAC3"],
            "subtitles": ["EngSub"],
            "name": ["Lives", "of", "Others"],
        }
        self.assertIn("EngSub", parsed.get("subtitles", []))
        self.assertEqual(parsed, expected)

    def test_file_extension(self):
        # a trailing "mkv" is reported under "file_extension"
        parsed = parse_title("The Guild s04e06 Weird Respawn (by Swich).mkv")
        expected = {
            "name": ["The", "Guild"],
            "episode": ["s04e06"],
            "episode_name": ["Weird", "Respawn", "by", "Swich"],
            "file_extension": ["mkv"],
        }
        self.assertIn("mkv", parsed.get("file_extension", []))
        self.assertEqual(parsed, expected)
|
||||
|
||||
|
||||
class TestCornerCases(unittest.TestCase):
|
||||
def test_misc_separators(self):
|
||||
title = "V tumane 2012 1080p BluRay DD5.1 x264-EA"
|
||||
res = parse_title(title)
|
||||
@ -22,6 +86,24 @@ class TestParser(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_misc_separators_and_combine(self):
    # "H" and "264-alinto" must end up as codec "H.264" plus unknown "alinto"
    parsed = parse_title(
        "The.Dawn.Patrol.1938.1080p.WEB-DL.AAC2.0.H.264-alinto"
    )
    expected = {
        "year": ["1938"],
        "resolution": ["1080p"],
        "quality": ["WEB-DL"],
        "audio": ["AAC2.0"],
        "codec": ["H.264"],
        "name": ["The", "Dawn", "Patrol"],
        "unknown": ["alinto"],
    }
    self.assertIn("H.264", parsed.get("codec", []))
    self.assertIn("alinto", parsed.get("unknown", []))
    self.assertEqual(parsed, expected)
|
||||
|
||||
@unittest.expectedFailure
|
||||
def test_russian_char_in_resolution(self):
|
||||
title = "Trouble.with.the.Curve.2012.1080р.BluRay.Rus.Eng.HDCLUB"
|
||||
@ -69,10 +151,10 @@ class TestParser(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_file_extension_goes_to_unknown(self):
|
||||
def test_file_extension_does_not_go_to_unknown(self):
|
||||
title = "Monsters,Inc.2001.BDRip.1080p.3xRus.Ukr.Eng.HDCLUB.mkv"
|
||||
res = parse_title(title)
|
||||
self.assertIn("mkv", res.get("unknown", []))
|
||||
self.assertNotIn("mkv", res.get("unknown", []))
|
||||
self.assertEqual(
|
||||
res,
|
||||
{
|
||||
@ -81,7 +163,8 @@ class TestParser(unittest.TestCase):
|
||||
"resolution": ["1080p"],
|
||||
"language": ["3xRus", "Ukr", "Eng"],
|
||||
"name": ["Monsters", "Inc"],
|
||||
"unknown": ["HDCLUB", "mkv"],
|
||||
"unknown": ["HDCLUB"],
|
||||
"file_extension": ["mkv"],
|
||||
},
|
||||
)
|
||||
|
||||
@ -102,7 +185,6 @@ class TestParser(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
@unittest.expectedFailure
|
||||
def test_chunk_from_three_parts(self):
|
||||
title = "Mad.Max.Road.of.Fury.Black.Chrome.edition.BDRip.1080p"
|
||||
self.assertEqual(
|
||||
@ -162,21 +244,6 @@ class TestParser(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_subtitles(self):
|
||||
title = "Lives.of.Others.Blu-RayRip.720p.RusDTS.GerAC3.EngSub"
|
||||
res = parse_title(title)
|
||||
self.assertIn("EngSub", res.get("subtitles", []))
|
||||
self.assertEqual(
|
||||
res,
|
||||
{
|
||||
"quality": ["Blu-RayRip"],
|
||||
"resolution": ["720p"],
|
||||
"audio": ["RusDTS", "GerAC3"],
|
||||
"subtitles": ["EngSub"],
|
||||
"name": ["Lives", "of", "Others"],
|
||||
},
|
||||
)
|
||||
|
||||
def test_strange_dot_in_name(self):
|
||||
title = "WALL·E.2008.1080p.BluRay.3xRus.Ukr.Eng.HDCLUB-Skazhutin"
|
||||
res = parse_title(title)
|
||||
@ -192,3 +259,68 @@ class TestParser(unittest.TestCase):
|
||||
"unknown": ["HDCLUB-Skazhutin"],
|
||||
},
|
||||
)
|
||||
|
||||
def test_splitted_by_hyphens_chunk_ends_with_empty_part(self):
    # the title contains "x264-N-" which leaves an empty piece when split
    # on hyphens; the expected result has no empty entries
    parsed = parse_title(
        "The.IT.Crowd.S01E04.The.Red.Door.HR.DVDRip.HQ.Edition.x264-N-(Rus.Eng)"
    )
    expected = {
        "name": ["The", "IT", "Crowd"],
        "episode": ["S01E04"],
        "episode_name": ["The", "Red", "Door", "HR"],
        "quality": ["DVDRip"],
        "edition": ["HQ.Edition"],
        "codec": ["x264"],
        "unknown": ["N"],
        "language": ["Rus", "Eng"],
    }
    self.assertEqual(parsed, expected)
|
||||
|
||||
def test_year_before_episode_number_and_name(self):
    # a year sits between the series name and the episode number
    parsed = parse_title(
        "The.Big.Bang.Theory.2019.S12E20.The.Decision.Reverberation.1080p.AMZN.WEB-DL.DD5.1.x264-NTb_EniaHD.mkv"
    )
    expected = {
        "name": ["The", "Big", "Bang", "Theory"],
        "year": ["2019"],
        "episode": ["S12E20"],
        "episode_name": ["The", "Decision", "Reverberation"],
        "unknown": ["NTb", "EniaHD"],
        "resolution": ["1080p"],
        "quality": ["AMZN", "WEB-DL"],
        "audio": ["DD5.1"],
        "codec": ["x264"],
        "file_extension": ["mkv"],
    }
    self.assertEqual(parsed, expected)
|
||||
|
||||
def test_unknown_stripped_from_hyphens(self):
    # "-jhonny2" must be reported without its leading hyphen
    parsed = parse_title(
        "The.Big.Bang.Theory.S04E06.720p.WEB-DL.eng.rus.[Kuraj-Bambey.Ru]-jhonny2.mkv"
    )
    expected = {
        "name": ["The", "Big", "Bang", "Theory"],
        "episode": ["S04E06"],
        "resolution": ["720p"],
        "quality": ["WEB-DL"],
        "language": ["eng", "rus"],
        "unknown": ["Kuraj-Bambey", "Ru", "jhonny2"],
        "file_extension": ["mkv"],
    }
    self.assertIn("jhonny2", parsed.get("unknown", []))
    self.assertEqual(parsed, expected)
|
||||
|
||||
def test_episode_name_without_series_name(self):
    # the title starts with the episode number, so no "name" is expected
    parsed = parse_title("S27E01.Every Man's Dream.mkv")
    expected = {
        "episode": ["S27E01"],
        "episode_name": ["Every", "Man's", "Dream"],
        "file_extension": ["mkv"],
    }
    self.assertEqual(parsed, expected)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user