Skip to content

Commit 193df5c

Browse files
committed
add video subtitles and description to full-text index
1 parent 1e3c1e3 commit 193df5c

File tree

3 files changed

+15
-1
lines changed

3 files changed

+15
-1
lines changed

archivebox/config.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@
126126
'--write-sub',
127127
'--all-subs',
128128
'--write-auto-sub',
129+
'--convert-subs=srt',
129130
'--yes-playlist',
130131
'--continue',
131132
'--ignore-errors',

archivebox/core/models.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ class Tag(models.Model):
4141
Based on django-taggit model
4242
"""
4343
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
44-
44+
4545
name = models.CharField(unique=True, blank=False, max_length=100)
4646

4747
# slug is autoset on save from name, never set it manually

archivebox/extractors/media.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,11 +70,24 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
7070
finally:
7171
timer.end()
7272

73+
# add video description and subtitles to full-text index
74+
index_texts = [
75+
text_file.read_text(encoding='utf-8').strip()
76+
for text_file in (
77+
*output_path.glob('*.description'),
78+
*output_path.glob('*.srt'),
79+
*output_path.glob('*.vtt'),
80+
*output_path.glob('*.lrc'),
81+
*output_path.glob('*.lrc'),
82+
)
83+
]
84+
7385
return ArchiveResult(
7486
cmd=cmd,
7587
pwd=str(out_dir),
7688
cmd_version=YOUTUBEDL_VERSION,
7789
output=output,
7890
status=status,
91+
index_texts=index_texts,
7992
**timer.stats,
8093
)

0 commit comments

Comments
 (0)