splitting with soft & hard cap

(and getting the split files is now more accurate)
This commit is contained in:
OMGeeky
2023-04-04 00:08:20 +02:00
parent fd94c2bc4d
commit 7f606a0914
4 changed files with 208 additions and 39 deletions

3
.gitignore vendored
View File

@@ -3,4 +3,5 @@
/temp/
/Cargo.lock
/downloader/logs/
/build
/build
/tests/test_data/tmp/

View File

@@ -15,4 +15,5 @@ google-bigquery2 = "4.0.1"
chrono = "0.4.23"
nameof = "1.2.2"
log = "0.4.17"
simplelog = "0.12.1"
simplelog = "0.12.1"
path-clean = "1.0.1"

View File

@@ -10,13 +10,15 @@ use downloader_config;
use downloader_config::Config;
use google_bigquery::{BigDataTable, BigqueryClient};
use google_youtube::{scopes, PrivacyStatus, YoutubeClient};
use log::{debug, error, info, trace, warn};
use nameof::name_of;
use path_clean::clean;
use tokio::io::BufReader;
use tokio::process::Command;
use twitch_data::{TwitchClient, Video};
use crate::data::{Streamers, VideoData};
use log::{debug, error, info, trace, warn};
pub mod data;
type Result<T> = std::result::Result<T, Box<dyn Error>>;
@@ -295,24 +297,42 @@ async fn upload_video_to_youtube<'a>(
Ok(())
}
async fn split_video_into_parts(
pub async fn split_video_into_parts(
path: PathBuf,
duration_soft_cap: Duration,
duration_hard_cap: Duration,
) -> Result<Vec<PathBuf>> {
trace!("split video into parts");
//region prepare paths
let filepath = path.canonicalize()?;
let parent_dir = path
.parent()
.unwrap()
.canonicalize()
.expect("Could not canonicalize parent dir");
let file_playlist = clean(Path::join(&parent_dir, "output.m3u8"));
//endregion
info!(
"Splitting video: {:?}\n\tinto parts with soft cap duration: {} minutes and hard cap duration: {} minutes",
filepath,
duration_soft_cap.num_minutes(),
duration_hard_cap.num_minutes()
);
let output_path_pattern = format!("{}_%03d.mp4", filepath.to_str().unwrap()); //TODO: maybe make the number of digits dynamic
warn!("The soft and hard cap duration are not implemented yet");
// todo!(implement the soft and hard cap duration);
let duration_str = duration_to_string(&duration_soft_cap);
//region run ffmpeg split command
//example: ffmpeg -i input.mp4 -c copy -map 0 -segment_time 00:20:00 -f segment output%03d.mp4
trace!(
"Running ffmpeg command: ffmpeg -i {:?} -c copy -map 0 -segment_time {} -reset_timestamps 1\
-segment_list {} -segment_list_type m3u8 -avoid_negative_ts 1 -f segment {}",
filepath,
duration_str,
file_playlist.display(),
output_path_pattern
);
Command::new("ffmpeg")
.args([
"-i",
@@ -323,46 +343,132 @@ async fn split_video_into_parts(
"0",
"-segment_time",
&duration_str,
"-reset_timestamps",
"1",
"-segment_list",
file_playlist.to_str().unwrap(),
"-segment_list_type",
"m3u8",
"-avoid_negative_ts",
"1",
"-f",
"segment",
&output_path_pattern,
])
.output()
.await?;
trace!("Finished running ffmpeg command");
//endregion
//region extract parts from playlist file (created by ffmpeg: 'output.m3u8')
let mut res = vec![];
let parent_dir = path.parent().unwrap();
let read = std::fs::read_dir(parent_dir)?;
info!("Reading dir: {:?}", parent_dir);
for x in read {
// info!("Checking file: {:?}", x);
let path = x?.path();
if path.is_file() {
let file_name = path.canonicalize()?;
// let file_name = path.to_str().unwrap();
info!("Checking file: {:?}", file_name);
let filename_beginning_pattern = format!("{}_", &filepath.to_str().unwrap());
let filename_str = file_name.to_str().unwrap();
if filename_str.starts_with(&filename_beginning_pattern)
&& filename_str.ends_with(".mp4")
{
info!("Found file: {:?}", file_name);
res.push(path);
} else {
info!("Skipping file: {:?}", file_name);
info!("Filepath to compare: {:?}", filename_beginning_pattern);
info!(
"Starts with: {}",
filename_str.starts_with(&filename_beginning_pattern)
info!("Reading playlist file: {}", file_playlist.display());
let playlist = tokio::fs::read_to_string(&file_playlist)
.await
.expect(format!("Failed to read playlist {}", file_playlist.display()).as_str());
let mut last_time = 0.0;
let mut time = 0.0;
let mut last_path: Option<PathBuf> = None;
let mut current_path: Option<PathBuf> = None;
for line in playlist.lines() {
if line.starts_with("#") {
if line.starts_with("#EXTINF:") {
last_time = time;
time = line["#EXTINF:".len()..].parse::<f64>().unwrap_or(0.0);
}
continue;
}
last_path = current_path;
current_path = Some(Path::join(&parent_dir, line));
res.push(current_path.clone().unwrap());
}
//endregion
//region maybe join last two parts
trace!("Deciding if last two parts should be joined");
if let Some(last_path) = last_path {
if let Some(current_path) = current_path {
let joined_time = last_time + time;
if joined_time < duration_soft_cap.num_seconds() as f64 {
//region join last two parts
info!("Joining last two parts");
//remove the part from the result that is going to be joined
res.pop();
let join_txt_path = Path::join(&parent_dir, "join.txt");
let join_mp4_path = Path::join(&parent_dir, "join.mp4");
tokio::fs::write(
join_txt_path.clone(),
format!(
"file '{}'\nfile '{}'",
clean(&last_path)
.to_str()
.expect("to_str on path did not work!"),
clean(&current_path)
.to_str()
.expect("to_str on path did not work!")
),
)
.await?;
// example: ffmpeg -f concat -safe 0 -i join.txt -c copy joined.mp4
// content of join.txt:
// file 'output_002.mp4'
// file 'output_003.mp4'
let join_txt_path = clean(join_txt_path);
let join_mp4_path = clean(join_mp4_path);
trace!(
"Running ffmpeg command: ffmpeg -f concat -safe 0 -i {:?} -c copy {:?}",
join_txt_path,
join_mp4_path
);
info!("Ends with: {}", filename_str.ends_with(".mp4"));
Command::new("ffmpeg")
.args([
"-f",
"concat",
"-safe",
"0",
"-i",
join_txt_path
.to_str()
.expect("to_str on join_txt_path did not work!"),
"-c",
"copy",
join_mp4_path
.to_str()
.expect("to_str on join_mp4_path did not work!"),
])
.output()
.await?;
trace!("Finished running ffmpeg command");
//region remove files
trace!(
"Removing files: {:?}, {:?}, {:?} {:?}",
current_path,
last_path,
join_txt_path,
file_playlist,
);
tokio::fs::remove_file(current_path).await?;
tokio::fs::remove_file(&last_path).await?;
tokio::fs::remove_file(join_txt_path).await?;
tokio::fs::remove_file(file_playlist).await?;
//endregion
trace!("Renaming file: {:?} to {:?}", join_mp4_path, last_path);
tokio::fs::rename(join_mp4_path, last_path).await?;
info!("Joined last two parts");
//endregion
}
}
}
//endregion
info!("removing the original file");
tokio::fs::remove_file(&path).await?;
info!("Split video into {} parts", res.len());
// info!("Video parts: {:?}", res);
// stdin().read_line(&mut String::new()).unwrap();
Ok(res)
}

View File

@@ -1,13 +1,36 @@
use std::path::{Path, PathBuf};
use chrono::{DateTime, NaiveDateTime, Utc};
// use bigquery_googleapi::BigqueryClient;
use google_bigquery::BigqueryClient;
use log::LevelFilter;
use simplelog::{ColorChoice, TermLogger, TerminalMode};
use downloader;
use downloader::{get_playlist_title_from_twitch_video, get_video_prefix_from_twitch_video, get_video_title_from_twitch_video};
use downloader::data::{Streamers, VideoData, VideoMetadata, Videos};
use downloader::{
get_playlist_title_from_twitch_video, get_video_prefix_from_twitch_video,
get_video_title_from_twitch_video,
};
/// Installs a terminal logger at `log_level` for use in tests.
///
/// The logger is created with simplelog's default configuration, mixed
/// terminal mode and automatic colour detection.
///
/// Initialization failure is reported on stderr instead of panicking.
/// NOTE(review): the expected failure is "a logger is already installed",
/// which occurs when more than one test in the same test binary calls this
/// helper — confirm against the simplelog / log documentation.
fn init_console_logging(log_level: LevelFilter) {
    let init_result = TermLogger::init(
        log_level,
        simplelog::Config::default(),
        TerminalMode::Mixed,
        ColorChoice::Auto,
    );
    // Keep going with whatever logger is already active; a failed logger
    // setup should not abort the test that asked for logging.
    if let Err(err) = init_result {
        eprintln!("console logger was not installed: {}", err);
    }
}
async fn get_sample_client() -> BigqueryClient {
BigqueryClient::new("twitchbackup-v1", "backup_data", Some("auth/bigquery_service_account.json")).await.unwrap()
BigqueryClient::new(
"twitchbackup-v1",
"backup_data",
Some("auth/bigquery_service_account.json"),
)
.await
.unwrap()
}
fn get_sample_video(client: &BigqueryClient) -> VideoData {
@@ -41,7 +64,7 @@ fn get_sample_video(client: &BigqueryClient) -> VideoData {
youtube_user: Some("NoPixel VODs".to_string()),
watched: Some(true),
public_videos_default: Some(false),
}
},
}
}
@@ -51,7 +74,8 @@ fn get_utc_from_string(s: &str) -> DateTime<Utc> {
utc
}
const LONG_TITLE: &'static str = "long title with over a hundred characters that is definitely going to \
const LONG_TITLE: &'static str =
"long title with over a hundred characters that is definitely going to \
be cut of because it does not fit into the maximum length that youtube requires";
#[tokio::test]
@@ -79,7 +103,10 @@ async fn get_video_title_single_part() {
video.video.title = Some(LONG_TITLE.to_string());
let title = get_video_title_from_twitch_video(&video, 1, 1).unwrap();
println!("single part title:\n{}", title);
assert_eq!(title, "long title with over a hundred characters that is definitely going to be...");
assert_eq!(
title,
"long title with over a hundred characters that is definitely going to be..."
);
}
#[tokio::test]
@@ -93,7 +120,10 @@ async fn get_playlist_title() {
video.video.title = Some(LONG_TITLE.to_string());
let title = get_playlist_title_from_twitch_video(&video).unwrap();
println!("playlist title:\n{}", title);
assert_eq!(title, "long title with over a hundred characters that is definitely going to be...");
assert_eq!(
title,
"long title with over a hundred characters that is definitely going to be..."
);
}
#[tokio::test]
@@ -104,4 +134,35 @@ async fn get_video_prefix() {
let prefix = get_video_prefix_from_twitch_video(&video, 5, 20).unwrap();
println!("prefix:\n{}", prefix);
assert_eq!(prefix, "[2021-01-01][Part 05/20]");
}
}
#[tokio::test]
async fn split_video_into_parts() {
init_console_logging(LevelFilter::Debug);
//region prepare test data
let video_source = Path::new("tests/test_data/short_video/short_video.mp4");
let tmp_folder_path = Path::new("tests/test_data/tmp/");
let video_path = Path::join(tmp_folder_path, "short_video/short_video.mp4");
if tmp_folder_path.exists() {
std::fs::remove_dir_all(tmp_folder_path).unwrap();
}
std::fs::create_dir_all(video_path.parent().unwrap()).unwrap();
std::fs::copy(video_source, &video_path).unwrap();
//endregion
let parts = downloader::split_video_into_parts(
PathBuf::from(&video_path),
chrono::Duration::seconds(5),
chrono::Duration::seconds(9),
)
.await;
//region clean up
std::fs::remove_dir_all(tmp_folder_path).unwrap();
//endregion
let parts = parts.expect("failed to split video into parts");
println!("parts: {:?}", parts);
assert_eq!(parts.len(), 5);
}