Swift Merge Audio and Video Files into One Video

Swift: Merge audio and video files

I was looking for code to merge audio and video files into one video but couldn't find it anywhere, so after spending hours reading the Apple docs I wrote this code.

NOTE: This code is tested and works for me.

Step 1:
Import these modules into your view controller:

import UIKit
import AVFoundation
import AVKit
import AssetsLibrary

Step 2:
Add this function to your code:

func mergeFilesWithUrl(videoUrl: NSURL, audioUrl: NSURL)
{
    let mixComposition: AVMutableComposition = AVMutableComposition()
    var mutableCompositionVideoTrack: [AVMutableCompositionTrack] = []
    var mutableCompositionAudioTrack: [AVMutableCompositionTrack] = []
    let totalVideoCompositionInstruction: AVMutableVideoCompositionInstruction = AVMutableVideoCompositionInstruction()

    // Start the merge.
    let aVideoAsset: AVAsset = AVAsset(URL: videoUrl)
    let aAudioAsset: AVAsset = AVAsset(URL: audioUrl)

    mutableCompositionVideoTrack.append(mixComposition.addMutableTrackWithMediaType(AVMediaTypeVideo, preferredTrackID: kCMPersistentTrackID_Invalid))
    mutableCompositionAudioTrack.append(mixComposition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: kCMPersistentTrackID_Invalid))

    let aVideoAssetTrack: AVAssetTrack = aVideoAsset.tracksWithMediaType(AVMediaTypeVideo)[0]
    let aAudioAssetTrack: AVAssetTrack = aAudioAsset.tracksWithMediaType(AVMediaTypeAudio)[0]

    do {
        try mutableCompositionVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), ofTrack: aVideoAssetTrack, atTime: kCMTimeZero)

        // In my case the audio file is longer than the video file, so I used the
        // videoAsset duration instead of the audioAsset duration.
        try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), ofTrack: aAudioAssetTrack, atTime: kCMTimeZero)

        // Use this instead of the line above if your audio file and video file
        // have the same playing duration:
        // try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aAudioAssetTrack.timeRange.duration), ofTrack: aAudioAssetTrack, atTime: kCMTimeZero)
    } catch {
        print(error)
    }

    totalVideoCompositionInstruction.timeRange = CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration)

    let mutableVideoComposition: AVMutableVideoComposition = AVMutableVideoComposition()
    mutableVideoComposition.frameDuration = CMTimeMake(1, 30)
    mutableVideoComposition.renderSize = CGSizeMake(1280, 720)

    // playerItem = AVPlayerItem(asset: mixComposition)
    // player = AVPlayer(playerItem: playerItem!)
    // AVPlayerVC.player = player

    // You will find your merged video at this URL.
    let savePathUrl: NSURL = NSURL(fileURLWithPath: NSHomeDirectory() + "/Documents/newVideo.mp4")

    let assetExport: AVAssetExportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality)!
    assetExport.outputFileType = AVFileTypeMPEG4
    assetExport.outputURL = savePathUrl
    assetExport.shouldOptimizeForNetworkUse = true

    assetExport.exportAsynchronouslyWithCompletionHandler { () -> Void in
        switch assetExport.status {
        case AVAssetExportSessionStatus.Completed:
            // Uncomment this if you want to store the video in the asset library.
            // let assetsLib = ALAssetsLibrary()
            // assetsLib.writeVideoAtPathToSavedPhotosAlbum(savePathUrl, completionBlock: nil)
            print("success")
        case AVAssetExportSessionStatus.Failed:
            print("failed \(assetExport.error)")
        case AVAssetExportSessionStatus.Cancelled:
            print("cancelled \(assetExport.error)")
        default:
            print("complete")
        }
    }
}

Step 3:
Call the function wherever you need it, like this:

let videoUrl: NSURL = NSURL(fileURLWithPath: NSBundle.mainBundle().pathForResource("SampleVideo", ofType: "mp4")!)
let audioUrl: NSURL = NSURL(fileURLWithPath: NSBundle.mainBundle().pathForResource("SampleAudio", ofType: "mp3")!)

mergeFilesWithUrl(videoUrl, audioUrl: audioUrl)

Hope this helps you and saves you some time.

Swift: Merge audio and video (Swift 3)

func mergeFilesWithUrl(videoUrl: URL, audioUrl: URL) {

    let mixComposition: AVMutableComposition = AVMutableComposition()

    var mutableCompositionVideoTrack: [AVMutableCompositionTrack] = []
    var mutableCompositionAudioTrack: [AVMutableCompositionTrack] = []
    var mutableCompositionAudioOfVideoTrack: [AVMutableCompositionTrack] = []
    let totalVideoCompositionInstruction: AVMutableVideoCompositionInstruction = AVMutableVideoCompositionInstruction()

    let aVideoAsset: AVAsset = AVAsset(url: videoUrl)
    let aAudioAsset: AVAsset = AVAsset(url: audioUrl)

    mutableCompositionVideoTrack.append(mixComposition.addMutableTrack(withMediaType: AVMediaTypeVideo, preferredTrackID: kCMPersistentTrackID_Invalid))
    mutableCompositionAudioTrack.append(mixComposition.addMutableTrack(withMediaType: AVMediaTypeAudio, preferredTrackID: kCMPersistentTrackID_Invalid))
    mutableCompositionAudioOfVideoTrack.append(mixComposition.addMutableTrack(withMediaType: AVMediaTypeAudio, preferredTrackID: kCMPersistentTrackID_Invalid))

    let aAudioOfVideoTrack: AVAssetTrack = aVideoAsset.tracks(withMediaType: AVMediaTypeAudio)[0]
    let aVideoAssetTrack: AVAssetTrack = aVideoAsset.tracks(withMediaType: AVMediaTypeVideo)[0]
    let aAudioAssetTrack: AVAssetTrack = aAudioAsset.tracks(withMediaType: AVMediaTypeAudio)[0]

    do {
        // Keep the video's own audio, the video track, and the new audio track,
        // all trimmed to the video's duration.
        try mutableCompositionAudioOfVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), of: aAudioOfVideoTrack, at: kCMTimeZero)
        try mutableCompositionVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), of: aVideoAssetTrack, at: kCMTimeZero)
        try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), of: aAudioAssetTrack, at: kCMTimeZero)
    } catch {
        print(error)
    }

    totalVideoCompositionInstruction.timeRange = CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration)

    let mutableVideoComposition: AVMutableVideoComposition = AVMutableVideoComposition()
    mutableVideoComposition.frameDuration = CMTimeMake(1, 30)
    mutableVideoComposition.renderSize = CGSize(width: 1280, height: 720)

    // You will find your merged video at this URL.
    let savePathUrl = URL(fileURLWithPath: NSHomeDirectory() + "/Documents/newVideo.mp4")

    do { // Delete the old video, if any.
        try FileManager.default.removeItem(at: savePathUrl)
    } catch {
        print(error.localizedDescription)
    }

    let assetExport: AVAssetExportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality)!
    assetExport.outputFileType = AVFileTypeMPEG4
    assetExport.outputURL = savePathUrl
    assetExport.shouldOptimizeForNetworkUse = true

    assetExport.exportAsynchronously {
        switch assetExport.status {
        case AVAssetExportSessionStatus.completed:
            print("success")
        case AVAssetExportSessionStatus.failed:
            print("failed \(assetExport.error)")
        case AVAssetExportSessionStatus.cancelled:
            print("cancelled \(assetExport.error)")
        default:
            print("complete")
        }
    }
}
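
The Swift 2 answer above hints at saving the result with ALAssetsLibrary, which is deprecated. If you want the Swift 3 version to push the exported file into the user's photo library, the Photos framework is the replacement. A minimal sketch, assuming NSPhotoLibraryUsageDescription is set in Info.plist; the helper name is just for illustration:

import Photos

// Hypothetical helper: call it from the .completed case of the export above,
// e.g. saveToPhotoLibrary(fileUrl: savePathUrl).
func saveToPhotoLibrary(fileUrl: URL) {
    PHPhotoLibrary.shared().performChanges({
        _ = PHAssetChangeRequest.creationRequestForAssetFromVideo(atFileURL: fileUrl)
    }, completionHandler: { success, error in
        if success {
            print("saved to photo library")
        } else {
            print("save failed: \(String(describing: error))")
        }
    })
}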

How to merge video clips using AVFoundation?

Don't use a Double to track the insertion time; this can result in gaps due to rounding errors. And don't use a preferredTimescale of 1 when converting seconds: that effectively rounds everything to whole seconds (1000 would be a more common timescale for this).

Instead, track the insertion time with a CMTime initialized to kCMTimeZero, and use CMTimeAdd to advance it.

And one more thing: video and audio tracks can have different durations, particularly when recorded. To keep things in sync, you may want to use CMTimeRangeGetIntersection to get the common time range of the audio and video in the asset, and then use the result for insertion into the composition.
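
Putting those three points together, here is a minimal sketch of appending several clips back to back (Swift 3 naming, matching the code above). It assumes `assets` is an array of AVAssets that each contain one video and one audio track:

let composition = AVMutableComposition()
let videoTrack = composition.addMutableTrack(withMediaType: AVMediaTypeVideo, preferredTrackID: kCMPersistentTrackID_Invalid)
let audioTrack = composition.addMutableTrack(withMediaType: AVMediaTypeAudio, preferredTrackID: kCMPersistentTrackID_Invalid)

// Track the insertion point with a CMTime, not a Double.
// (If you ever convert seconds, use a finer timescale, e.g. CMTime(seconds: 1.5, preferredTimescale: 600).)
var insertTime = kCMTimeZero

for asset in assets {
    let sourceVideo = asset.tracks(withMediaType: AVMediaTypeVideo)[0]
    let sourceAudio = asset.tracks(withMediaType: AVMediaTypeAudio)[0]

    // Insert only the time range that both tracks cover, so audio and video stay in sync.
    let commonRange = CMTimeRangeGetIntersection(sourceVideo.timeRange, sourceAudio.timeRange)

    do {
        try videoTrack.insertTimeRange(commonRange, of: sourceVideo, at: insertTime)
        try audioTrack.insertTimeRange(commonRange, of: sourceAudio, at: insertTime)
    } catch {
        print("insert failed: \(error)")
    }

    // Advance the insertion point with CMTimeAdd to avoid rounding drift.
    insertTime = CMTimeAdd(insertTime, commonRange.duration)
}

This is only an outline; a real implementation would also handle assets that have no audio track.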

How to merge 1 video and 2 or more audio files with AVFoundation

OK, I just found what the problem was. Basically, there is one golden rule that must be followed when using AVMutableComposition (at least when merging multiple audio tracks):

1 audio = 1 video + 1 instruction

In other words, for every audio track there must be one video track and one layer instruction. Following this rule, my previous code becomes the following:

public void mergeAudios()
{
    // This function merges the final video with the new audio.

    #region HoldVideoTrack
    AVAsset video_asset = AVAsset.FromUrl(NSUrl.FromFilename(FinalVideo));

    // This range applies to the video, not to the mixComposition.
    CMTimeRange range = new CMTimeRange()
    {
        Start = new CMTime(0, 0),
        Duration = video_asset.Duration
    };
    #endregion

    AVMutableComposition mixComposition = new AVMutableComposition();

    #region AddsVideo
    AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
    AVAssetTrack assetVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Video)[0];
    videoTrack.InsertTimeRange(range, assetVideoTrack, CMTime.Zero, out NSError error1);
    #endregion

    #region AddsVideo'sAudio
    // If the device can't use the microphone, the original video's audio will not exist.
    AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);
    if (microphone != null)
    {
        AVMutableCompositionTrack audio_video_Track = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
        AVAssetTrack assetAudioVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Audio)[0];
        audio_video_Track.InsertTimeRange(range, assetAudioVideoTrack, mixComposition.Duration, out NSError error2);
    }
    #endregion

    // [TTS_list.Count + 1]; the +1 is for the original video.
    AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[TTS_list.Count + 1];
    // This instruction is for "FinalVideo".
    Instruction_Array[0] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);

    #region TestingEnvironment
    // We use counter to track the position in Instruction_Array; it starts at 1 because
    // we have already added one instruction for "FinalVideo".
    int counter = 1;
    foreach (Audio _audioo in TTS_list)
    {
        #region Video
        AVMutableCompositionTrack videoTrack_forAudio = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
        AVAssetTrack assetVideoTrack_forAudio = video_asset.TracksWithMediaType(AVMediaType.Video)[0];

        // This range applies to the video, not to the mixComposition. Its duration is 0,
        // so it has no overall effect on the final video.
        // We have to declare one video track for each audio track in order to merge multiple audios.
        // Done this way, the videos have no effect, but the audios do.
        CMTimeRange range0 = new CMTimeRange()
        {
            Start = new CMTime(0, 0),
            Duration = CMTime.FromSeconds(0, 600)
        };
        videoTrack_forAudio.InsertTimeRange(range0, assetVideoTrack_forAudio, mixComposition.Duration, out NSError error4);
        #endregion

        #region Audio
        AVAsset audio_asset = AVAsset.FromUrl(NSUrl.FromFilename(_audioo.Path));

        // This range applies to the audio, not to the mixComposition.
        // We use _audioo.Duration instead of audio_asset.Duration.Seconds because the audio's duration might be trimmed.
        CMTimeRange audio_CMTime = new CMTimeRange()
        {
            Start = new CMTime(0, 0),
            Duration = CMTime.FromSeconds(_audioo.Duration, 600)
        };

        // This range applies to the mixComposition, not to the audio.
        var starting_CMTime = CMTime.FromSeconds(_audioo.Starting_Point, 600);

        AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
        AVAssetTrack assetAudioTrack = audio_asset.TracksWithMediaType(AVMediaType.Audio)[0];
        audioTrack.InsertTimeRange(audio_CMTime, assetAudioTrack, starting_CMTime, out NSError error5);
        #endregion

        #region Instruction
        Instruction_Array[counter] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);
        counter += 1;
        #endregion
    }
    #endregion

    #region Instructions
    var mainInstruction = new AVMutableVideoCompositionInstruction();

    CMTimeRange rangeIns = new CMTimeRange()
    {
        Start = new CMTime(0, 0),
        Duration = mixComposition.Duration
    };

    mainInstruction.BackgroundColor = UIColor.FromRGBA(0.63f, 0.84f, 0.82f, 1.000f).CGColor;
    mainInstruction.TimeRange = rangeIns;
    mainInstruction.LayerInstructions = Instruction_Array;
    #endregion

    var mainComposition = new AVMutableVideoComposition()
    {
        Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
        FrameDuration = new CMTime(1, 30),
        RenderSize = new CoreGraphics.CGSize(UIScreenWidth, UIScreenHeight)
    };

    finalVideo_path = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/FinalVideoEdit.mov"));
    if (File.Exists(Path.GetTempPath() + "temporaryClip/FinalVideoEdit.mov"))
    {
        File.Delete(Path.GetTempPath() + "temporaryClip/FinalVideoEdit.mov");
    }

    AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
    {
        OutputUrl = finalVideo_path,
        OutputFileType = AVFileType.QuickTimeMovie,
        ShouldOptimizeForNetworkUse = true,
        VideoComposition = mainComposition
    };
    exportSession.ExportAsynchronously(_OnExportDone);
}

How to merge multiple videos side by side in Swift 4

Is it better to do the merging on a server or on the user's phone?

It depends, really. Servers are generally costly, but they give you more agility when it comes to support, and you only need to build the functionality once instead of once per platform. Still, I would always do it on the device if possible, so in your case "device" seems to be the way to go.

How can I merge videos in Swift? I tried to find a solution by googling but couldn't find any for Swift.

Swift itself will not do what you ask; you will need frameworks that support such things, and you can find them for iOS.

First you should specify exactly what you are doing. A single video consists (in most cases) of a video track and an audio track. From the title I deduce that the video tracks should be placed side by side, but there is no info about the audio. There is also the question of how "side by side" is handled for videos that do not have the same resolution, and of what happens when the videos do not all have the same length.

The video part in your case should probably boil down to images at a certain frame rate. For iOS that means getting a UIImage from each of the videos at a given time, stitching those images together, and appending the combined image to your stitched video file. So what you are looking for is:

  • Get a UIImage from each video at a certain time (a sketch follows below)
  • Merge the images together into one image
  • Create a video from the UIImages (this may need a bit more work)
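
For the first bullet, AVAssetImageGenerator can pull a frame out of each clip. Here is a rough sketch of the imagesFromVideos(urls:at:) helper used in the outline at the end of this answer; error handling and tolerances are up to you, and videos shorter than the requested time simply yield nil:

import AVFoundation
import UIKit

// Rough sketch: grab one frame per video at the given time.
// Each URL is assumed to point at a local video file.
func imagesFromVideos(urls: [URL], at time: CMTime) -> [UIImage?] {
    return urls.map { url -> UIImage? in
        let generator = AVAssetImageGenerator(asset: AVAsset(url: url))
        generator.appliesPreferredTrackTransform = true        // respect rotation metadata
        generator.requestedTimeToleranceBefore = kCMTimeZero   // ask for the exact frame
        generator.requestedTimeToleranceAfter = kCMTimeZero
        guard let cgImage = try? generator.copyCGImage(at: time, actualTime: nil) else {
            return nil
        }
        return UIImage(cgImage: cgImage)
    }
}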

Naturally, you still need to decide what output video you want: what to do with videos of different sizes and what to do with videos of different lengths. But these are all just decisions, and all of it can be done at the level of UIImage.
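
Stitching the frames together is plain UIKit drawing. A minimal side-by-side sketch of the stitchImages helper used in the outline below, assuming you scale every frame to a common height (how you really handle mismatched sizes is one of those decisions):

import UIKit

// Minimal sketch: draw the frames next to each other in one image.
// Frames that could not be extracted (nil) are skipped.
func stitchImages(_ images: [UIImage?], rowHeight: CGFloat = 720) -> UIImage {
    let frames = images.compactMap { $0 }
    // Scale each width so the frame keeps its aspect ratio at the common height.
    let widths = frames.map { $0.size.width * rowHeight / $0.size.height }
    let totalSize = CGSize(width: widths.reduce(0, +), height: rowHeight)

    let renderer = UIGraphicsImageRenderer(size: totalSize)
    return renderer.image { _ in
        var x: CGFloat = 0
        for (frame, width) in zip(frames, widths) {
            frame.draw(in: CGRect(x: x, y: 0, width: width, height: rowHeight))
            x += width
        }
    }
}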

You will also need to decide what frame rate to use. There is probably no reason not to use a fixed one like 30 FPS, which means iterating time as CMTimeMake(i, 30) where i is in the range [0, videoDuration*30].

The only thing left is the audio part, for which you may find clues elsewhere (for example, in the composition-based answers above).

So overall, the high-level code should be something like:

func mergeVideos(urls: [URL]) -> URL {
    let overallSize: CGSize = sizeForStitchedVideos(urls: urls)
    let overallDuration: TimeInterval = longestVideoFrom(urls: urls)
    let frameRate: Int = decideFrameRateForVideos(urls: urls)

    let stitchedVideo: VideoObject = VideoObject(size: overallSize, duration: overallDuration, frameRate: frameRate)

    for frameIndex in 0..<Int(ceil(overallDuration))*frameRate {
        let time = CMTime(value: frameIndex, timescale: frameRate)
        let images: [UIImage?] = imagesFromVideos(urls: urls, at: time)
        let stitchedImage: UIImage = stitchImages(images)
        stitchedVideo.appendVideoSample(withImage: stitchedImage, at: time)
        // TODO: append audio samples
    }
    stitchedVideo.finalize()

    return stitchedVideo.outputURL
}

Have fun.


