Merge Videos & Images in AVMutableComposition Using AVMutableCompositionTrack, Not AVVideoCompositionCoreAnimationTool

After consulting others on SO and performing more web research, it seems like this is not possible. Merging an image with a video into a master video that is playable out of an app seems to require AVVideoCompositionCoreAnimationTool.

How to merge mp4 and MOV type videos using AVMutableComposition iOS Swift?

You can use this code:

    /// Concatenates the video tracks of `arrayAssets` into a single composition, lays the
    /// first audio track of `audioAsset` under every clip, and exports the result as
    /// `mergedVideo.mp4` in the temporary directory.
    ///
    /// The render size is the size of the tallest clip (after accounting for portrait
    /// orientation), falling back to `defaultSize` when no clip yields a usable size.
    /// The export outcome is delivered through `exportDidFinish(exporter:videoURL:)`.
    ///
    /// - Parameters:
    ///   - arrayAssets: Clips to append, in playback order. Assets without a video track are skipped.
    ///   - audioAsset: Asset whose first audio track (if any) is inserted alongside each clip.
    ///   - completionHandler: NOTE(review): not referenced by the visible body; results are
    ///     reported through `self.callback` — confirm the caller stores it there.
    private func mergeAssets(arrayAssets:[AVAsset],audioAsset:AVAsset,completionHandler:@escaping MergedVideoCompletionHandler){
        var insertTime = kCMTimeZero
        let animation = true
        var arrayLayerInstructions:[AVMutableVideoCompositionLayerInstruction] = []
        var outputSize = CGSize.init(width: 0, height: 0)

        // Determine video output size
        for videoAsset in arrayAssets {
            // `.first` instead of `[0]`: an asset without a video track must not crash the merge.
            guard let videoTrack = videoAsset.tracks(withMediaType: AVMediaType.video).first else { continue }

            let assetInfo = orientationFromTransform(transform: videoTrack.preferredTransform)

            var videoSize = videoTrack.naturalSize
            if assetInfo.isPortrait == true {
                // Portrait clips are stored rotated, so swap the stored dimensions.
                videoSize.width = videoTrack.naturalSize.height
                videoSize.height = videoTrack.naturalSize.width
            }

            if videoSize.height > outputSize.height {
                outputSize = videoSize
            }
        }

        if outputSize.width == 0 || outputSize.height == 0 {
            outputSize = defaultSize
        }

        let audioSoundTrack = audioAsset.tracks(withMediaType: AVMediaType.audio).first

        // Init composition
        let mixComposition = AVMutableComposition.init()

        for videoAsset in arrayAssets {
            // Get video track
            guard let videoTrack = videoAsset.tracks(withMediaType: AVMediaType.video).first else { continue }

            // Init video & audio composition track
            guard let videoCompositionTrack = mixComposition.addMutableTrack(withMediaType: AVMediaType.video,
                                                                             preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else { continue }

            let audioCompositionTrack = mixComposition.addMutableTrack(withMediaType: AVMediaType.audio,
                                                                       preferredTrackID: Int32(kCMPersistentTrackID_Invalid))

            do {
                let startTime = kCMTimeZero
                let duration = videoAsset.duration

                // Add video track to video composition at specific time
                try videoCompositionTrack.insertTimeRange(CMTimeRangeMake(startTime, duration),
                                                          of: videoTrack,
                                                          at: insertTime)

                // Add audio track to audio composition at specific time
                if let audioTrack = audioSoundTrack {
                    try audioCompositionTrack?.insertTimeRange(CMTimeRangeMake(startTime, duration),
                                                               of: audioTrack,
                                                               at: insertTime)
                }

                // Add instruction for video track
                let layerInstruction = videoCompositionInstructionForTrack(track: videoCompositionTrack,
                                                                           asset: videoAsset,
                                                                           standardSize: outputSize,
                                                                           atTime: insertTime)

                // Hide video track before changing to new track
                let endTime = CMTimeAdd(insertTime, duration)

                if animation {
                    let timeScale = videoAsset.duration.timescale
                    let durationAnimation = CMTime.init(seconds: 1, preferredTimescale: timeScale)

                    layerInstruction.setOpacityRamp(fromStartOpacity: 1.0, toEndOpacity: 0.0, timeRange: CMTimeRange.init(start: endTime, duration: durationAnimation))
                } else {
                    layerInstruction.setOpacity(0, at: endTime)
                }

                // Without this the main instruction below would carry no layer instructions.
                arrayLayerInstructions.append(layerInstruction)

                // Increase the insert time
                insertTime = CMTimeAdd(insertTime, duration)
            } catch {
                // Report the failure the same way exportDidFinish does instead of dropping it,
                // and stop: a composition with a missing clip is not a valid merge result.
                let appError = getAppError(error, message: "Failed to loadvideoTrack")
                if let exportBlock = self.callback {
                    exportBlock(nil, appError)
                }
                return
            }
        }

        // Main video composition instruction
        let mainInstruction = AVMutableVideoCompositionInstruction()
        mainInstruction.timeRange = CMTimeRangeMake(kCMTimeZero, insertTime)
        mainInstruction.layerInstructions = arrayLayerInstructions

        // Main video composition
        let mainComposition = AVMutableVideoComposition()
        mainComposition.instructions = [mainInstruction]
        mainComposition.frameDuration = CMTimeMake(1, 30)
        mainComposition.renderSize = outputSize
        let path = NSTemporaryDirectory().appending("mergedVideo.mp4")
        let exportURL = URL.init(fileURLWithPath: path)

        // Remove file if existed: the export fails when the destination already exists.
        // `try?` is deliberate — a missing file is the normal case.
        try? FileManager.default.removeItem(at: exportURL)
        // //4 - Get path
        // let fileName = IGMediaUtil.createNewFile(fileName: "mergeVideo", fileExtension: "mp4")
        // guard let docSubFolder = IGMediaUtil.createFolderInDoc(folderName: Constants.kMergedVideosFolder) else{
        // return
        // }
        // let mergingURL = docSubFolder.appendingPathComponent(fileName)
        // // Remove file if existed
        // FileManager.default.removeItemIfExisted(mergingURL)

        let exporter = AVAssetExportSession.init(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality)
        exporter?.outputURL = exportURL
        exporter?.outputFileType = AVFileType.mp4
        exporter?.shouldOptimizeForNetworkUse = true
        exporter?.videoComposition = mainComposition

        // Do export
        exporter?.exportAsynchronously(completionHandler: {
            DispatchQueue.main.async {
                self.exportDidFinish(exporter: exporter, videoURL: exportURL)
            }
        })
    }
/// Translates the final state of an export session into calls on `callback`
/// and `progressCallback`.
///
/// - Parameters:
///   - exporter: The session that was asked to export; `nil` when it could not be created.
///   - videoURL: Destination the session was writing to; passed to `callback` on success.
fileprivate func exportDidFinish(exporter:AVAssetExportSession?, videoURL:URL) {
    var progressValue:Float = 0
    if let status = exporter?.status {
        switch status {
        case .exporting:
            progressValue = exporter?.progress ?? 0

        case .failed, .cancelled, .unknown:
            progressValue = 1.0
            let appError = self.getAppError(exporter?.error,message:"Failed to create Data")
            // The fallback belongs inside the interpolation; outside it never applied.
            print("localizedDescription ::::::AVExport ********** \(exporter?.error?.localizedDescription ?? "No Error")")
            if let exportBlock = self.callback{
                exportBlock(nil ,appError)
            }

        case .waiting:
            // Nothing to report yet; Swift requires an explicit statement in every case.
            break

        case .completed:
            progressValue = 1.0
            print("Exported file: \(videoURL.absoluteString)")

            if let exportBlock = self.callback{
                exportBlock(videoURL ,nil)
            }
        }
    }

    if let progressBlock = self.progressCallback{
        DispatchQueue.main.async {
            // NOTE(review): the closure body was missing from the source; passing the
            // computed progress is assumed — confirm against progressCallback's signature.
            progressBlock(progressValue)
        }
    }
}

extension MediaAudioMergerServiceManager{
    /// Classifies a track's preferred transform as one of the four right-angle orientations.
    ///
    /// - Parameter transform: Typically `AVAssetTrack.preferredTransform`.
    /// - Returns: The matching orientation and whether it is portrait (`.right` or `.left`).
    ///   Transforms that match none of the four patterns are reported as `.up`, not portrait.
    fileprivate func orientationFromTransform(transform: CGAffineTransform) -> (orientation: UIImageOrientation, isPortrait: Bool) {
        var assetOrientation = UIImageOrientation.up
        var isPortrait = false
        if transform.a == 0 && transform.b == 1.0 && transform.c == -1.0 && transform.d == 0 {
            assetOrientation = .right
            isPortrait = true
        } else if transform.a == 0 && transform.b == -1.0 && transform.c == 1.0 && transform.d == 0 {
            assetOrientation = .left
            isPortrait = true
        } else if transform.a == 1.0 && transform.b == 0 && transform.c == 0 && transform.d == 1.0 {
            assetOrientation = .up
        } else if transform.a == -1.0 && transform.b == 0 && transform.c == 0 && transform.d == -1.0 {
            assetOrientation = .down
        }
        return (assetOrientation, isPortrait)
    }

    /// Builds the layer instruction that scales and centres one clip inside the render area.
    ///
    /// - Parameters:
    ///   - track: Composition track the instruction applies to.
    ///   - asset: Source asset; its first video track supplies natural size and preferred transform.
    ///   - standardSize: Render size of the final composition.
    ///   - atTime: Composition time at which the transform takes effect.
    /// - Returns: The layer instruction; returned without a transform when `asset` has no video track.
    fileprivate func videoCompositionInstructionForTrack(track: AVCompositionTrack, asset: AVAsset, standardSize:CGSize, atTime: CMTime) -> AVMutableVideoCompositionLayerInstruction {
        let instruction = AVMutableVideoCompositionLayerInstruction(assetTrack: track)
        // `.first` instead of `[0]` so an asset without video cannot crash here.
        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.video).first else {
            return instruction
        }

        let transform = assetTrack.preferredTransform
        let assetInfo = orientationFromTransform(transform: transform)

        // Scale along the shorter stored dimension of the track.
        var aspectFillRatio:CGFloat = 1
        if assetTrack.naturalSize.height < assetTrack.naturalSize.width {
            aspectFillRatio = standardSize.height / assetTrack.naturalSize.height
        } else {
            aspectFillRatio = standardSize.width / assetTrack.naturalSize.width
        }

        let scaleFactor = CGAffineTransform(scaleX: aspectFillRatio, y: aspectFillRatio)

        if assetInfo.isPortrait {
            // Width and height are swapped because the stored frame is rotated.
            let posX = standardSize.width/2 - (assetTrack.naturalSize.height * aspectFillRatio)/2
            let posY = standardSize.height/2 - (assetTrack.naturalSize.width * aspectFillRatio)/2
            let moveFactor = CGAffineTransform(translationX: posX, y: posY)

            instruction.setTransform(assetTrack.preferredTransform.concatenating(scaleFactor).concatenating(moveFactor), at: atTime)
        } else {
            let posX = standardSize.width/2 - (assetTrack.naturalSize.width * aspectFillRatio)/2
            let posY = standardSize.height/2 - (assetTrack.naturalSize.height * aspectFillRatio)/2
            let moveFactor = CGAffineTransform(translationX: posX, y: posY)

            var concat = assetTrack.preferredTransform.concatenating(scaleFactor).concatenating(moveFactor)

            if assetInfo.orientation == .down {
                // NOTE(review): this replaces the preferred transform (including its translation)
                // with a bare 180° rotation — verify upside-down clips land inside the frame.
                let fixUpsideDown = CGAffineTransform(rotationAngle: CGFloat(Double.pi))
                concat = fixUpsideDown.concatenating(scaleFactor).concatenating(moveFactor)
            }

            instruction.setTransform(concat, at: atTime)
        }
        return instruction
    }
}

How to merge a *Single Image* with a video

Right, so I actually dealt with this problem a while ago. The issue is indeed with how you are creating the video from the picture. What you need to do is add the pixel buffer at time zero, and then AGAIN at the end, otherwise you will end up with an empty video until the very last frame, like you are experiencing.

The following code will be my best attempt to update your code. At the very end I will post my solution which is in Objective-C in case it helps anyone else.

/// Writes `image` into a five-second QuickTime movie at `outputURL` (Swift 2 syntax,
/// matching the rest of this snippet).
///
/// The same pixel buffer is appended twice — at time zero and at the end time — so the
/// picture is visible for the whole duration rather than only on the last frame.
///
/// - Parameter completion: Called after the writer finishes successfully. On any
///   failure the method logs and returns without calling it.
func build(completion:() -> Void) {
    guard let videoWriter = try? AVAssetWriter(URL: outputURL, fileType: AVFileTypeQuickTimeMovie) else {
        fatalError("AVAssetWriter error")
    }

    // The author observed distorted output unless the width is a multiple of 16, so pad
    // up to the next multiple. An already-aligned width gets no padding (previously it
    // received a full extra 16 pixels).
    let remainder = fmod(Double(image.size.width), 16)
    let pixelsToAdd: Double = remainder == 0 ? 0 : 16 - remainder
    let size: CGSize = CGSizeMake(image.size.width + CGFloat(pixelsToAdd), image.size.height)

    let outputSettings = [AVVideoCodecKey : AVVideoCodecH264, AVVideoWidthKey : NSNumber(float: Float(size.width)), AVVideoHeightKey : NSNumber(float: Float(size.height))]

    guard videoWriter.canApplyOutputSettings(outputSettings, forMediaType: AVMediaTypeVideo) else {
        fatalError("Negative : Can't apply the Output settings...")
    }

    let videoWriterInput = AVAssetWriterInput(mediaType: AVMediaTypeVideo, outputSettings: outputSettings)
    let sourcePixelBufferAttributesDictionary = [kCVPixelBufferPixelFormatTypeKey as String : NSNumber(unsignedInt: kCVPixelFormatType_32ARGB), kCVPixelBufferWidthKey as String: NSNumber(float: Float(size.width)), kCVPixelBufferHeightKey as String: NSNumber(float: Float(size.height))]
    let pixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoWriterInput, sourcePixelBufferAttributes: sourcePixelBufferAttributesDictionary)

    guard videoWriter.canAddInput(videoWriterInput) else {
        print("Cannot add the video input to the asset writer")
        return
    }
    videoWriter.addInput(videoWriterInput)

    guard videoWriter.startWriting() else {
        print("Asset writer failed to start writing")
        return
    }
    // A session must be open before any sample can be appended.
    videoWriter.startSessionAtSourceTime(kCMTimeZero)

    // The pool only exists once writing has started.
    guard let pixelBufferPool = pixelBufferAdaptor.pixelBufferPool else {
        print("Failed to allocate pixel buffer")
        return
    }

    var pooledBuffer: CVPixelBuffer? = nil
    let status: CVReturn = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pixelBufferPool, &pooledBuffer)

    guard let pixelBuffer = pooledBuffer where status == 0 else {
        print("Failed to allocate pixel buffer")
        return
    }

    // Draw the image into the buffer's backing memory.
    CVPixelBufferLockBaseAddress(pixelBuffer, 0)

    let data = CVPixelBufferGetBaseAddress(pixelBuffer)
    let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
    let context = CGBitmapContextCreate(data, Int(size.width), Int(size.height), 8, CVPixelBufferGetBytesPerRow(pixelBuffer), rgbColorSpace, CGImageAlphaInfo.PremultipliedFirst.rawValue)

    CGContextClearRect(context, CGRectMake(0, 0, size.width, size.height))
    CGContextDrawImage(context, CGRectMake(0, 0, size.width, size.height), self.image.CGImage)

    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0)

    // FIRST - add at time zero
    guard pixelBufferAdaptor.appendPixelBuffer(pixelBuffer, withPresentationTime: kCMTimeZero) else {
        print("Failed to append the first frame")
        return
    }

    // SECOND - wait until the writer is ready for more data; sleep briefly instead of
    // spinning so the wait does not peg a core.
    while !videoWriterInput.readyForMoreMediaData {
        NSThread.sleepForTimeInterval(0.01)
    }

    // THIRD - desired length of the picture-video (arbitrarily 5 seconds here)
    let frameTime: CMTime = CMTimeMake(5, 1)

    // FOURTH - add the same pixel buffer again at the end of the video
    guard pixelBufferAdaptor.appendPixelBuffer(pixelBuffer, withPresentationTime: frameTime) else {
        print("Failed to append the last frame")
        return
    }

    videoWriterInput.markAsFinished()
    videoWriter.endSessionAtSourceTime(frameTime)
    videoWriter.finishWritingWithCompletionHandler { () -> Void in
        if videoWriter.status != .Completed {
            // Error writing the video... handle appropriately
            print("Asset writer finished with an error")
        } else {
            completion()
        }
    }
}
How I managed to do this in Obj-C

Note: I had to make some edits to make this standalone, so this method will return a string holding the path the video WILL BE written to. It is returned before the video writing finishes so it may be possible to access it before it is ready if you are not careful

/// Writes the image in `imageData` into a five-second movie in the temporary directory.
///
/// The same pixel buffer is appended at time zero and again at five seconds so the
/// picture is shown for the whole duration.
///
/// @param imageData Encoded image data readable by UIImage.
/// @return The file NAME (not the full path) of the movie inside NSTemporaryDirectory(),
///         or nil when setup fails. The name is returned before writing has finished,
///         so do not read the file until the writer's completion block has run.
-(NSString *)makeMovieFromImageData:(NSData *)imageData {
    NSError *error = nil;
    UIImage *image = [UIImage imageWithData:imageData];
    if (image == nil) {
        NSLog(@"Could not decode image data");
        return nil;
    }

    // The author observed distorted output unless the width is a multiple of 16, so pad
    // up to the next multiple; an already-aligned width is left alone.
    double remainder = fmod(image.size.width, 16);
    double pixelsToAdd = (remainder == 0) ? 0 : 16 - remainder;
    CGSize size = CGSizeMake(image.size.width + pixelsToAdd, image.size.height);

    // Pick a file name that does not exist yet; the writer fails if the destination exists.
    NSURL *tempFileURL = nil;
    NSString *outputFile = nil;
    while (tempFileURL == nil) {
        NSString *guid = [[NSUUID new] UUIDString];
        outputFile = [NSString stringWithFormat:@"picture_%@.mp4", guid];

        // NSTemporaryDirectory() is a function; it is not a valid search-path constant
        // for NSSearchPathForDirectoriesInDomains.
        NSString *tempPath = [NSTemporaryDirectory() stringByAppendingPathComponent:outputFile];

        if (![[NSFileManager defaultManager] fileExistsAtPath:tempPath]) {
            tempFileURL = [NSURL fileURLWithPath:tempPath];
        }
    }

    // Start writing
    // NOTE(review): the file type argument was missing from the source; MPEG-4 is assumed
    // because the file name ends in .mp4.
    AVAssetWriter *videoWriter = [[AVAssetWriter alloc] initWithURL:tempFileURL
                                                           fileType:AVFileTypeMPEG4
                                                              error:&error];
    if (videoWriter == nil) {
        NSLog(@"Could not create asset writer: %@", error);
        return nil;
    }

    NSDictionary *videoSettings = @{
        AVVideoCodecKey  : AVVideoCodecH264,
        AVVideoWidthKey  : @((int)size.width),
        AVVideoHeightKey : @((int)size.height)
    };

    AVAssetWriterInput *writerInput = [AVAssetWriterInput assetWriterInputWithMediaType:AVMediaTypeVideo
                                                                         outputSettings:videoSettings];

    NSDictionary *bufferAttributes = @{
        (id)kCVPixelBufferPixelFormatTypeKey : @(kCVPixelFormatType_32ARGB)
    };

    AVAssetWriterInputPixelBufferAdaptor *adaptor =
        [AVAssetWriterInputPixelBufferAdaptor assetWriterInputPixelBufferAdaptorWithAssetWriterInput:writerInput
                                                                         sourcePixelBufferAttributes:bufferAttributes];
    if ([videoWriter canAddInput:writerInput]) {
        [videoWriter addInput:writerInput];
    } else {
        NSLog(@"Could not add the writer input");
        return nil;
    }

    [videoWriter startWriting];
    [videoWriter startSessionAtSourceTime:kCMTimeZero];

    CGImageRef img = [image CGImage];

    // Now create the pixel buffer
    NSDictionary *options = @{
        (id)kCVPixelBufferCGImageCompatibilityKey         : @YES,
        (id)kCVPixelBufferCGBitmapContextCompatibilityKey : @YES
    };
    CVPixelBufferRef buffer = NULL;

    CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, (size_t)size.width,
                                          (size_t)size.height, kCVPixelFormatType_32ARGB,
                                          (__bridge CFDictionaryRef)options, &buffer);

    if (!(status == kCVReturnSuccess && buffer != NULL)) {
        NSLog(@"There be some issue. We didn't get a buffer from the image");
        return nil;
    }

    CVPixelBufferLockBaseAddress(buffer, 0);
    void *pxdata = CVPixelBufferGetBaseAddress(buffer);

    CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB();

    // Use the buffer's real row stride: rows may be padded, so 4*width is not guaranteed.
    // NOTE(review): the bitmap-info argument was missing from the source; premultiplied
    // ARGB is assumed to match the 32ARGB buffer format.
    CGContextRef context = CGBitmapContextCreate(pxdata, (size_t)size.width,
                                                 (size_t)size.height, 8,
                                                 CVPixelBufferGetBytesPerRow(buffer), rgbColorSpace,
                                                 (CGBitmapInfo)kCGImageAlphaPremultipliedFirst);

    // CGContextDrawImage takes the CGImageRef, not the UIImage.
    CGContextDrawImage(context, CGRectMake(0, 0, size.width, size.height), img);

    CGColorSpaceRelease(rgbColorSpace);
    CGContextRelease(context);

    CVPixelBufferUnlockBaseAddress(buffer, 0);

    // At this point we have our buffer, so start by adding it at time zero
    if (![adaptor appendPixelBuffer:buffer withPresentationTime:kCMTimeZero]) {
        NSLog(@"Failed to append the first frame");
        CVPixelBufferRelease(buffer);
        return nil;
    }

    // Wait until ready; sleep briefly instead of spinning.
    while (!writerInput.readyForMoreMediaData) {
        [NSThread sleepForTimeInterval:0.01];
    }

    CMTime frameTime = CMTimeMake(5, 1); // 5 second frame

    BOOL appendedLast = [adaptor appendPixelBuffer:buffer withPresentationTime:frameTime];

    // This method created the buffer, so it owns one reference to release.
    CVPixelBufferRelease(buffer);

    if (!appendedLast) {
        NSLog(@"Failed to append the last frame");
        return nil;
    }

    [writerInput markAsFinished];

    [videoWriter endSessionAtSourceTime:frameTime];

    [videoWriter finishWritingWithCompletionHandler:^{
        if (videoWriter.status != AVAssetWriterStatusCompleted) {
            // Error
            NSLog(@"Asset writer finished with an error: %@", videoWriter.error);
        }
    }]; // end videoWriter finishWriting Block

    // NOTE: the name is returned before the videoWriter finishes writing, so be careful
    // not to access the file until it's ready
    return outputFile;
}

CATextLayer doesn't appear in an AVMutableComposition when running from a unit test

Further investigation is needed, but AFAICT right now CATextLayer inside an AVMutableVideoComposition simply doesn't work from within a logic unit tests target, and this feature must be tested from a regular target.

AVAssetExportSession combine video files and freeze frame between videos

AVMutableComposition can only stitch videos together. I did it by doing two things:

  • Extracting last frame of the first video as image.
  • Making a video using this image(duration depends on your requirement).

Then you can compose these three videos (V1,V2 and your single image video). Both tasks are very easy to do.

For extracting the image out of the video, look at this link. If you don't want to use MPMoviePlayerController,which is used by accepted answer, then look at other answer by Steve.

For making video using the image check out this link. Question is about the issue of audio but I don't think you need audio. So just look at the method mentioned in question itself.

There is an easier way but it comes with a disadvantage. You can have two AVPlayer. First one plays your video which has white frames in between. Other one sits behind paused at last frame of video 1. So when the middle part comes, you will see the second AVPlayer loaded with last frame. So as a whole it will look like video 1 is paused. And trust me naked eye can't make out when player got changed. But the obvious disadvantage is that your exported video will be same with blank frames. So if you are just going to play it back in your app only, you can go with this approach.

Video merging in background iOS

I did a lot of R&D on this issue and didn't find a solution for it.

I want to share a few links in the hope that they help others in the Stack community who have the same problem or requirement.

Link1: AVExportSession to run in background

Quote related to question[copied from above Link1]

Sadly, since AVAssetExportSession uses the gpu to do some of its
work, it cannot run in the background if you are using an
AVVideoComposition.

Link2: Starting AVAssetExportSession in the Background

Quote related to question[copied from above Link2]

You can start AVAssetExportSession in background. The only limitations
in AVFoundation to performing work in the background, are using
AVVideoCompositions or AVMutableVideoCompositions. AVVideoCompositions
are using the GPU, and the GPU cannot be used in the background

Url(s) for background tasks:



Stack question

How to combine video clips with different orientation using AVFoundation

This is what I do. I then use an AVAssetExportSession to create the actual file. But I warn you, the CGAffineTransforms are sometimes applied late, so you'll see a frame or two of the original before the video transforms. I have no clue why this happens; a different combination of videos will yield the expected result, but sometimes it's off.

// Stitches the videos listed in videoPathArray onto one composition track and records,
// per clip, the transform that shows it upright inside the render area.
AVMutableComposition *composition = [AVMutableComposition composition];
AVMutableCompositionTrack *compositionVideoTrack = [composition addMutableTrackWithMediaType:AVMediaTypeVideo preferredTrackID:kCMPersistentTrackID_Invalid];
AVMutableVideoComposition *videoComposition = [AVMutableVideoComposition videoComposition];
videoComposition.frameDuration = CMTimeMake(1,30);
videoComposition.renderScale = 1.0;

AVMutableVideoCompositionInstruction *instruction = [AVMutableVideoCompositionInstruction videoCompositionInstruction];
AVMutableVideoCompositionLayerInstruction *layerInstruction = [AVMutableVideoCompositionLayerInstruction videoCompositionLayerInstructionWithAssetTrack:compositionVideoTrack];

// Get only paths the user selected
NSMutableArray *array = [NSMutableArray array];
for (NSString *string in videoPathArray) {
    if (![string isEqualToString:@""]) {
        [array addObject:string];
    }
}

self.videoPathArray = array;

// Iterate the filtered list itself so the index bound and the indexed array cannot disagree.
for (NSString *videoPath in self.videoPathArray) {

    AVURLAsset *sourceAsset = [AVURLAsset URLAssetWithURL:[NSURL fileURLWithPath:videoPath] options:[NSDictionary dictionaryWithObject:[NSNumber numberWithBool:YES] forKey:AVURLAssetPreferPreciseDurationAndTimingKey]];

    NSError *error = nil;

    BOOL ok = NO;
    // firstObject instead of objectAtIndex:0 so a file without video is skipped, not a crash.
    AVAssetTrack *sourceVideoTrack = [[sourceAsset tracksWithMediaType:AVMediaTypeVideo] firstObject];
    if (sourceVideoTrack == nil) {
        NSLog(@"something went wrong");
        continue;
    }

    // Use the exact time the clip will be inserted at for its transform as well. The
    // original accumulated seconds in a float and rounded to 1/30 s, which can place the
    // transform a frame or so away from the clip boundary.
    CMTime insertTime = [composition duration];

    CGSize temp = CGSizeApplyAffineTransform(sourceVideoTrack.naturalSize, sourceVideoTrack.preferredTransform);
    // fabs, not fabsf: CGFloat is a double on 64-bit platforms.
    CGSize size = CGSizeMake(fabs(temp.width), fabs(temp.height));
    CGAffineTransform transform = sourceVideoTrack.preferredTransform;

    videoComposition.renderSize = sourceVideoTrack.naturalSize;
    if (size.width > size.height) {
        [layerInstruction setTransform:transform atTime:insertTime];
    } else {
        // Portrait clip: shrink it to fit and centre it horizontally.
        float s = size.width/size.height;

        CGAffineTransform scaled = CGAffineTransformConcat(transform, CGAffineTransformMakeScale(s,s));

        float x = (size.height - size.width*s)/2;

        CGAffineTransform centred = CGAffineTransformConcat(scaled, CGAffineTransformMakeTranslation(x, 0));

        [layerInstruction setTransform:centred atTime:insertTime];
    }

    ok = [compositionVideoTrack insertTimeRange:sourceVideoTrack.timeRange ofTrack:sourceVideoTrack atTime:insertTime error:&error];

    if (!ok) {
        // Deal with the error.
        NSLog(@"something went wrong");
    }

    NSLog(@"\n source asset duration is %f \n source vid track timerange is %f %f \n composition duration is %f \n composition vid track time range is %f %f",CMTimeGetSeconds([sourceAsset duration]), CMTimeGetSeconds(sourceVideoTrack.timeRange.start),CMTimeGetSeconds(sourceVideoTrack.timeRange.duration),CMTimeGetSeconds([composition duration]), CMTimeGetSeconds(compositionVideoTrack.timeRange.start),CMTimeGetSeconds(compositionVideoTrack.timeRange.duration));
}

instruction.layerInstructions = [NSArray arrayWithObject:layerInstruction];
instruction.timeRange = compositionVideoTrack.timeRange;
videoComposition.instructions = [NSArray arrayWithObject:instruction];

