Here is a guide on how to perform a multipart upload of a file to AWS S3

This tutorial will guide you through using the AWS SDK to perform multipart uploads of large files in Node.js. While the tutorial focuses on Node.js, you can apply the same principles to perform multipart uploads in JavaScript on the frontend.

 Prerequisites
 - AWS SDK
 - Node.js

Step 1: Setup the project
    Before we begin, let's make sure you have a Node.js project set up. If you don't already have a Node.js project, you can create one by running the npm init command and following the prompts.
npm init

Step 2: Install the package
    To get started, you'll need to install the AWS SDK. You can do this by running the following command in your terminal:
npm install aws-sdk
This will install the necessary package for interacting with AWS services in your project.

Before we dive into the details of performing a multipart upload, let's discuss the overall approach and the three methods we'll be using to upload the file in chunks

 - Create Multipart Upload
 - Upload Parts
 - Complete Multipart Upload


Step 1: Create multipart method
    The first step in performing a multipart upload is to initiate the upload process and obtain an upload ID, which will be used for all subsequent parts of the upload. This can be done using the createMultipartUpload method, which also allows you to provide additional file or metadata information for the object being uploaded. Once you obtain the upload ID from this step, save it in a variable called uploadId for later use.

Step 2: Slice your file and start uploading with the uploadPart method

     To perform a multipart upload, we need to read the file in chunks using the createReadStream method of the fs module. It's important to note that S3 has a minimum part size requirement of 5 MiB (5,242,880 bytes), except for the final part, which can be smaller. Therefore, we'll need to ensure that each chunk is at least 5 MiB in size.

Once we have our file chunks, we can start uploading them using the uploadPart method. Each uploaded chunk will return an ETag value, which we'll need to keep track of along with its corresponding part number. We can store this information in an array for later use.

After uploading all the chunks, we can call the completeMultipartUpload method to finalize the upload process. Please refer to the code for more details and clarity



Step 3: Complete the multipart upload once all parts are uploaded.
    After all the chunks have been uploaded, we can call the completeMultipartUpload method to finalize the upload process. This method will return an object containing information about the uploaded file.

Let's start writing code.
     1. Read File in chunks (5mb).
     2. Upload File's chunks

The readFileChunksAndUpload method is the entry point for our multipart upload code. It first calls the createMultipart method to obtain an upload ID, and then reads the file in chunks using createReadStream. As each chunk of about 5 MiB is read, the stream is paused so we don't overload the network with too many concurrent requests, and it is resumed once that part finishes uploading.

We use the FILE_NAME variable as the key for our S3 object. It's worth noting that the s3Client method provides the S3 object service with advanced features. For example, if you're using a local S3 instance, it will behave like a local service, while on production it will use the S3 web service


    You can also check out how to use S3 locally for development.

/**
 * Create an S3 service object.
 *
 * When DEV === 'true' it targets a local S3-compatible endpoint
 * (e.g. localstack / s3rver on port 5002); otherwise it talks to the
 * real AWS S3 web service.
 *
 * @returns {AWS.S3} a configured S3 client
 */
function s3Client() {
    const option = {
        region: 'us-east-1',
        apiVersion: '2006-03-01'
    };
    if (process.env.DEV === 'true') {
        // Local S3 emulator: path-style addressing is required because
        // virtual-host style bucket subdomains do not resolve locally.
        option.region = 'eu-west-1';
        option.endpoint = 'http://localhost:5002';
        option.s3ForcePathStyle = true;
    } else {
        // SECURITY: never hard-code real credentials in source code.
        // Read them from the environment (the SDK also resolves these
        // automatically if you omit this block entirely).
        option.credentials = {
            accessKeyId: process.env.AWS_ACCESS_KEY_ID,
            secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
        };
    }
    return new AWS.S3(option);
}

// -- index.js ---
var AWS = require('aws-sdk');
var fs = require('fs');
var s3 = s3Client();

const BUCKET = 'your-bucket-name'; // target S3 bucket (referenced by every request below)
const FILE_NAME = 'avengers-endgame-4k-4gb.mp4'; // local file to upload; also used as the S3 object key
const MIME_TYPE = 'video/mp4'; // content type; depends on the file being uploaded

var uploadId; // holds the UploadId returned by createMultipartUpload
// Collects { ETag, PartNumber } for every uploaded part; S3 needs this
// list to stitch the parts back together in completeMultipartUpload.
var multipartMap = {
    Parts: []
};
var partNumber = 0; // 1-based part sequence counter (S3 part numbers start at 1)

/**
 * Entry point: initiates the multipart upload, streams the file in
 * 5 MiB chunks, uploads each chunk as a part, then completes the upload.
 */
async function readFileChunksAndUpload() {
    uploadId = await createMultipart(FILE_NAME, MIME_TYPE);
    // S3 rejects any part (except the last) smaller than 5 MiB
    // (5 * 1024 * 1024 = 5,242,880 bytes), so size the read buffer
    // to exactly that minimum.
    const stream = fs.createReadStream(FILE_NAME, { highWaterMark: 5 * 1024 * 1024 });
    stream.on('data', async (chunk) => {
        partNumber++;
        stream.pause(); // wait for this part to finish before reading more
        try {
            const partRes = await uploadParts(chunk, FILE_NAME, partNumber, uploadId);
            // partNumber is 1-based; store at partNumber - 1 so the Parts
            // array has no leading hole (a hole would break completion).
            multipartMap.Parts[partNumber - 1] = partRes;
            stream.resume();
        } catch (err) {
            console.error('part upload failed', err);
            stream.destroy(err); // stop reading; do not attempt completion
        }
    });
    stream.on('end', async function () {
        console.log('done');
        const resp = await completePart(FILE_NAME, multipartMap, uploadId);
        console.log(resp);
    });
}

/**
 * CreateMultipartUpload: starts the upload and resolves with the UploadId
 * that every subsequent uploadPart / completeMultipartUpload call must carry.
 *
 * @param {string} fileName - S3 object key
 * @param {string} type - MIME content type of the object
 * @returns {Promise<string>} the UploadId
 */
function createMultipart(fileName, type) {
    return new Promise((res, rej) => {
        try {
            var params = {
                Bucket: BUCKET, // bucket name
                Key: fileName,
                ContentType: type
            };
            s3.createMultipartUpload(params, (err, data) => {
                if (err) {
                    console.log("createMultipart", err);
                    // reject instead of throwing: a throw inside this async
                    // callback would escape the try/catch and go unhandled
                    rej(err);
                } else {
                    res(data.UploadId);
                }
            });
        } catch (error) {
            rej(error); // was rej(err) — an undefined identifier
        }
    });
}

/**
 * UploadPart: uploads one chunk of the file as part `partNumber`.
 *
 * @param {Buffer} fileBuffer - the chunk data (>= 5 MiB except the last part)
 * @param {string} filename - S3 object key
 * @param {number} partNumber - 1-based part sequence number
 * @param {string} uploadId - UploadId from createMultipart
 * @returns {Promise<{ETag: string, PartNumber: number}>} entry for the Parts map
 */
function uploadParts(fileBuffer, filename, partNumber, uploadId) {
    return new Promise((res, rej) => {
        try {
            var params = {
                Body: fileBuffer,
                Bucket: BUCKET,
                Key: filename,
                PartNumber: partNumber,
                UploadId: uploadId
            };
            s3.uploadPart(params, (err, data) => {
                if (err) {
                    console.log("uploadParts", err);
                    // reject instead of throwing: a throw inside this async
                    // callback would escape the try/catch and go unhandled
                    rej(err);
                } else {
                    // S3 identifies each part by its ETag + PartNumber when
                    // the upload is completed.
                    res({
                        ETag: data.ETag,
                        PartNumber: partNumber
                    });
                }
            });
        } catch (error) {
            rej(error);
        }
    });
}
/**
 * CompleteMultipartUpload: tells S3 to assemble all uploaded parts into
 * the final object.
 *
 * @param {string} key - S3 object key
 * @param {{Parts: Array<{ETag: string, PartNumber: number}>}} multipartMap
 *        ordered list of uploaded parts (no gaps allowed)
 * @param {string} uploadId - UploadId from createMultipart
 * @returns {Promise<object>} S3's completion response (Location, ETag, ...)
 */
function completePart(key, multipartMap, uploadId) {
    return new Promise((res, rej) => {
        try {
            var params = {
                Bucket: BUCKET,
                Key: key,
                MultipartUpload: multipartMap,
                UploadId: uploadId
            };
            s3.completeMultipartUpload(params, (err, data) => {
                if (err) {
                    console.log("completePart", err);
                    // reject instead of throwing: a throw inside this async
                    // callback would escape the try/catch and go unhandled
                    rej(err);
                } else {
                    res(data);
                }
            });
        } catch (error) {
            rej(error);
        }
    });
}
This is the complete code snippet — copy it into your index file and run it with node index.js.




Comments