AWS S3 Select Query JSON or CSV File

Kapilvishwakarma
2 min readApr 10, 2021

Amazon S3 Provides capability to query the CSV or JSON file on the S3 itself. By using the S3 Select option you can query any JSON or CSV file by going to AWS Console. By following the below steps:-
1. Open the S3 Object and “Query with S3 Select”

2. Select the options and insert the SQL Query and hit “Run SQL Query”

For more information on S3 Select https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-glacier-select-sql-reference-select.html

S3 Select Using NodeJS SDK

var AWS = require("aws-sdk");var s3 = new AWS.S3();let queryObj = async(bucket, prefix, query) => {  const params = {    Bucket: bucket,    Key: prefix,    ExpressionType: 'SQL',    Expression: query,    InputSerialization: {'JSON': { Type : 'Document'}},    OutputSerialization: {'JSON': { RecordDelimiter: ',' }}  };  const res = await s3.selectObjectContent(params).promise();  return res;};let listFiles = async(bucketName, prefix) => {  const files = await s3.listObjectsV2({    Bucket: bucketName,    Prefix: prefix    }).promise();   return files;};const getRecord = async ( payload )  => {  return new Promise ((resolve, reject) => {    // This is a stream of events    payload.on('data', (event) => {      if (event.Records) {       resolve(Buffer.from(event.Records.Payload).toString('utf-8'));      }else {       resolve(null);      }    });   });};let handler = async() => {  const query = 'SELECT * FROM s3object s where s.name=\'kapil9\'';  const bucket = 'new-bucket-test-1009';  const prefix = 'analysis';  const res = await listFiles(bucket, prefix);  console.log("Total Files: " + res.KeyCount);  // Sorting based on the LastModified (Assending Order)  const sortedFileList = res.Contents.sort((a, b) => a.LastModified - b.LastModified);  // iterating over each files  for (let index = 1; index < sortedFileList.length; index++) {    const file = sortedFileList[index];    console.log(">>>>> " + index + ". File Name " + file.Key);    // Query using S3 Select    const result = await queryObj(bucket, file.Key, query);   // Transforming record from the Payload   let record = await getRecord(result.Payload);   if (record != null) {     console.log(record);     // break;    } else {     console.log("Not Found")    }  }};handler();

--

--