AWS S3 Select Query JSON or CSV File

Kapilvishwakarma
2 min readApr 10, 2021

Amazon S3 Provides capability to query the CSV or JSON file on the S3 itself. By using the S3 Select option you can query any JSON or CSV file by going to AWS Console. By following the below steps:-
1. Open the S3 Object and “Query with S3 Select”

2. Select the options and insert the SQL Query and hit “Run SQL Query”

For more information on S3 Select https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-glacier-select-sql-reference-select.html

S3 Select Using NodeJS SDK

var AWS = require("aws-sdk");var s3 = new AWS.S3();let queryObj = async(bucket, prefix, query) => {  const params = {    Bucket: bucket,    Key: prefix,    ExpressionType: 'SQL',    Expression: query,    InputSerialization: {'JSON': { Type : 'Document'}},    OutputSerialization: {'JSON': { RecordDelimiter: ',' }}  };  const res = await s3.selectObjectContent(params).promise();  return res;};let listFiles = async(bucketName, prefix) => {  const files = await s3.listObjectsV2({    Bucket: bucketName,    Prefix: prefix    }).promise();   return files;};const getRecord = async ( payload )  => {  return new Promise ((resolve, reject) => {    // This is a stream of events    payload.on('data', (event) => {      if (event.Records) {       resolve(Buffer.from(event.Records.Payload).toString('utf-8'));      }else {       resolve(null);      }    });   });};let handler = async() => {  const query = 'SELECT * FROM s3object s where s.name=\'kapil9\'';  const bucket = 'new-bucket-test-1009';  const prefix = 'analysis';  const res = await listFiles(bucket, prefix);  console.log("Total Files: " + res.KeyCount);  // Sorting based on the LastModified (Assending Order)  const sortedFileList = res.Contents.sort((a, b) => a.LastModified - b.LastModified);  // iterating over each files  for (let index = 1; index < sortedFileList.length; index++) {    const file = sortedFileList[index];    console.log(">>>>> " + index + ". File Name " + file.Key);    // Query using S3 Select    const result = await queryObj(bucket, file.Key, query);   // Transforming record from the Payload   let record = await getRecord(result.Payload);   if (record != null) {     console.log(record);     // break;    } else {     console.log("Not Found")    }  }};handler();

Sign up to discover human stories that deepen your understanding of the world.

Free

Distraction-free reading. No ads.

Organize your knowledge with lists and highlights.

Tell your story. Find your audience.

Membership

Read member-only stories

Support writers you read most

Earn money for your writing

Listen to audio narrations

Read offline with the Medium app

No responses yet

Write a response