AWS S3 Select Query JSON or CSV File
2 min readApr 10, 2021
Amazon S3 Provides capability to query the CSV or JSON file on the S3 itself. By using the S3 Select option you can query any JSON or CSV file by going to AWS Console. By following the below steps:-
1. Open the S3 Object and “Query with S3 Select”
2. Select the options and insert the SQL Query and hit “Run SQL Query”
For more information on S3 Select https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-glacier-select-sql-reference-select.html
S3 Select Using NodeJS SDK
var AWS = require("aws-sdk");var s3 = new AWS.S3();let queryObj = async(bucket, prefix, query) => { const params = { Bucket: bucket, Key: prefix, ExpressionType: 'SQL', Expression: query, InputSerialization: {'JSON': { Type : 'Document'}}, OutputSerialization: {'JSON': { RecordDelimiter: ',' }} }; const res = await s3.selectObjectContent(params).promise(); return res;};let listFiles = async(bucketName, prefix) => { const files = await s3.listObjectsV2({ Bucket: bucketName, Prefix: prefix }).promise(); return files;};const getRecord = async ( payload ) => { return new Promise ((resolve, reject) => { // This is a stream of events payload.on('data', (event) => { if (event.Records) { resolve(Buffer.from(event.Records.Payload).toString('utf-8')); }else { resolve(null); } }); });};let handler = async() => { const query = 'SELECT * FROM s3object s where s.name=\'kapil9\''; const bucket = 'new-bucket-test-1009'; const prefix = 'analysis'; const res = await listFiles(bucket, prefix); console.log("Total Files: " + res.KeyCount); // Sorting based on the LastModified (Assending Order) const sortedFileList = res.Contents.sort((a, b) => a.LastModified - b.LastModified); // iterating over each files for (let index = 1; index < sortedFileList.length; index++) { const file = sortedFileList[index]; console.log(">>>>> " + index + ". File Name " + file.Key); // Query using S3 Select const result = await queryObj(bucket, file.Key, query); // Transforming record from the Payload let record = await getRecord(result.Payload); if (record != null) { console.log(record); // break; } else { console.log("Not Found") } }};handler();