A lightweight and efficient ETL engine in TypeScript for reading, filtering, transforming, and writing data.
- Read from various data sources (CSV, TSV, JSON, NDJSON, FixedWidth, etc.)
- Write to multiple formats (JSON, NDJSON, CSV, TSV, FixedWidth, SQL, Console, etc.)
- Filter and transform data with powerful field filters
- Supports complex filtering expressions
- Chainable, high-performance operations for flexible data processing (a minimal pipeline sketch follows this list)
- Supports data validation and transformation
- Ideal for data cleaning, migration, and analysis
- Modular design for easy integration into existing projects
- Easy to use from TypeScript, JavaScript, and the browser
- Secure and reliable, backed by TypeScript's type safety
- Easy to install and get started (with examples)
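
As a minimal sketch of a pipeline, assuming an input CSV with a header row: read the file and write it back out as JSON. The file paths are placeholders, and `JsonWriter` is the JSON-file counterpart of the writers used in the examples below.

```ts
// Minimal pipeline sketch (hypothetical file paths): CSV in, JSON out.
import { CSVReader, JsonWriter, Job } from '@pujansrt/data-genie';

async function main() {
  // Treat the first CSV row as field names, then stream records to a JSON file.
  const reader = new CSVReader('input/data.csv').setFieldNamesInFirstRow(true);
  await Job.run(reader, new JsonWriter('output/data.json'));
}

main().catch(console.error);
```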
Install from npm:

```bash
npm install @pujansrt/data-genie
```

Or, with yarn:

```bash
yarn add @pujansrt/data-genie
```

Development install (clone & build):

```bash
git clone https://github.com/pujansrt/data-genie.git
cd data-genie
npm install
npm run build
```

Quick start: read a CSV, remove duplicate records, derive a calculated field, drop the source fields, and print the result to the console:

```ts
import { ConsoleWriter, CSVReader, Job, SetCalculatedField, TransformingReader, RemoveDuplicatesReader, RemoveFields } from '@pujansrt/data-genie';

async function runExample() {
  let reader: any = new CSVReader('input/credit-balance-01.csv').setFieldNamesInFirstRow(true);
  reader = new RemoveDuplicatesReader(reader, 'Rating', 'CreditLimit');
  reader = new TransformingReader(reader)
    .add(new SetCalculatedField('AvailableCredit', 'parseFloat(record.CreditLimit) - parseFloat(record.Balance)').transform())
    .add(new RemoveFields('CreditLimit', 'Balance').transform());
  await Job.run(reader, new ConsoleWriter());
  // await Job.run(reader, new JsonWriter('output/filtered-data.json'));
  // await Job.run(reader, new CsvWriter('output/filtered-data.csv'));
  // await Job.run(reader, new FixedWidthWriter('output/filtered-data.fw').setFieldNamesInFirstRow(true).setFieldWidths(10, 15, 10, 15));
}

runExample().catch(console.error);
```

To write fixed-width output, configure a `FixedWidthWriter` with column widths (`reader` is any reader built as above):

```ts
const fwWriter = new FixedWidthWriter('output/ex-simulated.fw')
  .setFieldNamesInFirstRow(true)
  .setFieldWidths(10, 15, 10, 15);
await Job.run(reader, fwWriter);
```

Filter records with per-field rules and a free-form filter expression:

```ts
import { ConsoleWriter, CSVReader, FieldFilter, FilterExpression, FilteringReader, IsNotNull, IsType, Job, PatternMatch, ValueMatch } from "@pujansrt/data-genie";

async function runExample() {
  const reader = new CSVReader('input/example.csv').setFieldNamesInFirstRow(true);
  const filteringReader = new FilteringReader(reader)
    .add(new FieldFilter('Rating').addRule(IsNotNull()).addRule(IsType('string')).addRule(ValueMatch('B', 'C')).createRecordFilter())
    .add(new FieldFilter('Account').addRule(IsNotNull()).addRule(IsType('string')).addRule(PatternMatch('[0-9]*')).createRecordFilter())
    .add(
      new FilterExpression(
        'record.CreditLimit !== undefined && record.Balance !== undefined && parseFloat(record.CreditLimit) >= 0 && parseFloat(record.CreditLimit) <= 5000 && parseFloat(record.Balance) <= parseFloat(record.CreditLimit)'
      ).createRecordFilter()
    );
  await Job.run(filteringReader, new ConsoleWriter());
}

runExample().catch(console.error);
```

Conditionally transform records read from JSON (here, clamping negative balances to zero):

```ts
import { ConsoleWriter, Job, JsonReader, SetCalculatedField, TransformingReader } from "@pujansrt/data-genie";

async function runExample() {
  let reader: any = new JsonReader('input/simple-json-input.json');
  reader = new TransformingReader(reader)
    .setCondition((record) => record.balance < 0)
    .add(new SetCalculatedField('balance', '0.0').transform()); // Using SetCalculatedField for a dynamic value
  await Job.run(reader, new ConsoleWriter());
}

runExample().catch(console.error);
```

Read fixed-width files (column widths must be supplied explicitly):

```ts
import { ConsoleWriter, FixedWidthReader, Job } from "@pujansrt/data-genie";

async function runExample() {
  const reader = new FixedWidthReader('input/credit-balance-01.fw');
  reader.setFieldWidths(8, 16, 16, 12, 14, 16, 7);
  reader.setFieldNamesInFirstRow(true);
  await Job.run(reader, new ConsoleWriter());
}
runExample().catch(console.error);
```

Planned features:

- Support for Apache Avro
- Support for Apache Parquet
- Enhanced data validation rules
Use cases:

- Data cleaning and transformation
- Data validation and filtering
- Data migration and ETL processes (see the sketch after this list)
- Data analysis and reporting
- Data integration from multiple sources
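
For instance, a small migration pipeline can be sketched by chaining pieces from the examples above. The file names and the `Account` field here are illustrative, not part of any fixed schema:

```ts
import { CSVReader, CsvWriter, FieldFilter, FilteringReader, IsNotNull, Job } from '@pujansrt/data-genie';

async function migrate() {
  // Read the legacy export (hypothetical path), keep only records whose
  // Account field is present, and write the cleaned data to a new CSV.
  const reader = new FilteringReader(
    new CSVReader('input/legacy-export.csv').setFieldNamesInFirstRow(true)
  ).add(new FieldFilter('Account').addRule(IsNotNull()).createRecordFilter());
  await Job.run(reader, new CsvWriter('output/migrated.csv'));
}

migrate().catch(console.error);
```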
Contributions are welcome! Please open an issue or submit a pull request.
MIT License: free for personal and commercial use.
Developed and maintained by Pujan Srivastava, a mathematician and software engineer with 18+ years of programming experience.
