[{"id":"test-010","handle":"APIBuilder","repo_url":"https://github.com/apibuilder/fastapi-data-api","demo_url":"https://api.datadivas.dev","summary":"Built FastAPI service exposing data warehouse through REST API with authentication, rate limiting, and caching","broke_fix":"API was getting hammered and slowing down. Added Redis caching for expensive queries and implemented rate limiting with API keys.","validation":"Serving 1000+ requests/day with 50ms average response time. 10+ applications consuming the API.","resume_bullets":"• Developed REST API with FastAPI serving data to 10+ applications with sub-100ms response times\\n• Implemented caching and rate limiting reducing database load by 75%","points":20,"featured":0,"created_at":"2026-03-12 05:09:37","user_id":null},{"id":"test-009","handle":"CloudArchitect","repo_url":"https://github.com/cloudarchitect/terraform-data-stack","demo_url":null,"summary":"Infrastructure as Code for complete data platform on AWS: S3, Glue, Redshift, Airflow using Terraform","broke_fix":"Terraform state conflicts when multiple people ran deployments. Implemented remote state with S3 + DynamoDB locking.","validation":"Deployed to 3 environments (dev, staging, prod). Infrastructure changes now tracked in Git with proper reviews.","resume_bullets":"• Automated cloud infrastructure deployment using Terraform reducing setup time from days to hours\\n• Implemented IaC best practices with remote state management and modular design","points":20,"featured":1,"created_at":"2026-03-12 04:09:37","user_id":null},{"id":"test-008","handle":"TechDiva","repo_url":"https://github.com/techdiva/great-expectations-suite","demo_url":null,"summary":"Implemented comprehensive data quality framework with Great Expectations covering 200+ validation rules","broke_fix":"Tests were too strict and failing on edge cases. Worked with stakeholders to define acceptable ranges and added business rule validations.","validation":"Caught 15 data quality issues in first month. Zero bad data incidents since implementation.","resume_bullets":"• Established data quality framework with Great Expectations implementing 200+ validation rules\\n• Prevented data quality incidents reducing downstream errors by 100%","points":15,"featured":0,"created_at":"2026-03-12 03:09:37","user_id":null},{"id":"test-007","handle":"SparkMaster","repo_url":"https://github.com/sparkmaster/pyspark-etl","demo_url":null,"summary":"Scaled ETL job from single-node Python to distributed PySpark processing 50GB+ daily datasets","broke_fix":"Initial PySpark job was slower than Python! Issue was too many small partitions. Repartitioned data properly and tuned executor memory.","validation":"Processing time reduced from 6 hours to 45 minutes. Handles 3x data growth without changes.","resume_bullets":"• Migrated ETL pipeline to PySpark achieving 8x performance improvement on large datasets\\n• Optimized Spark job configuration reducing cost by 60% through efficient resource utilization","points":20,"featured":1,"created_at":"2026-03-12 00:09:37","user_id":null},{"id":"test-006","handle":"DataViz","repo_url":"https://github.com/dataviz/tableau-pipeline","demo_url":"https://public.tableau.com/profile/dataviz","summary":"Built automated data pipeline feeding Tableau dashboards with dbt transformations and Prefect orchestration","broke_fix":"Tableau extracts were stale by several hours. 
Implemented webhook triggers from dbt to refresh Tableau extracts automatically after successful runs.","validation":"Dashboards now update within 10 minutes of source data changes. 25+ stakeholders using daily.","resume_bullets":"• Automated BI pipeline with dbt and Prefect reducing dashboard refresh time by 80%\\n• Built 15+ Tableau dashboards serving 25+ stakeholders with near real-time data","points":15,"featured":0,"created_at":"2026-03-11 18:09:37","user_id":null},{"id":"test-005","handle":"DockerNinja","repo_url":"https://github.com/dockerninja/warehouse-in-a-box","demo_url":"https://dockerninja.dev/demo","summary":"Containerized local data warehouse with Postgres, dbt, Metabase, and Airflow - complete dev environment in Docker Compose","broke_fix":"Containers kept running out of memory. Set proper resource limits, optimized Postgres config, and added volume persistence for data.","validation":"One-command setup working on Mac, Linux, and Windows. 10+ team members using for local development.","resume_bullets":"• Created containerized data stack with Docker Compose enabling one-command environment setup\\n• Reduced local development setup time from 3 hours to 5 minutes","points":15,"featured":0,"created_at":"2026-03-11 06:09:37","user_id":null},{"id":"test-004","handle":"SQLQueen","repo_url":"https://github.com/sqlqueen/postgres-warehouse","demo_url":null,"summary":"Designed dimensional data warehouse with 15 fact tables and 30 dimension tables for e-commerce analytics","broke_fix":"Query performance was terrible on fact tables. Added composite indexes, partitioning by date, and implemented materialized views for common aggregations.","validation":"Dashboard queries went from 45 seconds to under 2 seconds. Successfully serving 50+ concurrent users.","resume_bullets":"• Designed star schema data warehouse with 15 fact and 30 dimension tables supporting analytics\\n• Optimized query performance achieving 95% improvement through indexing and partitioning","points":15,"featured":0,"created_at":"2026-03-10 06:09:37","user_id":null},{"id":"test-003","handle":"PipelinePro","repo_url":"https://github.com/pipelinepro/streaming-kafka","demo_url":"https://demo.pipelinepro.dev","summary":"Real-time streaming data pipeline with Kafka, PySpark, and PostgreSQL processing IoT sensor data","broke_fix":"Kafka consumer lag kept growing. Issue was inefficient batch processing. Implemented micro-batching with Spark Structured Streaming and tuned partition count.","validation":"Processing 10K events/sec with <100ms latency, zero data loss during 72-hour stress test","resume_bullets":"• Built real-time streaming pipeline with Kafka and PySpark processing 10K events/second\\n• Achieved <100ms latency for IoT data ingestion and processing","points":20,"featured":1,"created_at":"2026-03-09 06:09:37","user_id":null},{"id":"test-002","handle":"CodeWarrior","repo_url":"https://github.com/codewarrior/dbt-transform","demo_url":null,"summary":"Migrated legacy SQL transforms to dbt with 50+ models, documentation, and automated testing","broke_fix":"dbt tests were taking 2+ hours to run. Optimized by creating test macros and parallelizing test execution. 
Reduced runtime to 15 minutes.","validation":"All 50 models tested and documented, integrated with CI/CD pipeline, deployed to production serving 20+ analysts","resume_bullets":"• Migrated 50+ SQL transformations to dbt framework improving code maintainability and testing\\n• Reduced data transformation time by 60% through query optimization and incremental models","points":20,"featured":1,"created_at":"2026-03-08 06:09:37","user_id":null},{"id":"test-001","handle":"DataDiva93","repo_url":"https://github.com/DataDiva93/airflow-pipeline","demo_url":"https://demo.datadivas.org/airflow","summary":"Built production Airflow DAG for ETL pipeline processing 1M+ records daily from Postgres to Snowflake with Great Expectations quality gates","broke_fix":"Airflow scheduler kept failing due to zombie processes. Fixed by implementing proper task cleanup and health checks. Added monitoring with Grafana.","validation":"Successfully processed 30-day backfill, all quality checks passed, pipeline runs daily at 2AM UTC with 99.9% success rate","resume_bullets":"• Engineered ETL pipeline using Apache Airflow processing 1M+ daily records with 99.9% reliability\\n• Implemented data quality validation with Great Expectations reducing data errors by 85%","points":25,"featured":1,"created_at":"2026-03-07 06:09:37","user_id":null}]