{"payload":{"pageCount":2,"repositories":[{"type":"Public","name":"TinyChatEngine","owner":"mit-han-lab","isFork":false,"description":"TinyChatEngine: On-Device LLM Inference Library","topicNames":["c","arm","deep-learning","cpp","x86-64","quantization","edge-computing","cuda-programming","on-device-ai","large-language-models"],"topicsNotShown":0,"allTopics":["c","arm","deep-learning","cpp","x86-64","quantization","edge-computing","cuda-programming","on-device-ai","large-language-models"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":2,"issueCount":23,"starsCount":559,"forksCount":52,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-16T15:36:09.014Z"}},{"type":"Public","name":"qserve","owner":"mit-han-lab","isFork":false,"description":"QServe: W4A8KV4 Quantization and System Co-design for Efficient LLM Serving","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":6,"starsCount":209,"forksCount":4,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-14T16:49:05.246Z"}},{"type":"Public","name":"llm-awq","owner":"mit-han-lab","isFork":false,"description":"[MLSys 2024 Best Paper Award] AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":98,"starsCount":1893,"forksCount":134,"license":"MIT License","participation":[0,0,11,0,0,8,7,4,0,19,6,1,3,2,2,5,1,1,1,3,0,0,2,11,5,0,0,5,0,0,0,0,0,0,0,0,0,0,0,2,8,2,2,0,0,1,0,0,0,2,2,1],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-13T15:58:52.192Z"}},{"type":"Public","name":"lmquant","owner":"mit-han-lab","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":50,"forksCount":0,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-11T17:26:11.936Z"}},{"type":"Public","name":"efficientvit","owner":"mit-han-lab","isFork":false,"description":"EfficientViT is a new family of vision models for efficient high-resolution vision.","topicNames":["imagenet","segmentation","high-resolution","vision-transformer","efficientvit","segment-anything"],"topicsNotShown":0,"allTopics":["imagenet","segmentation","high-resolution","vision-transformer","efficientvit","segment-anything"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":67,"starsCount":1451,"forksCount":126,"license":"Apache License 2.0","participation":[0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,10,3,11,2,0,0,1,2,0,0,2,0,0,0,0,1,0,0,1,0,0,1,7,2,0,2,7,4,1,2,3,0,1,2,4,2],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-10T15:12:48.753Z"}},{"type":"Public","name":"distrifuser","owner":"mit-han-lab","isFork":false,"description":"[CVPR 2024 Highlight] DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models","topicNames":["acceleration","parallelism","generative-model","diffusion-models","generative-ai"],"topicsNotShown":0,"allTopics":["acceleration","parallelism","generative-model","diffusion-models","generative-ai"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":447,"forksCount":11,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,9,0,0,0,0,2,0,3,2,2,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-05T03:22:43.304Z"}},{"type":"Public","name":"spatten-llm","owner":"mit-han-lab","isFork":false,"description":"[HPCA'21] SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning","topicNames":["rtl","attention","hardware-acceleration","spinalhdl","llm-inference"],"topicsNotShown":0,"allTopics":["rtl","attention","hardware-acceleration","spinalhdl","llm-inference"],"primaryLanguage":{"name":"Scala","color":"#c22d40"},"pullRequestCount":0,"issueCount":1,"starsCount":48,"forksCount":3,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-03T21:28:22.876Z"}},{"type":"Public","name":"torchquantum","owner":"mit-han-lab","isFork":false,"description":"A PyTorch-based framework for Quantum Classical Simulation, Quantum Machine Learning, Quantum Neural Networks, Parameterized Quantum Circuits with support for easy deployments on real quantum computers.","topicNames":["machine-learning","system","deep-learning","neural-network","quantum","pytorch","quantum-computing","quantum-machine-learning","quantum-simulation","ml-for-systems"],"topicsNotShown":3,"allTopics":["machine-learning","system","deep-learning","neural-network","quantum","pytorch","quantum-computing","quantum-machine-learning","quantum-simulation","ml-for-systems","pytorch-quantum","quantum-neural-network","parameterized-quantum-circuit"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":4,"issueCount":58,"starsCount":1200,"forksCount":169,"license":"MIT License","participation":[1,24,36,74,6,0,2,0,6,0,11,5,0,11,20,22,1,5,0,0,1,12,6,1,9,3,4,21,9,2,0,13,2,0,2,14,6,1,2,24,0,0,0,0,2,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-30T02:01:11.471Z"}},{"type":"Public","name":"smoothquant","owner":"mit-han-lab","isFork":false,"description":"[ICML 2023] SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":53,"starsCount":1042,"forksCount":116,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,4,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,1,5,1,0,0,1,1,0,0,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-28T17:17:11.771Z"}},{"type":"Public","name":"sparsevit","owner":"mit-han-lab","isFork":false,"description":"[CVPR'23] SparseViT: Revisiting Activation Sparsity for Efficient High-Resolution Vision Transformer","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":54,"forksCount":2,"license":"Apache License 2.0","participation":[0,0,4,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-24T01:30:59.362Z"}},{"type":"Public","name":"patch_conv","owner":"mit-han-lab","isFork":false,"description":"Patch convolution to avoid large GPU memory usage of Conv2D","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":54,"forksCount":3,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-03T19:53:18.091Z"}},{"type":"Public","name":"mcunet","owner":"mit-han-lab","isFork":false,"description":"[NeurIPS 2020] MCUNet: Tiny Deep Learning on IoT Devices; [NeurIPS 2021] MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning","topicNames":["deep-learning","pytorch","neural-architecture-search","tinyml","microncontroller"],"topicsNotShown":0,"allTopics":["deep-learning","pytorch","neural-architecture-search","tinyml","microncontroller"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":20,"starsCount":406,"forksCount":77,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T18:17:37.080Z"}},{"type":"Public","name":"tinyengine","owner":"mit-han-lab","isFork":false,"description":"[NeurIPS 2020] MCUNet: Tiny Deep Learning on IoT Devices; [NeurIPS 2021] MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning; [NeurIPS 2022] MCUNetV3: On-Device Training Under 256KB Memory","topicNames":["c","microcontroller","cpp","pytorch","codegenerator","tinyml","deep-learning","quantization","edge-computing","neural-architecture-search"],"topicsNotShown":0,"allTopics":["c","microcontroller","cpp","pytorch","codegenerator","tinyml","deep-learning","quantization","edge-computing","neural-architecture-search"],"primaryLanguage":{"name":"C","color":"#555555"},"pullRequestCount":1,"issueCount":31,"starsCount":747,"forksCount":126,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T18:16:51.627Z"}},{"type":"Public","name":"tiny-training","owner":"mit-han-lab","isFork":false,"description":"On-Device Training Under 256KB Memory [NeurIPS'22]","topicNames":["edge-ai","on-device-training","learning-on-the-edge"],"topicsNotShown":0,"allTopics":["edge-ai","on-device-training","learning-on-the-edge"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":7,"starsCount":402,"forksCount":55,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T18:15:19.456Z"}},{"type":"Public","name":"bevfusion","owner":"mit-han-lab","isFork":false,"description":"[ICRA'23] BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird's-Eye View Representation","topicNames":["camera","pytorch","lidar","object-detection","sensor-fusion","semantic-segmentation","3d-perception"],"topicsNotShown":0,"allTopics":["camera","pytorch","lidar","object-detection","sensor-fusion","semantic-segmentation","3d-perception"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":4,"issueCount":48,"starsCount":2045,"forksCount":371,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T00:41:58.791Z"}},{"type":"Public","name":"torchsparse","owner":"mit-han-lab","isFork":false,"description":"[MICRO'23, MLSys'22] TorchSparse: Efficient Training and Inference Framework for Sparse Convolution on GPUs.","topicNames":["acceleration","pytorch"],"topicsNotShown":0,"allTopics":["acceleration","pytorch"],"primaryLanguage":{"name":"Cuda","color":"#3A4E3A"},"pullRequestCount":1,"issueCount":20,"starsCount":1122,"forksCount":125,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-23T17:59:14.161Z"}},{"type":"Public","name":"streaming-llm","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2024] Efficient Streaming Language Models with Attention Sinks","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":34,"starsCount":6249,"forksCount":353,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-20T05:46:26.421Z"}},{"type":"Public","name":"tinychat-tutorial","owner":"mit-han-lab","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":1,"issueCount":3,"starsCount":30,"forksCount":9,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-01T23:32:08.814Z"}},{"type":"Public","name":"once-for-all","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2020] Once for All: Train One Network and Specialize it for Efficient Deployment","topicNames":["acceleration","nas","automl","edge-ai","efficient-model","tinyml"],"topicsNotShown":0,"allTopics":["acceleration","nas","automl","edge-ai","efficient-model","tinyml"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":53,"starsCount":1842,"forksCount":332,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-14T04:44:52.937Z"}},{"type":"Public","name":"fastcomposer","owner":"mit-han-lab","isFork":false,"description":"FastComposer: Tuning-Free Multi-Subject Image Generation with Localized Attention","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":13,"starsCount":599,"forksCount":34,"license":"MIT License","participation":[10,0,3,12,0,2,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-09T03:14:15.846Z"}},{"type":"Public","name":"tinyml","owner":"mit-han-lab","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":6,"starsCount":711,"forksCount":129,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-29T04:22:13.211Z"}},{"type":"Public","name":"offsite-tuning","owner":"mit-han-lab","isFork":false,"description":"Offsite-Tuning: Transfer Learning without Full Model","topicNames":["deep-learning","transfer-learning"],"topicsNotShown":0,"allTopics":["deep-learning","transfer-learning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":5,"starsCount":360,"forksCount":36,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-27T17:39:38.113Z"}},{"type":"Public","name":"amc","owner":"mit-han-lab","isFork":false,"description":"[ECCV 2018] AMC: AutoML for Model Compression and Acceleration on Mobile Devices","topicNames":["automl","model-compression","channel-pruning","automl-for-compression","efficient-model","on-device-ai"],"topicsNotShown":0,"allTopics":["automl","model-compression","channel-pruning","automl-for-compression","efficient-model","on-device-ai"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":16,"starsCount":418,"forksCount":108,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-22T23:33:25.504Z"}},{"type":"Public","name":"data-efficient-gans","owner":"mit-han-lab","isFork":false,"description":"[NeurIPS 2020] Differentiable Augmentation for Data-Efficient GAN Training","topicNames":["tensorflow","generative-adversarial-network","image-generation","gans","data-efficient","neurips-2020","pytorch"],"topicsNotShown":0,"allTopics":["tensorflow","generative-adversarial-network","image-generation","gans","data-efficient","neurips-2020","pytorch"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":26,"starsCount":1260,"forksCount":175,"license":"BSD 2-Clause \"Simplified\" License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-12T00:16:36.633Z"}},{"type":"Public","name":"flatformer","owner":"mit-han-lab","isFork":false,"description":"[CVPR'23] FlatFormer: Flattened Window Attention for Efficient Point Cloud Transformer","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":107,"forksCount":12,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-27T04:02:56.896Z"}},{"type":"Public","name":"litepose","owner":"mit-han-lab","isFork":false,"description":"[CVPR'22] Lite Pose: Efficient Architecture Design for 2D Human Pose Estimation","topicNames":["pose-estimation","efficient-models","litepose"],"topicsNotShown":0,"allTopics":["pose-estimation","efficient-models","litepose"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":18,"starsCount":294,"forksCount":35,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-04T17:00:47.770Z"}},{"type":"Public","name":"anycost-gan","owner":"mit-han-lab","isFork":false,"description":"[CVPR 2021] Anycost GANs for Interactive Image Synthesis and Editing","topicNames":["computer-vision","deep-learning","computer-graphics","pytorch","image-editing","generative-adversarial-network","gan","image-manipulation","image-generation","gans"],"topicsNotShown":1,"allTopics":["computer-vision","deep-learning","computer-graphics","pytorch","image-editing","generative-adversarial-network","gan","image-manipulation","image-generation","gans","stylegan2"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":6,"starsCount":769,"forksCount":95,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-03T15:20:42.622Z"}},{"type":"Public","name":"temporal-shift-module","owner":"mit-han-lab","isFork":false,"description":"[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding","topicNames":["acceleration","low-latency","video-understanding","efficient-model","temporal-modeling","tsm","nvidia-jetson-nano"],"topicsNotShown":0,"allTopics":["acceleration","low-latency","video-understanding","efficient-model","temporal-modeling","tsm","nvidia-jetson-nano"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":93,"starsCount":2022,"forksCount":417,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-03T15:14:16.654Z"}},{"type":"Public","name":"gan-compression","owner":"mit-han-lab","isFork":false,"description":"[CVPR 2020] GAN Compression: Efficient Architectures for Interactive Conditional GANs","topicNames":["compression","pytorch","gans","pix2pix","cyclegan","image-to-image-translation","conditional-gans","gaugan"],"topicsNotShown":0,"allTopics":["compression","pytorch","gans","pix2pix","cyclegan","image-to-image-translation","conditional-gans","gaugan"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":4,"starsCount":1092,"forksCount":147,"license":"Other","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-01T14:30:07.248Z"}},{"type":"Public","name":"proxylessnas","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware","topicNames":["acceleration","automl","specialization","efficient-model","on-device-ai","hardware-aware"],"topicsNotShown":0,"allTopics":["acceleration","automl","specialization","efficient-model","on-device-ai","hardware-aware"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":1412,"forksCount":282,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-26T01:16:43.630Z"}}],"repositoryCount":50,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}