{"payload":{"pageCount":2,"repositories":[{"type":"Public","name":"lightllm","owner":"ModelTC","isFork":false,"description":"LightLLM is a Python-based LLM (Large Language Model) inference and serving framework, notable for its lightweight design, easy scalability, and high-speed performance.","topicNames":["nlp","deep-learning","llama","gpt","model-serving","llm","openai-triton"],"topicsNotShown":0,"allTopics":["nlp","deep-learning","llama","gpt","model-serving","llm","openai-triton"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":51,"starsCount":1903,"forksCount":169,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,2,7,10,33,9,7,2,2,4,9,3,0,1,4,4,3,7,9,2,9,3,3,10,6,9,9,3,3,0,1,0,3,3,7,2,13,5,1,6,1,0,0,0,1,3],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-24T11:14:29.513Z"}},{"type":"Public","name":"llmc","owner":"ModelTC","isFork":false,"description":"This is the official implementation of \"LLM-QBench: A Benchmark Towards the Best Practice for Post-training Quantization of Large Language Models\", and it is also an efficient LLM compression tool with various advanced compression methods, supporting multiple inference backends. ","topicNames":["benchmark","deployment","tool","evaluation","pruning","quantization","large-language-models","llm"],"topicsNotShown":0,"allTopics":["benchmark","deployment","tool","evaluation","pruning","quantization","large-language-models","llm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":79,"forksCount":8,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,3,0,3,1,4,0,5,2],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-24T08:18:12.398Z"}},{"type":"Public","name":"general-sam-py","owner":"ModelTC","isFork":false,"description":"Python bindings for general-sam and some utilities","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":1,"forksCount":0,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,16,4,0,12,0,0,4,1,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,3,0,0,0,0,0,0,3,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-20T23:57:08.212Z"}},{"type":"Public template","name":"FCPTS","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-14T07:19:01.008Z"}},{"type":"Public","name":"msbench","owner":"ModelTC","isFork":false,"description":"A tool for model sparse based on torch.fx","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-14T05:49:03.883Z"}},{"type":"Public","name":"TFMQ-DM","owner":"ModelTC","isFork":false,"description":"[CVPR 2024 Highlight] TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models","topicNames":["highlight","quantization","cvpr","ldm","diffusion-models","post-training-quantization","ddim","stable-diffusion","cvpr2024"],"topicsNotShown":0,"allTopics":["highlight","quantization","cvpr","ldm","diffusion-models","post-training-quantization","ddim","stable-diffusion","cvpr2024"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":26,"forksCount":3,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,0,0,12,1,0,0,0,0,3,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-13T06:53:00.119Z"}},{"type":"Public","name":"mtc-token-healing","owner":"ModelTC","isFork":false,"description":"Token healing implementation in Rust","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-13T05:26:04.168Z"}},{"type":"Public","name":"statecs","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,1,4,0,0,0,0,0,0,0,0,0,1,1,1,3,1,0,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-10T08:42:35.770Z"}},{"type":"Public","name":"L2_Compression","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":10,"forksCount":0,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-27T15:16:45.322Z"}},{"type":"Public","name":"general-sam","owner":"ModelTC","isFork":false,"description":"A general suffix automaton implementation in Rust with Python bindings","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-25T04:03:54.723Z"}},{"type":"Public","name":"MQBench","owner":"ModelTC","isFork":false,"description":"Model Quantization Benchmark","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Shell","color":"#89e051"},"pullRequestCount":5,"issueCount":2,"starsCount":728,"forksCount":135,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-24T16:48:28.388Z"}},{"type":"Public","name":"DeepSpeed","owner":"ModelTC","isFork":true,"description":"DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":3898,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-28T02:25:12.749Z"}},{"type":"Public","name":"greedy-tokenizer","owner":"ModelTC","isFork":false,"description":"Greedily tokenize strings with the longest tokens iteratively.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-27T12:02:50.777Z"}},{"type":"Public","name":"QLLM","owner":"ModelTC","isFork":false,"description":"[ICLR 2024] This is the official PyTorch implementation of \"QLLM: Accurate and Efficient Low-Bitwidth Quantization for Large Language Models\"","topicNames":["transformers","pytorch","llama","quantization","post-training-quantization","llm","llama2"],"topicsNotShown":0,"allTopics":["transformers","pytorch","llama","quantization","post-training-quantization","llm","llama2"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":22,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-11T02:56:00.115Z"}},{"type":"Public","name":"EasyLLM","owner":"ModelTC","isFork":false,"description":"Built upon Megatron-Deepspeed and HuggingFace Trainer, EasyLLM has reorganized the code logic with a focus on usability. While enhancing usability, it also ensures training efficiency.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":29,"forksCount":2,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-24T04:18:16.831Z"}},{"type":"Public","name":"Dipoorlet","owner":"ModelTC","isFork":false,"description":"Offline Quantization Tools for Deploy.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":8,"starsCount":103,"forksCount":13,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-28T08:59:47.725Z"}},{"type":"Public","name":"awesome-lm-system","owner":"ModelTC","isFork":false,"description":"Summary of system papers/frameworks/codes/tools on training or serving large model","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":56,"forksCount":6,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-17T10:24:11.923Z"}},{"type":"Public","name":"LPCV_2023_solution","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":18,"forksCount":2,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-29T06:40:44.597Z"}},{"type":"Public","name":"Outlier_Suppression_Plus","owner":"ModelTC","isFork":false,"description":"Official implementation of the EMNLP23 paper: Outlier Suppression+: Accurate quantization of large language models by equivalent and optimal shifting and scaling","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":33,"forksCount":3,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-21T20:55:56.108Z"}},{"type":"Public","name":"UP_LPCV2023_Plugin","owner":"ModelTC","isFork":true,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-11T07:55:39.936Z"}},{"type":"Public","name":"ChatGLM-6B","owner":"ModelTC","isFork":true,"description":"ChatGLM-6B: An Open Bilingual Dialogue Language Model | 开源双语对话语言模型","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":5132,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-06-20T06:03:18.228Z"}},{"type":"Public","name":"pyvlova","owner":"ModelTC","isFork":false,"description":"Yet another Polyhedra Compiler for DeepLearning","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":19,"forksCount":4,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-14T17:28:07.956Z"}},{"type":"Public","name":"systemnoise_web","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-03T10:29:28.939Z"}},{"type":"Public","name":"NART","owner":"ModelTC","isFork":false,"description":"NART = NART is not A RunTime, a deep learning inference framework.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":37,"forksCount":13,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-02T14:50:37.065Z"}},{"type":"Public","name":"United-Perception","owner":"ModelTC","isFork":false,"description":"United Perception","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":27,"starsCount":424,"forksCount":65,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-05T05:12:54.221Z"}},{"type":"Public","name":"AAAI2023_EAMPD","owner":"ModelTC","isFork":false,"description":"AAAI2023 Efficient and Accurate Models towards Practical Deep Learning Baseline","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":2,"starsCount":13,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-29T13:51:35.232Z"}},{"type":"Public","name":"NNLQP","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":7,"starsCount":33,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-08-05T08:08:22.038Z"}},{"type":"Public","name":"Imagenet-S","owner":"ModelTC","isFork":false,"description":"Robustness for real-world system noise","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-03-22T07:49:22.576Z"}},{"type":"Public","name":"LPCV2021_Winner_Solution","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":28,"forksCount":7,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-11-05T14:05:16.670Z"}},{"type":"Public","name":"Prototype","owner":"ModelTC","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-09-20T16:28:44.674Z"}}],"repositoryCount":35,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}