{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"petals","owner":"bigscience-workshop","isFork":false,"description":"🌸 Run LLMs at home, BitTorrent-style. Fine-tuning and inference up to 10x faster than offloading","topicNames":["nlp","bloom","distributed-systems","machine-learning","deep-learning","chatbot","pytorch","falcon","transformer","neural-networks"],"topicsNotShown":10,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":14,"issueCount":73,"starsCount":8730,"forksCount":466,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-29T20:13:42.990Z"}},{"type":"Public","name":"Megatron-DeepSpeed","owner":"bigscience-workshop","isFork":false,"description":"Ongoing research training transformer language models at scale, including: BERT & GPT-2","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":45,"issueCount":74,"starsCount":1252,"forksCount":209,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-20T16:10:21.859Z"}},{"type":"Public","name":"biomedical","owner":"bigscience-workshop","isFork":false,"description":"Tools for curating biomedical training data for large-scale language modeling ","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":32,"issueCount":160,"starsCount":422,"forksCount":111,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-20T14:47:12.669Z"}},{"type":"Public","name":"multilingual-modeling","owner":"bigscience-workshop","isFork":false,"description":"BLOOM+1: Adapting BLOOM model to support a new unseen language","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":13,"starsCount":65,"forksCount":14,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-02T07:54:24.098Z"}},{"type":"Public","name":"promptsource","owner":"bigscience-workshop","isFork":false,"description":"Toolkit for creating, sharing and using natural language prompts.","topicNames":["nlp","machine-learning","natural-language-processing"],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":32,"issueCount":11,"starsCount":2517,"forksCount":336,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-23T17:59:41.052Z"}},{"type":"Public","name":"massive-probing-framework","owner":"bigscience-workshop","isFork":true,"description":"Framework for BLOOM probing","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":10,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,16,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-17T23:16:51.372Z"}},{"type":"Public","name":"architecture-objective","owner":"bigscience-workshop","isFork":true,"description":"","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":4,"starsCount":87,"forksCount":290,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-07-25T11:55:39.194Z"}},{"type":"Public","name":"metadata","owner":"bigscience-workshop","isFork":false,"description":"Experiments on including metadata such as URLs, timestamps, website descriptions and HTML tags during pretraining.","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":13,"issueCount":25,"starsCount":30,"forksCount":12,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-06-12T08:48:03.054Z"}},{"type":"Public","name":"lm-evaluation-harness","owner":"bigscience-workshop","isFork":true,"description":"A framework for few-shot evaluation of autoregressive language models.","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":7,"starsCount":91,"forksCount":1354,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-09T09:21:46.980Z"}},{"type":"Public","name":"multilingual-modeling-1","owner":"bigscience-workshop","isFork":true,"description":"","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":14,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-05T08:14:51.500Z"}},{"type":"Public","name":"t-zero","owner":"bigscience-workshop","isFork":false,"description":"Reproduce results and replicate training fo T0 (Multitask Prompted Training Enables Zero-Shot Task Generalization)","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":7,"starsCount":448,"forksCount":52,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-05T11:29:45.730Z"}},{"type":"Public","name":"pii_processing","owner":"bigscience-workshop","isFork":false,"description":"PII Processing code to detect and remediate PII in BigScience datasets. Reference implementation for the PII Hackathon","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":7,"starsCount":8,"forksCount":6,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-10-06T23:35:10.367Z"}},{"type":"Public","name":"bloom-dechonk","owner":"bigscience-workshop","isFork":false,"description":"A repo for running model shrinking experiments","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":8,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-06-21T10:46:35.125Z"}},{"type":"Public","name":"evaluation","owner":"bigscience-workshop","isFork":false,"description":"Code and Data for Evaluation WG","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":9,"issueCount":41,"starsCount":41,"forksCount":24,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-05-04T03:04:06.270Z"}},{"type":"Public","name":"evaluation-robustness-consistency","owner":"bigscience-workshop","isFork":false,"description":"Tools for evaluating model robustness and consistency ","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":0,"starsCount":2,"forksCount":2,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-03-09T15:07:41.627Z"}},{"type":"Public","name":"tokenization","owner":"bigscience-workshop","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":1,"starsCount":11,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-02-16T14:46:49.417Z"}},{"type":"Public","name":"transformers","owner":"bigscience-workshop","isFork":true,"description":"🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX.","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":24962,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-11-22T14:14:12.751Z"}},{"type":"Public","name":"data_sourcing","owner":"bigscience-workshop","isFork":false,"description":"This directory gathers the tools developed by the Data Sourcing Working Group","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":31,"forksCount":6,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-10-25T19:16:46.697Z"}},{"type":"Public","name":"codecarbon","owner":"bigscience-workshop","isFork":true,"description":"Track emissions from Compute and recommend ways to reduce their impact on the environment.","topicNames":[],"topicsNotShown":0,"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":149,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-08-14T06:45:03.466Z"}}],"repositoryCount":19,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"mirror","text":"Mirrors"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}