{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"vaporetto","owner":"daac-tools","isFork":false,"description":"🛥 Vaporetto: Very accelerated pointwise prediction based tokenizer","allTopics":["nlp","rust","analyzer","segmentation","morphological-analysis","tokenization","japanese","tokenizer"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":4,"issueCount":0,"starsCount":218,"forksCount":10,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-31T00:16:02.947Z"}},{"type":"Public","name":"vibrato","owner":"daac-tools","isFork":false,"description":"🎤 vibrato: Viterbi-based accelerated tokenizer","allTopics":["nlp","rust","japanese","tokenizer","segmentation","morphological-analysis","tokenization"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":5,"starsCount":303,"forksCount":14,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-30T10:17:07.375Z"}},{"type":"Public","name":"daachorse","owner":"daac-tools","isFork":false,"description":"🐎 A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure in Rust.","allTopics":["search","finite-state-machine","text-processing","aho-corasick","no-std","double-array","substring-matching","rust"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":1,"issueCount":1,"starsCount":190,"forksCount":12,"license":"Apache License 2.0","participation":[0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-30T00:20:36.567Z"}},{"type":"Public","name":"trie-match","owner":"daac-tools","isFork":false,"description":"Fast match expression optimized for string comparison","allTopics":["rust","performance","no-std","double-array"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":31,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-29T04:00:16.880Z"}},{"type":"Public","name":"python-vibrato","owner":"daac-tools","isFork":false,"description":"Viterbi-based accelerated tokenizer (Python wrapper)","allTopics":["python","nlp","tokenizer","segmentation","morphological-analysis"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":34,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-05T13:17:00.375Z"}},{"type":"Public","name":"python-vaporetto","owner":"daac-tools","isFork":false,"description":"🛥 Vaporetto is a fast and lightweight pointwise prediction based tokenizer. This is a Python wrapper for Vaporetto.","allTopics":["python","nlp","rust","japanese","tokenizer","analyzer","segmentation","morphological-analysis","tokenization"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":20,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-05T13:15:50.966Z"}},{"type":"Public","name":"vaporetto-models","owner":"daac-tools","isFork":false,"description":"Tokenization models and training scripts for Vaporetto fast tokenizer","allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-30T08:37:23.964Z"}},{"type":"Public","name":"crawdad","owner":"daac-tools","isFork":false,"description":"🦞 Rust library of natural language dictionaries using character-wise double-array tries.","allTopics":["trie","data-structures","no-std","cjk-characters","double-array","search","rust"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":27,"forksCount":2,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-20T13:23:26.154Z"}},{"type":"Public","name":"include-bytes-zstd","owner":"daac-tools","isFork":false,"description":"Includes a file with zstd compression in Rust","allTopics":["rust","zstd","zstandard"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":9,"forksCount":0,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-17T09:26:02.336Z"}},{"type":"Public","name":"guidelines","owner":"daac-tools","isFork":false,"description":"Guidelines for daac-tools community","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-16T04:20:25.223Z"}},{"type":"Public","name":"python-daachorse","owner":"daac-tools","isFork":false,"description":"🐎 A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure. (Python wrapper for daachorse)","allTopics":["python","search","finite-state-machine","text-processing","aho-corasick","double-array","substring-matching"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":13,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-02T08:17:41.308Z"}},{"type":"Public","name":"rucrf","owner":"daac-tools","isFork":false,"description":"Conditional Random Fields implemented in pure Rust","allTopics":["rust","machine-learning","crf"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":2,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-14T02:58:19.773Z"}},{"type":"Public","name":"find-simdoc","owner":"daac-tools","isFork":false,"description":"Finding all pairs of similar documents time- and memory-efficiently","allTopics":["rust","similarity-search","document-search","all-pairs"],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":1,"starsCount":56,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-09-27T02:39:30.207Z"}}],"repositoryCount":13,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}