{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"bicleaner-ai","owner":"bitextor","isFork":false,"description":"Bicleaner fork that uses neural networks","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":32,"forksCount":4,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-21T11:00:37.938Z"}},{"type":"Public","name":"bitextor","owner":"bitextor","isFork":false,"description":"Bitextor generates translation memories from multilingual websites","allTopics":["crawler","dictionaries","tokenizer","machine-translation","wget","apertium","neural-machine-translation","warc","tmx","statistical-machine-translation","corpus-generator","sentence-segmentation","corpus-tools","corpus-processing","hunalign","parallel-corpora","document-aligner","bicleaner","bleualign","bitextor"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":3,"issueCount":4,"starsCount":282,"forksCount":43,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-21T05:40:22.396Z"}},{"type":"Public","name":"warc2text","owner":"bitextor","isFork":false,"description":"Extracts plain text, language identification and more metadata from WARC records","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":3,"issueCount":11,"starsCount":18,"forksCount":5,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-08T14:07:29.184Z"}},{"type":"Public","name":"bicleaner-hardrules","owner":"bitextor","isFork":false,"description":"Pre-filtering step for bicleaner","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":2,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-16T14:37:58.988Z"}},{"type":"Public","name":"biroamer","owner":"bitextor","isFork":false,"description":"Utility that will help you to ROAM (Random Omit Anonymize and Mix) your parallel corpus.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":8,"forksCount":2,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-26T11:51:08.981Z"}},{"type":"Public","name":"bicleaner","owner":"bitextor","isFork":false,"description":"Bicleaner is a parallel corpus classifier/cleaner that aims at detecting noisy sentence pairs in a parallel corpus.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":148,"forksCount":22,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-09T08:22:34.114Z"}},{"type":"Public","name":"monocleaner","owner":"bitextor","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":6,"forksCount":1,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-06T11:15:29.952Z"}},{"type":"Public","name":"monotextor","owner":"bitextor","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":1,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-31T14:32:42.818Z"}},{"type":"Public","name":"bifixer","owner":"bitextor","isFork":false,"description":"Tool to fix bitexts and tag near-duplicates for removal","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":27,"forksCount":3,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-31T09:52:02.549Z"}},{"type":"Public","name":"bitextor-testing-output","owner":"bitextor","isFork":false,"description":"Repository for storing testing outputs from Bitextor","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-29T08:56:59.080Z"}},{"type":"Public","name":"prevertical2text","owner":"bitextor","isFork":false,"description":"Extracts plain text, language identification and more metadata from Spiderling prevertical files ","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-17T10:43:56.880Z"}},{"type":"Public","name":"fastText","owner":"bitextor","isFork":true,"description":"Library for fast text representation and classification.","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":4688,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-04T14:10:43.103Z"}},{"type":"Public","name":"deferred-crawling","owner":"bitextor","isFork":false,"description":"Reconstructs sentences using deferred crawling standoff annotations from Bitextor","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-04T10:42:49.124Z"}},{"type":"Public","name":"bicleaner-ai-data","owner":"bitextor","isFork":false,"description":"Repository of Bicleaner AI models","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":0,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-28T15:23:34.135Z"}},{"type":"Public","name":"pdf-extract","owner":"bitextor","isFork":false,"description":"PDF parser and converter to HTML","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":0,"issueCount":4,"starsCount":80,"forksCount":14,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-28T05:19:50.847Z"}},{"type":"Public","name":"bleualign-cpp","owner":"bitextor","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":2,"starsCount":7,"forksCount":2,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-10T21:02:57.755Z"}},{"type":"Public","name":"bicleaner-data","owner":"bitextor","isFork":false,"description":"Repository for data models, dictionaries and more resources for Bicleaner","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-15T07:36:51.075Z"}},{"type":"Public","name":"vecalign","owner":"bitextor","isFork":true,"description":"Improved Sentence Alignment in Linear Time and Space","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":27,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-04T10:43:57.464Z"}},{"type":"Public","name":"python-apachetika","owner":"bitextor","isFork":true,"description":"Python interface to Apache Tika, HTML extraction from PDF","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":143,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-30T09:09:25.460Z"}},{"type":"Public","name":"loomchild-segment-py","owner":"bitextor","isFork":false,"description":"Python module to interface with Java Loomchild sentence segmenter","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":1,"forksCount":1,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-28T09:43:33.361Z"}},{"type":"Public","name":"bicleaner-ai-glove","owner":"bitextor","isFork":true,"description":"Fork of glove-python to distribute binary builds","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":319,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-08-12T13:00:11.412Z"}},{"type":"Public","name":"neural-document-aligner","owner":"bitextor","isFork":false,"description":"Document aligner which uses neural technologies to search matches across bilingual documents","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":2,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-06-09T10:43:43.372Z"}},{"type":"Public archive","name":"bitextor-neural","owner":"bitextor","isFork":false,"description":"Bitextor Neural generates translation memories from multilingual websites using state-of-the-art Machine Learning tools","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-06-03T08:45:42.622Z"}},{"type":"Public","name":"monocleaner-data","owner":"bitextor","isFork":false,"description":"Monocleaner models repository","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-11-18T11:34:10.363Z"}},{"type":"Public","name":"hunalign","owner":"bitextor","isFork":true,"description":"Sentence aligner","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":37,"license":"GNU Lesser General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-05-21T13:09:30.795Z"}},{"type":"Public","name":"cld2","owner":"bitextor","isFork":true,"description":"Compact Language Detector 2","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":124,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-05-04T08:41:38.899Z"}},{"type":"Public","name":"python-pdfextract","owner":"bitextor","isFork":true,"description":"Python interface to pdf-extract, HTML extraction from PDF","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":143,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-09-03T08:39:07.584Z"}},{"type":"Public","name":"bitextor-data","owner":"bitextor","isFork":false,"description":"Repository for data models, dictionaries and more resources for Bitextor","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-02-07T14:45:15.231Z"}}],"repositoryCount":28,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}