{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"efficient_ocr","owner":"dell-research-harvard","isFork":false,"description":"Efficient OCR for Building a Diverse Digital History","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-12T16:21:04.913Z"}},{"type":"Public","name":"newsdejavu","owner":"dell-research-harvard","isFork":false,"description":"Python package for News Deja Vu","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-09T20:33:36.895Z"}},{"type":"Public","name":"linktransformer","owner":"dell-research-harvard","isFork":false,"description":"A convenient way to link, deduplicate, aggregate and cluster data(frames) in Python using deep learning","topicNames":["python","nlp","deep-learning","record-linkage","entity-resolution","transformers","entity-matching","sentence-transformers","huggingface-transformers"],"topicsNotShown":0,"allTopics":["python","nlp","deep-learning","record-linkage","entity-resolution","transformers","entity-matching","sentence-transformers","huggingface-transformers"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":3,"starsCount":80,"forksCount":7,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-05T22:06:53.318Z"}},{"type":"Public","name":"AmericanStories","owner":"dell-research-harvard","isFork":false,"description":"The official Github for the American Stories dataset as in {link}","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":95,"forksCount":7,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-07T01:05:57.346Z"}},{"type":"Public","name":"HomoglyphsCJKTraining","owner":"dell-research-harvard","isFork":false,"description":"Quantifying Character Similarity with Vision Transformers","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-27T03:09:02.631Z"}},{"type":"Public","name":"HomoglyphsCJK","owner":"dell-research-harvard","isFork":false,"description":"An efficient and useful tool to fuzzy match Japanese, Korean, Simplified Chinese or Traditional Chinese words.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-13T00:30:08.717Z"}},{"type":"Public","name":"Associating-Press","owner":"dell-research-harvard","isFork":false,"description":"Associating layout elements from newspapers into full articles","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-15T23:17:56.068Z"}},{"type":"Public","name":"DPR","owner":"dell-research-harvard","isFork":true,"description":"Dense Passage Retriever - is a set of tools and models for open domain Q&A task.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":293,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-15T14:39:03.345Z"}},{"type":"Public","name":"linktransformer-readthedocs","owner":"dell-research-harvard","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-06T13:35:54.255Z"}},{"type":"Public","name":"clippings","owner":"dell-research-harvard","isFork":false,"description":"The official implementation (English) of the paper \"Linking Representations with Multimodal Contrastive Learning\" : https://arxiv.org/abs/2304.03464","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-06-20T05:21:21.863Z"}},{"type":"Public","name":"effsynth","owner":"dell-research-harvard","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-06-18T04:58:19.833Z"}},{"type":"Public","name":"NEWS-COPY","owner":"dell-research-harvard","isFork":false,"description":"Noise-robust de-duplication at scale ","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":15,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-09T20:54:59.238Z"}},{"type":"Public","name":"effocr","owner":"dell-research-harvard","isFork":false,"description":"A model(ing framework) for sample efficient OCR","topicNames":["ocr"],"topicsNotShown":0,"allTopics":["ocr"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":39,"forksCount":5,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-07T14:37:57.211Z"}},{"type":"Public","name":"clippings-japanese","owner":"dell-research-harvard","isFork":false,"description":"The official implementation (Japanese) of the paper \"Linking Representations with Multimodal Contrastive Learning\" : https://arxiv.org/abs/2304.03464","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-04T16:18:07.816Z"}},{"type":"Public","name":"nnsplit","owner":"dell-research-harvard","isFork":true,"description":"Semantic text segmentation. For sentence boundary detection, compound splitting and more.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":34,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-05T19:15:20.544Z"}},{"type":"Public","name":"sbert-wrapper","owner":"dell-research-harvard","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-23T09:08:24.873Z"}},{"type":"Public","name":"BertGCN","owner":"dell-research-harvard","isFork":true,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":81,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-08-26T13:35:37.906Z"}},{"type":"Public","name":"HJDataset","owner":"dell-research-harvard","isFork":false,"description":"A Large Dataset of Historical Japanese Documents with Complex Layouts","topicNames":["python","layout-analysis","dataset","detectron2"],"topicsNotShown":0,"allTopics":["python","layout-analysis","dataset","detectron2"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":1,"issueCount":0,"starsCount":28,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-07-22T06:26:45.970Z"}},{"type":"Public","name":"NeedlemanWunschNames","owner":"dell-research-harvard","isFork":false,"description":"Applies Needleman-Wunsch algorithm to sequences of strings using Levenshtein distance as a scoring metric.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":1,"issueCount":0,"starsCount":3,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-06-22T01:21:35.193Z"}},{"type":"Public","name":"mmdetection","owner":"dell-research-harvard","isFork":true,"description":"OpenMMLab Detection Toolbox and Benchmark","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":9226,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-10-30T23:14:22.308Z"}},{"type":"Public","name":"EasyOCR","owner":"dell-research-harvard","isFork":true,"description":"Ready-to-use OCR with 80+ supported languages and all popular writing scripts including Latin, Chinese, Arabic, Devanagari, Cyrillic and etc.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":2968,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-08-20T18:23:59.971Z"}},{"type":"Public","name":"taming-transformers","owner":"dell-research-harvard","isFork":true,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1093,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-23T21:13:20.157Z"}},{"type":"Public","name":"cocosplit","owner":"dell-research-harvard","isFork":true,"description":"Simple tool to split COCO annotations into train/test datasets.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":0,"forksCount":92,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-12-06T18:12:13.562Z"}},{"type":"Public","name":"label-studio","owner":"dell-research-harvard","isFork":true,"description":"Label Studio is a multi-type data labeling and annotation tool with standardized output format","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":2083,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-09-11T20:35:02.554Z"}},{"type":"Public","name":"cutter-incident","owner":"dell-research-harvard","isFork":false,"description":"Repo for the cutter incident project","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Stata","color":"#1a5f91"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-07-08T15:47:12.634Z"}},{"type":"Public","name":"detectron2-ResNeSt","owner":"dell-research-harvard","isFork":true,"description":"A fork of Detectron2 with ResNeSt backbone","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":7274,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-06-06T20:16:18.192Z"}},{"type":"Public","name":"glassdoor-review-scraper","owner":"dell-research-harvard","isFork":true,"description":"Scrape reviews from Glassdoor","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":251,"license":"BSD 2-Clause \"Simplified\" License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-06-01T11:13:56.717Z"}},{"type":"Public archive","name":"js-dataverse","owner":"dell-research-harvard","isFork":false,"description":"A JavaScript/TypeScript module for Dataverse","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"TypeScript","color":"#3178c6"},"pullRequestCount":0,"issueCount":1,"starsCount":0,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-09-16T16:24:18.930Z"}}],"repositoryCount":28,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}