{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":745066552,"defaultBranch":"main","name":"cog-triton","ownerLogin":"replicate","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2024-01-18T15:22:36.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/60410876?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715989728.0","currentOid":""},"activityList":{"items":[{"before":"109f72c5f255f18aa17f9583e3a20ec5368a60a6","after":"35a9ca08a7bea2b353c96ad522074fb93b677d70","ref":"refs/heads/main","pushedAt":"2024-05-30T00:22:26.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"double connect timeout to 120s","shortMessageHtmlLink":"double connect timeout to 120s"}},{"before":"b2705f2670fd48238d52598f06f73d632277b24c","after":"109f72c5f255f18aa17f9583e3a20ec5368a60a6","ref":"refs/heads/main","pushedAt":"2024-05-29T16:39:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"count tokens in formatted prompt","shortMessageHtmlLink":"count tokens in formatted prompt"}},{"before":"70d0ccc03cb33880468c8555b4b3e6d91bce6e56","after":"b2705f2670fd48238d52598f06f73d632277b24c","ref":"refs/heads/main","pushedAt":"2024-05-20T19:42:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"Update for run.cog.cog_version -> run.cog.version","shortMessageHtmlLink":"Update for run.cog.cog_version -> run.cog.version"}},{"before":"bd8ea54157d3efe37d7b836df45698d26fd67fef","after":"70d0ccc03cb33880468c8555b4b3e6d91bce6e56","ref":"refs/heads/main","pushedAt":"2024-05-20T19:34:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"update cognix so `pip` is symlinked to `pip3`","shortMessageHtmlLink":"update cognix so pip is symlinked to pip3"}},{"before":"50f08496d213f85f9cb26d9998d1a640d6853051","after":"bd8ea54157d3efe37d7b836df45698d26fd67fef","ref":"refs/heads/main","pushedAt":"2024-05-18T14:29:02.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"bump cognix with concurrency fix","shortMessageHtmlLink":"bump cognix with concurrency fix"}},{"before":"8700fe818e722a70ffb6997183a15d36fbabc953","after":"50f08496d213f85f9cb26d9998d1a640d6853051","ref":"refs/heads/main","pushedAt":"2024-05-18T00:34:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"update lock with new cog","shortMessageHtmlLink":"update lock with new cog"}},{"before":"b726e2047c37fbf1b55eb9b0b08e00fd0fe022f7","after":null,"ref":"refs/heads/syl/metrics","pushedAt":"2024-05-17T23:48:48.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"}},{"before":"8df3a2b91e813e63a8f767e273d07b52f78dd53e","after":"8700fe818e722a70ffb6997183a15d36fbabc953","ref":"refs/heads/main","pushedAt":"2024-05-17T23:48:47.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"emit input_token_count and output_token_count metrics","shortMessageHtmlLink":"emit input_token_count and output_token_count metrics"}},{"before":"6f59c7c0f467a4c416a34fb5b7e1fcc34727bad6","after":"8df3a2b91e813e63a8f767e273d07b52f78dd53e","ref":"refs/heads/main","pushedAt":"2024-05-17T23:19:44.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"change CI to run on workflow_dispatch instead of every commit (for now)","shortMessageHtmlLink":"change CI to run on workflow_dispatch instead of every commit (for now)"}},{"before":"2421897535c81c513512b69d3fcf27a0c83f77bf","after":"6f59c7c0f467a4c416a34fb5b7e1fcc34727bad6","ref":"refs/heads/main","pushedAt":"2024-05-17T23:12:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"unconditionally log into huggingface","shortMessageHtmlLink":"unconditionally log into huggingface"}},{"before":"b6142a3e795234daa2f077842f6288e0d3e61a60","after":null,"ref":"refs/heads/yorickvp/merge-cog-trt-llm","pushedAt":"2024-05-17T23:04:44.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"}},{"before":"2f893d25189a2b38b8762b7af6ad42bab1b08190","after":"2421897535c81c513512b69d3fcf27a0c83f77bf","ref":"refs/heads/main","pushedAt":"2024-05-17T23:04:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"merge cog-trt-llm into this repo (#38)\n\n* Initial commit\r\n\r\n* add /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/ to LD_LIBRARY_PATH to fix libnvinfer.so.9 import\r\n\r\n* Add pget install to dockerfile\r\n\r\n* successfully compiled gpt2 with cog predict -- hello world\r\n\r\n* Add config and brain dump gpt2 readme\r\n\r\n* model downloading and uploading, in progress\r\n\r\n* everything works\r\n\r\n* output a tar instead of tar.gz\r\n\r\n* local smoke test\r\n\r\n* Add .gitignore file and remove test cache files\r\n\r\n* Add development notes and implementation details\r\n\r\n* Add instructions for running a dev environment\r\n\r\n* Refactor tarball creation in TRTLLMBuilder\r\n\r\n* added huggingface authentication for gated models\r\n\r\n* Add TensorRT-LLM submodule and update Dockerfile and config\r\n\r\n* Add instructions for running a dev environment\r\n\r\n* added hf-enable-hf-transfer\r\n\r\n* added starcoder config, changed predict.py, added requirements for hf-transfer\r\n\r\n* readme backticks\r\n\r\n* added a run to predict.py; fixed model configs\r\n\r\n* downloader no symlinks, check if model already on disk. fix borked\r\n\r\n* Clean up README and update model configuration\r\n\r\n* Remove TensorRT-LLM submodule\r\n\r\n* Update Dockerfile dependencies and file paths\r\n\r\n* merge downloader from main\r\n\r\n* Add get_gpu_info function to utils.py\r\n\r\n* Update predict.py with TRT-LLM backend and GPU info\r\n\r\n* merge with main\r\n\r\n* add async cog\r\n\r\n* Remove unused imports from config_parser.py\r\n\r\n* print filenames as they're added to engine.tar\r\n\r\n* Fix Hugging Face token login issue\r\n\r\n* log info that predict is logging into HF Hub\r\n\r\n* update readme\r\n\r\n* Add support for specifying weight format in downloader.py and predict.py and disable hf_hub progress bar\r\n\r\n* ignore .tar\r\n\r\n* add pget download method\r\n\r\n* refactor config processing and add support for yaml argument.\r\n\r\n* bug fixes\r\n\r\n* add ammo and drop dependencies\r\n\r\n* don't read token from config\r\n\r\n* :bug: Fix huggingface login flow\r\n\r\n* update for 080\r\n\r\n* update examples\r\n\r\n* move examples copy to the end of dockerfile\r\n\r\n* add tensorrtllm_backend submodule\r\n\r\n* update readme with local, no build instructions\r\n\r\n* add new llama config examples\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Read TRTLLM_DIR from env with fallback\r\n\r\n* fix dockerfile\r\n\r\n* rm .cog\r\n\r\n* fix instructions\r\n\r\n* clarify\r\n\r\n* add notes\r\n\r\n* add notes\r\n\r\n* fit out dir\r\n\r\n* Update README.md\r\n\r\nCo-authored-by: Nathan Raw \r\n\r\n* Update README.md\r\n\r\n* Switch from fetched cog-trt-llm to subdirectory\r\n\r\n* Move cog-trt-llm ignore to .dockerignore\r\n\r\n---------\r\n\r\nCo-authored-by: Joe Hoover \r\nCo-authored-by: joe \r\nCo-authored-by: Dhruv Singal \r\nCo-authored-by: technillogue \r\nCo-authored-by: sylvie \r\nCo-authored-by: Nathan Raw \r\nCo-authored-by: Hamel Husain ","shortMessageHtmlLink":"merge cog-trt-llm into this repo (#38)"}},{"before":null,"after":"b726e2047c37fbf1b55eb9b0b08e00fd0fe022f7","ref":"refs/heads/syl/metrics","pushedAt":"2024-05-17T23:02:09.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"emit input_token_count and output_token_count metrics","shortMessageHtmlLink":"emit input_token_count and output_token_count metrics"}},{"before":"72f560d078e2e6a28a6849f93e7e065b116c3100","after":"2f893d25189a2b38b8762b7af6ad42bab1b08190","ref":"refs/heads/main","pushedAt":"2024-05-17T22:46:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"format","shortMessageHtmlLink":"format"}},{"before":"9c4579683a6938900de3cd55849d555e1ccc1805","after":null,"ref":"refs/heads/joe/push-runner-86-to-triton-base-sm86-in-ci","pushedAt":"2024-05-17T13:30:59.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"}},{"before":"7c96b423980b82b0ced597b7686f59a41148bc0c","after":"72f560d078e2e6a28a6849f93e7e065b116c3100","ref":"refs/heads/main","pushedAt":"2024-05-17T13:30:58.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"Merge pull request #39 from replicate/joe/push-runner-86-to-triton-base-sm86-in-ci\n\nUpdate nix CI so that runner-86 is pushed to it's","shortMessageHtmlLink":"Merge pull request #39 from replicate/joe/push-runner-86-to-triton-ba…"}},{"before":null,"after":"9c4579683a6938900de3cd55849d555e1ccc1805","ref":"refs/heads/joe/push-runner-86-to-triton-base-sm86-in-ci","pushedAt":"2024-05-16T15:49:34.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"joehoover","name":"Joe Hoover","path":"/joehoover","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11277670?s=80&v=4"},"commit":{"message":"push runner-86 to replicate-internal/triton-base-sm86","shortMessageHtmlLink":"push runner-86 to replicate-internal/triton-base-sm86"}},{"before":"8e0cb7a47b850bfdc1fe7cdc5823650b2d5d6136","after":"b6142a3e795234daa2f077842f6288e0d3e61a60","ref":"refs/heads/yorickvp/merge-cog-trt-llm","pushedAt":"2024-05-16T15:41:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"Move cog-trt-llm ignore to .dockerignore","shortMessageHtmlLink":"Move cog-trt-llm ignore to .dockerignore"}},{"before":null,"after":"8e0cb7a47b850bfdc1fe7cdc5823650b2d5d6136","ref":"refs/heads/yorickvp/merge-cog-trt-llm","pushedAt":"2024-05-14T09:51:24.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"Switch from fetched cog-trt-llm to subdirectory","shortMessageHtmlLink":"Switch from fetched cog-trt-llm to subdirectory"}},{"before":null,"after":"7c96b423980b82b0ced597b7686f59a41148bc0c","ref":"refs/heads/joe/lang-260-prototype-cog-triton-with-support-for-speculative-decoding","pushedAt":"2024-05-13T19:07:41.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"joehoover","name":"Joe Hoover","path":"/joehoover","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11277670?s=80&v=4"},"commit":{"message":"fix max tokens (and optimize imports) (#37)\n\n* accept max_tokens\r\n\r\n* settle on min/max_tokens to match openai\r\n\r\n* fix type errors\r\n\r\n* fix descriptions","shortMessageHtmlLink":"fix max tokens (and optimize imports) (#37)"}},{"before":"6e79d40b35b2359995bdc0f56be70e858ecb6197","after":null,"ref":"refs/heads/syl/reuse-download","pushedAt":"2024-05-03T01:28:42.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"}},{"before":"338dce2aca1b0d7cc32b005bf33ce48850fad808","after":null,"ref":"refs/heads/syl/bump-concurrency","pushedAt":"2024-05-03T01:28:23.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"}},{"before":"65fc17d8c311dcf17728ec7e100d562da2ed832f","after":null,"ref":"refs/heads/syl/explicit-deps","pushedAt":"2024-05-03T01:25:15.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"}},{"before":"94d2c9d9272169d38001abf6784d43b89f729e66","after":null,"ref":"refs/heads/syl/really-fix-max-tokens","pushedAt":"2024-05-02T20:10:06.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"}},{"before":"384a39e27b7cf35836315dda3deba76cc717d8a3","after":"7c96b423980b82b0ced597b7686f59a41148bc0c","ref":"refs/heads/main","pushedAt":"2024-05-02T20:10:05.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"fix max tokens (and optimize imports) (#37)\n\n* accept max_tokens\r\n\r\n* settle on min/max_tokens to match openai\r\n\r\n* fix type errors\r\n\r\n* fix descriptions","shortMessageHtmlLink":"fix max tokens (and optimize imports) (#37)"}},{"before":null,"after":"94d2c9d9272169d38001abf6784d43b89f729e66","ref":"refs/heads/syl/really-fix-max-tokens","pushedAt":"2024-05-01T21:08:31.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"fix descriptions","shortMessageHtmlLink":"fix descriptions"}},{"before":"21ff68053ae8b99f098bcfb27799a1e4e1142224","after":"f7140b8116ae60fb01742db2b0a95ba686cb075e","ref":"refs/heads/syl/backport-max-tokens","pushedAt":"2024-05-01T20:31:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"fix descriptions","shortMessageHtmlLink":"fix descriptions"}},{"before":"94c0f8ed9bc1729a8d7cb7e8ae1b9ed597c6fc08","after":"21ff68053ae8b99f098bcfb27799a1e4e1142224","ref":"refs/heads/syl/backport-max-tokens","pushedAt":"2024-05-01T19:28:19.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"fix type errors","shortMessageHtmlLink":"fix type errors"}},{"before":null,"after":"94c0f8ed9bc1729a8d7cb7e8ae1b9ed597c6fc08","ref":"refs/heads/syl/backport-max-tokens","pushedAt":"2024-05-01T19:22:49.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"technillogue","name":null,"path":"/technillogue","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/945691?s=80&v=4"},"commit":{"message":"settle on min/max_tokens to match openai","shortMessageHtmlLink":"settle on min/max_tokens to match openai"}},{"before":"b66de4116a4b2019c8fe01ebb5e4c7265c22fc31","after":"384a39e27b7cf35836315dda3deba76cc717d8a3","ref":"refs/heads/main","pushedAt":"2024-05-01T13:31:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"yorickvP","name":"Yorick","path":"/yorickvP","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/647076?s=80&v=4"},"commit":{"message":"update lockfiles after merge, move cog override","shortMessageHtmlLink":"update lockfiles after merge, move cog override"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEV3yjyAA","startCursor":null,"endCursor":null}},"title":"Activity · replicate/cog-triton"}