chore: remove repetitive words #16957

Merged · 1 commit · May 14, 2024
2 changes: 1 addition & 1 deletion gallery/how_to/deploy_models/deploy_prequantized.py
@@ -162,7 +162,7 @@ def quantize_model(model, inp):
#
# You would see operators specific to quantization such as
# qnn.quantize, qnn.dequantize, qnn.requantize, and qnn.conv2d etc.
input_name = "input" # the input name can be be arbitrary for PyTorch frontend.
input_name = "input" # the input name can be arbitrary for PyTorch frontend.
input_shapes = [(input_name, (1, 3, 224, 224))]
mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
# print(mod) # comment in to see the QNN IR dump
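For context, the import step touched by this hunk roughly looks like the following when run end to end. This is a minimal sketch, assuming qmodel is the quantized PyTorch model produced by the tutorial's quantize_model() helper; only relay.frontend.from_pytorch and the input_shapes format come from the file above.

    import torch
    from tvm import relay

    # qmodel: a quantized PyTorch model, e.g. the output of the tutorial's
    # quantize_model(model, inp) helper (assumed to exist here).
    inp = torch.rand(1, 3, 224, 224)
    script_module = torch.jit.trace(qmodel, inp).eval()

    input_name = "input"  # the input name can be arbitrary for the PyTorch frontend
    input_shapes = [(input_name, (1, 3, 224, 224))]
    mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
    # print(mod)  # uncomment to inspect the QNN IR (qnn.quantize, qnn.dequantize, ...)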
2 changes: 1 addition & 1 deletion include/tvm/relax/dataflow_pattern.h
@@ -914,7 +914,7 @@ class ExternFuncPatternNode : public DFPatternNode {
public:
String global_symbol_; /*!< The global symbol name of the external function */

-/*! \brief The the external function name */
+/*! \brief The external function name */
const String& global_symbol() const { return global_symbol_; }
void VisitAttrs(tvm::AttrVisitor* v) { v->Visit("global_symbol", &global_symbol_); }

4 changes: 2 additions & 2 deletions src/runtime/contrib/vllm/attention_kernels.cu
@@ -145,7 +145,7 @@ __device__ void paged_attention_kernel(

// Load the query to registers.
// Each thread in a thread group has a different part of the query.
-// For example, if the the thread group size is 4, then the first thread in the group
+// For example, if the thread group size is 4, then the first thread in the group
// has 0, 4, 8, ... th vectors of the query, and the second thread has 1, 5, 9, ...
// th vectors of the query, and so on.
// NOTE(woosuk): Because q is split from a qkv tensor, it may not be contiguous.
@@ -185,7 +185,7 @@ __device__ void paged_attention_kernel(

// Load a key to registers.
// Each thread in a thread group has a different part of the key.
-// For example, if the the thread group size is 4, then the first thread in the group
+// For example, if the thread group size is 4, then the first thread in the group
// has 0, 4, 8, ... th vectors of the key, and the second thread has 1, 5, 9, ... th
// vectors of the key, and so on.
for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {
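The two comments above describe the same striding scheme for distributing query and key vectors across a thread group. The following is a minimal Python illustration of that assignment, not the kernel code; THREAD_GROUP_SIZE and NUM_VECS are hypothetical values.

    THREAD_GROUP_SIZE = 4   # hypothetical thread group size
    NUM_VECS = 16           # hypothetical number of vectors per query/key

    def vecs_owned_by(thread_idx_in_group):
        # Thread t of the group handles vectors t, t + 4, t + 8, ...
        return list(range(thread_idx_in_group, NUM_VECS, THREAD_GROUP_SIZE))

    print(vecs_owned_by(0))  # [0, 4, 8, 12]
    print(vecs_owned_by(1))  # [1, 5, 9, 13]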
2 changes: 1 addition & 1 deletion src/runtime/relax_vm/kv_state.h
@@ -83,7 +83,7 @@ class KVStateObj : public Object {
* with prefill length "10", "15", "20", then we pass `[5, 1, 8]`
* as the seq_ids and `[10, 15, 20]` as the append_lengths.
* This method is invoked right before entering the model forward
-* function, and contains operations to prepare the the incoming
+* function, and contains operations to prepare the incoming
* forward. For instance, this method may send auxiliary KV cache
* data structures to GPUs so that they can be operated
* in the model forward function.
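Read concretely, the example in the comment above pairs up as follows. This is an illustration only; the commented-out call is a hypothetical stand-in, not the actual TVM method name.

    seq_ids = [5, 1, 8]             # sequences about to run prefill
    append_lengths = [10, 15, 20]   # new tokens appended to each sequence
    # kv_state.begin_forward(seq_ids, append_lengths)  # hypothetical call,
    # issued right before the model forward function
    total_new_tokens = sum(append_lengths)  # 45 positions appended in this step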
4 changes: 2 additions & 2 deletions src/runtime/relax_vm/paged_kv_cache.cc
@@ -85,7 +85,7 @@ struct Block {
int32_t start_pos = 0;
/*!
* \brief The current attention sink length of the block.
-* It means the the **first** sink size elements will be pinned
+* It means the **first** sink size elements will be pinned
* in the KV cache even when sliding window is enabled.
*/
int32_t sink_length = 0;
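As a rough illustration of the "pinned sink" behaviour described above (an assumption-based sketch; whether the sink counts toward the window size may differ in the actual implementation):

    window_size, sink_length = 8, 2
    positions = list(range(20))  # 20 token positions seen so far
    # The first sink_length positions stay pinned; the rest roll with the window.
    kept = positions[:sink_length] + positions[-(window_size - sink_length):]
    # kept == [0, 1, 14, 15, 16, 17, 18, 19]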
@@ -247,7 +247,7 @@ class PagedKVCacheAuxDataManager {
/*!
* \brief Copy the append length indptr array on device.
* \note Since the Q/K/V data may have raggedness in terms of lengths,
-* we represent the the append lengths in CSR format.
+* we represent the append lengths in CSR format.
*/
virtual NDArray CopyCurAppendLengthIndptrAsync(std::vector<int32_t>* data) = 0;
/*! \brief Copy the k position offset of applying RoPE for each sequence. */
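A small sketch of the CSR indptr layout mentioned above, in plain Python rather than the device-side code:

    append_lengths = [10, 15, 20]   # ragged per-sequence append lengths
    indptr = [0]
    for n in append_lengths:
        indptr.append(indptr[-1] + n)
    # indptr == [0, 10, 25, 45]; data for sequence i occupies [indptr[i], indptr[i+1])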