run-llama · jerryjliu · Mar 13, 2024 · Mar 13, 2024 · Mar 13, 2024 · Mar 13, 2024
diff --git a/README.md b/README.md
@@ -4,9 +4,8 @@ LlamaParse is an API created by LlamaIndex to efficiently parse and represent fi
 
 LlamaParse directly integrates with [LlamaIndex](https://github.com/run-llama/llama_index).
 
-Currently available in preview mode for **free**. Try it out today!
 
-**NOTE:** Currently, only PDF files are supported.
+The free plan allows up to 1000 pages per day. The paid plan includes 7k free pages per week, plus 0.3c for each additional page.
 
 ## Getting Started
 

diff --git a/llama_parse/base.py b/llama_parse/base.py
@@ -108,6 +108,19 @@ class Language(str, Enum):
     VIETNAMESE = "vi"
 
 
+SUPPORTED_FILE_TYPES = [  # extensions (with leading dot) checked in _create_job against os.path.splitext output
+    ".pdf",
+    ".xml",
+    ".doc",
+    ".docx",
+    ".pptx",
+    ".rtf",
+    ".pages",
+    ".key",
+    ".epub",
+]
+
+
 class LlamaParse(BasePydanticReader):
     """A smart-parser for files."""
 
@@ -165,8 +178,12 @@ def validate_base_url(cls, v: str) -> str:
     # upload a document and get back a job_id
     async def _create_job(self, file_path: str, extra_info: Optional[dict] = None) -> str:
         file_path = str(file_path)
-        if not file_path.endswith(".pdf"):
-            raise Exception("Currently, only PDF files are supported.")
+        file_ext = os.path.splitext(file_path)[1]
+        if file_ext not in SUPPORTED_FILE_TYPES:
+            raise Exception(
+                f"Currently, only the following file types are supported: {SUPPORTED_FILE_TYPES}\n"
+                f"Current file type: {file_ext}"
+            )
 
         extra_info = extra_info or {}
         extra_info["file_path"] = file_path
@@ -183,7 +200,7 @@ async def _create_job(self, file_path: str, extra_info: Optional[dict] = None) -
             async with httpx.AsyncClient(timeout=self.max_timeout) as client:
                 response = await client.post(url, files=files, headers=headers, data={"language": self.language.value, "parsing_instruction": self.parsing_instruction})
                 if not response.is_success:
-                    raise Exception(f"Failed to parse the PDF file: {response.text}")
+                    raise Exception(f"Failed to parse the file: {response.text}")
 
         # check the status of the job, return when done
         job_id = response.json()["id"]
@@ -206,15 +223,15 @@ async def _get_job_result(self, job_id: str, result_type: str) -> dict:
                     end = time.time()
                     if end - start > self.max_timeout:
                         raise Exception(
-                            f"Timeout while parsing the PDF file: {job_id}"
+                            f"Timeout while parsing the file: {job_id}"
                         )
                     if self.verbose and tries % 10 == 0:
                         print(".", end="", flush=True)
                     continue
 
                 if result.status_code == 400:
                     detail = result.json().get("detail", "Unknown error")
-                    raise Exception(f"Failed to parse the PDF file: {detail}")
+                    raise Exception(f"Failed to parse the file: {detail}")
 
                 return result.json()
 
@@ -235,7 +252,7 @@ async def _aload_data(self, file_path: str, extra_info: Optional[dict] = None) -
             ]
 
         except Exception as e:
-            print(f"Error while parsing the PDF file '{file_path}':", e)
+            print(f"Error while parsing the file '{file_path}':", e)
             raise e
             return []
 
@@ -283,7 +300,7 @@ async def _aget_json(self, file_path: str, extra_info: Optional[dict] = None) ->
             return [result]
 
         except Exception as e:
-            print(f"Error while parsing the PDF file '{file_path}':", e)
+            print(f"Error while parsing the file '{file_path}':", e)
             raise e