Skip to content

Commit

Permalink
Merge pull request #53 from Rock-Candy-Tea/dev
Browse files Browse the repository at this point in the history
4.3.3
  • Loading branch information
hiltay committed Jul 2, 2022
2 parents e92cca0 + 255a502 commit dd59f3c
Show file tree
Hide file tree
Showing 13 changed files with 194 additions and 91 deletions.
18 changes: 6 additions & 12 deletions README.md
Expand Up @@ -5,32 +5,26 @@
部署教程:[文档](https://fcircle-doc.js.cool/) | [备用地址](https://fcircle-doc.is-a.dev/)

```
4.3.2 支持:
4.3.3 支持:
- 支持 gitee 和 github 上的 issue 友链获取
- 支持butterfly、volantis、matery、sakura、fluid、nexmoe、Yun、stun、stellar、next主题的友链和文章获取
- 支持feed订阅规则,如atom、rss等规则(支持wordpress类型的博客)
- 支持自定义订阅后缀
- 支持站点屏蔽
- 支持按照更新时间和创建时间排序
- 支持未适配的hexo主题和非hexo用户使用,在配置项选择开启配置项友链
- 支持未适配的hexo主题和非hexo用户使用,可以在配置项选择开启配置项友链
- 额外的友链页同时爬取
- 支持添加HTTP代理
- 多种数据存储,提供leancloud,mysql,sqlite,mongodb存储方式
- 多种方式部署,提供github,server,docker部署方式
- 将api整合到主仓库
- 新增友链获取策略的多种common规则
- 新增api方式的配置项友链
- 新增api方式的配置项友链,并提供可扩展性
- 将额外友链页和环境变量友链统一为LINK,在配置文件中配置
- 提供一个简单的快速部署脚本
最近改动:
- 添加mongodb workflow
- randomfriend和randompost两个接口支持随机N篇功能
- 新增friendstatus接口,用于快速查询友链状态
- 修复vercel无法获取部分接口的问题
- 修复leancloud过期文章清理不生效的问题
- 添加自定义日志信息
- 修复leancloud接口中统计的数量和实际数量不同的问题
- 修复leancloud接口中创建时间和更新时间颠倒问题
- 扩展json_api的功能,支持从本地读取json友链配置,同时额外提供一个进阶格式
- 一些格式改动
- 修复/all接口在文章数小于20时导致无法获取数据的问题
```

22 changes: 11 additions & 11 deletions api/leancloudapi.py
Expand Up @@ -7,6 +7,7 @@
import leancloud
from hexo_circle_of_friends import settings
from hexo_circle_of_friends.utils.process_time import time_compare
from utils import start_end_check


def db_init():
Expand All @@ -16,7 +17,7 @@ def db_init():
leancloud.init(os.environ["APPID"], os.environ["APPKEY"])


def query_all(list, start: int = 0, end: int = -1, rule: str = "updated"):
def query_all(li, start: int = 0, end: int = -1, rule: str = "updated"):
# Verify key
db_init()

Expand All @@ -41,6 +42,14 @@ def query_all(list, start: int = 0, end: int = -1, rule: str = "updated"):
article_num = len(query_list)
last_updated_time = max([item.get('createdAt').strftime('%Y-%m-%d %H:%M:%S') for item in query_list])

# 检查start、end的合法性
start, end, message = start_end_check(start, end, article_num)
if message:
return {"message": message}
# 检查rule的合法性
if rule != "created" and rule != "updated":
return {"message": "rule error, please use 'created'/'updated'"}

data['statistical_data'] = {
'friends_num': friends_num,
'active_num': active_num,
Expand All @@ -53,7 +62,7 @@ def query_all(list, start: int = 0, end: int = -1, rule: str = "updated"):
article_data = []
for item in query_list:
itemlist = {}
for elem in list:
for elem in li:
if elem == 'created':
itemlist[elem] = item.get('created')
elif elem == 'avatar':
Expand All @@ -62,15 +71,6 @@ def query_all(list, start: int = 0, end: int = -1, rule: str = "updated"):
itemlist[elem] = item.get(elem)
article_data_init.append(itemlist)

if end == -1:
end = min(article_num, 1000)
if start < 0 or start >= min(article_num, 1000):
return {"message": "start error"}
if end <= 0 or end > min(article_num, 1000):
return {"message": "end error"}
if rule != "created" and rule != "updated":
return {"message": "rule error, please use 'created'/'updated'"}

rules = []
# list sort 是 稳定 的,这意味着当多个记录具有相同的键值时,将**保留其原始顺序**
if rule == "created":
Expand Down
12 changes: 6 additions & 6 deletions api/mongodbapi.py
Expand Up @@ -7,6 +7,7 @@
from hexo_circle_of_friends import settings
from pymongo import MongoClient
from hexo_circle_of_friends.utils.process_time import time_compare
from utils import start_end_check


def db_init():
Expand All @@ -24,12 +25,11 @@ def db_init():
def query_all(list, start: int = 0, end: int = -1, rule: str = "updated"):
post_collection, friend_db_collection = db_init()
article_num = post_collection.count_documents({})
if end == -1:
end = min(article_num, 1000)
if start < 0 or start >= min(article_num, 1000):
return {"message": "start error"}
if end <= 0 or end > min(article_num, 1000):
return {"message": "end error"}
# 检查start、end的合法性
start, end, message = start_end_check(start, end, article_num)
if message:
return {"message": message}
# 检查rule的合法性
if rule != "created" and rule != "updated":
return {"message": "rule error, please use 'created'/'updated'"}

Expand Down
12 changes: 6 additions & 6 deletions api/sqlapi.py
Expand Up @@ -12,6 +12,7 @@
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.sql.expression import desc, func
from hexo_circle_of_friends.utils.process_time import time_compare
from utils import start_end_check


def db_init():
Expand Down Expand Up @@ -40,12 +41,11 @@ def db_init():
def query_all(list, start: int = 0, end: int = -1, rule: str = "updated"):
session = db_init()
article_num = session.query(Post).count()
if end == -1:
end = min(article_num, 1000)
if start < 0 or start >= min(article_num, 1000):
return {"message": "start error"}
if end <= 0 or end > min(article_num, 1000):
return {"message": "end error"}
# 检查start、end的合法性
start, end, message = start_end_check(start, end, article_num)
if message:
return {"message": message}
# 检查rule的合法性
if rule != "created" and rule != "updated":
return {"message": "rule error, please use 'created'/'updated'"}

Expand Down
47 changes: 47 additions & 0 deletions api/utils.py
@@ -0,0 +1,47 @@
def start_end_check(start, end, article_num):
    """Validate and normalise the ``start``/``end`` bounds of an article query.

    Rules applied, in order:

    1. At most 1000 articles are addressable, so ``article_num`` is capped
       at 1000.
    2. A non-positive ``end`` other than the ``-1`` sentinel is rejected
       with ``"end error"``.
    3. ``end == -1`` is replaced by the (capped) article count.
    4. An ``end`` larger than the (capped) article count is clamped to it
       instead of being rejected.
    5. ``start`` must satisfy ``0 <= start < end``; otherwise the message is
       ``"start error"``.

    :param start: requested lower bound
    :param end: requested upper bound, or ``-1`` for "up to the article count"
    :param article_num: total number of stored articles
    :return: ``(start, end, message)``; ``message`` is ``""`` when the bounds
        are valid, and ``end`` is the normalised upper bound
    """
    article_num = min(article_num, 1000)

    # A zero or negative end (other than the -1 sentinel) can never select
    # anything; report it as an end problem rather than blaming start.
    if end != -1 and end <= 0:
        return start, end, "end error"

    # Both the sentinel and an over-large end collapse to the article count.
    if end == -1 or end > article_num:
        end = article_num

    message = "start error" if start < 0 or start >= end else ""
    return start, end, message


def test():
    """Fuzz ``start_end_check`` with 2000 random triples and print a tally.

    For every triple accepted without a message, the returned bounds must
    satisfy ``0 <= start < end <= article_num`` and ``end <= 1000``;
    violations are printed. The final line printed is the number of valid
    results, the number of rejected inputs, and their sum.
    """
    import random

    rounds = 2000
    # The three pools are drawn one after another, each fully before the next.
    start_pool = [random.randint(-100, 1500) for _ in range(rounds)]
    end_pool = [random.randint(-100, 1500) for _ in range(rounds)]
    total_pool = [random.randint(0, 1500) for _ in range(rounds)]

    passed = 0
    rejected = 0
    for raw_start, raw_end, total in zip(start_pool, end_pool, total_pool):
        new_start, new_end, msg = start_end_check(raw_start, raw_end, total)
        if msg:
            rejected += 1
            continue
        if 0 <= new_start < new_end <= total and new_end <= 1000:
            passed += 1
            continue
        # Accepted input whose normalised bounds break the invariant.
        print(raw_start, raw_end)
        print(new_start, new_end)
        print("\n")
    print(passed, rejected, passed + rejected)


if __name__ == '__main__':
test()
3 changes: 1 addition & 2 deletions hexo_circle_of_friends/pipelines/mongodb_pipe.py
Expand Up @@ -139,7 +139,7 @@ def friendlist_push(self):
print("上传数据失败,请检查:%s" % friend.get("link"))
return len(friends), error_num

def friendpoor_push(self,item):
def friendpoor_push(self, item):
item["createdAt"] = today
try:
self.posts.replace_one({"link": item.get("link")}, item, upsert=True)
Expand All @@ -148,4 +148,3 @@ def friendpoor_push(self,item):
print("----------------------")
print(item["author"])
print("《{}》\n文章发布时间:{}\t\t采取的爬虫规则为:{}".format(item["title"], item["created"], item["rule"]))

3 changes: 2 additions & 1 deletion hexo_circle_of_friends/pipelines/pipelines.py
Expand Up @@ -9,8 +9,9 @@
class DuplicatesPipeline:
def __init__(self):
self.data_link_set = set() # 通过链接对post文章数据的去重
self.data_title_set = set() # 通过标题对post文章数据的去重
self.data_title_set = set() # 通过标题对post文章数据的去重
self.friends_set = set() # friends filter set 用于对friends的去重

def process_item(self, item, spider):
if "userdata" in item.keys():
# userdata filter
Expand Down
95 changes: 82 additions & 13 deletions hexo_circle_of_friends/run.py
@@ -1,13 +1,18 @@
# -*- coding:utf-8 -*-
# Author:yyyz
import os
import time
import requests
import schedule

from multiprocessing.context import Process
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerProcess
from settings import *
import schedule
from multiprocessing.context import Process
import time
import requests
from hexo_circle_of_friends.utils.logger import get_logger

# 日志记录配置
logger = get_logger()


def main():
Expand All @@ -16,7 +21,7 @@ def main():
# init settings
initsettings(setting)
process = CrawlerProcess(setting)
didntWorkSpider = ['xiaoso', ]
didntWorkSpider = []
for spider_name in process.spiders.list():
if spider_name in didntWorkSpider:
continue
Expand All @@ -25,14 +30,77 @@ def main():
process.start()


def settings_friends_json_parse(json_file, setting):
    """Parse a decoded JSON friend-link document into the settings list.

    The layout is decided by the type of the first element of
    ``json_file["friends"]``:

    * plain format - each element is a list; all elements are appended
      unchanged;
    * advanced format - each element is a dict with a ``link_list`` of dicts.
      An entry is appended as ``[name, link, avatar]`` (plus ``suffix`` when
      present) only if ``name``, ``link`` and ``avatar`` are all set.

    Problems are logged as warnings rather than raised. Entries appended
    before an advanced-format error are kept.

    :param json_file: decoded JSON document expected to hold a "friends" key
    :param setting: settings mapping; entries are appended to
        ``setting["SETTINGS_FRIENDS_LINKS"]["list"]``
    :return: None
    """
    friends = json_file.get("friends")
    if not friends:
        logger.warning("json_api格式错误:没有friends字段")
        return

    # Peek at the first element to decide the layout; anything that cannot be
    # indexed falls through to the "unknown layout" warning below.
    try:
        first = friends[0]
    except (IndexError, KeyError, TypeError):
        first = None

    if isinstance(first, list):
        # Plain format: entries are already in the settings list shape.
        setting["SETTINGS_FRIENDS_LINKS"]["list"].extend(friends)
    elif isinstance(first, dict):
        # Advanced format: flatten every group's link_list.
        try:
            for group in friends:
                for link in group["link_list"]:
                    name = link.get("name")
                    friendlink = link.get("link")
                    avatar = link.get("avatar")
                    suffix = link.get("suffix")
                    # name, link and avatar are mandatory; skip the entry otherwise.
                    if name and friendlink and avatar:
                        entry = [name, friendlink, avatar]
                        if suffix:
                            entry.append(suffix)
                        setting["SETTINGS_FRIENDS_LINKS"]["list"].append(entry)
        except Exception:
            # Best effort: a malformed group must not abort the crawler start-up.
            logger.warning("json_api进阶格式解析错误")
    else:
        logger.warning("json_api格式错误:无法判定数据形式")


def settings_friends_json_read(json_api, setting):
    """Load the JSON friend-link document named by ``json_api`` and parse it.

    ``json_api`` is handled in one of two ways:

    * it starts with ``"http"`` - the document is fetched with an HTTP GET;
    * it is an existing file whose name ends with ``".json"`` - the file is
      read as UTF-8.

    Any other value is ignored. The decoded document is passed to
    ``settings_friends_json_parse``. Failures are logged as warnings rather
    than raised.

    :param json_api: URL or local path of the JSON document
    :param setting: settings mapping passed through to the parser
    :return: None
    """
    import json

    if json_api.startswith("http"):
        # Online JSON: fetch the same URL that was just checked.
        try:
            response = requests.get(json_api)
            document = json.loads(response.text)
            settings_friends_json_parse(document, setting)
        except Exception:
            logger.warning(f"在线解析:{json_api} 失败")
    elif os.path.isfile(json_api) and json_api.endswith(".json"):
        # Local JSON file: read it directly.
        try:
            with open(json_api, "r", encoding="utf-8") as f:
                document = json.load(f)
            settings_friends_json_parse(document, setting)
        except Exception:
            logger.warning(f"加载文件:{json_api} 失败")


def sub_process_start():
Expand All @@ -50,8 +118,9 @@ def initsettings(setting):
elif DATABASE == "mongodb":
setting["ITEM_PIPELINES"]["hexo_circle_of_friends.pipelines.mongodb_pipe.MongoDBPipeline"] = 300
# 如果配置了json_api友链,在这里进行获取
if SETTINGS_FRIENDS_LINKS["json_api"].startswith("http"):
settings_friends_json_parse(setting)
if SETTINGS_FRIENDS_LINKS["enable"] and SETTINGS_FRIENDS_LINKS["json_api"]:
json_api = SETTINGS_FRIENDS_LINKS["json_api"]
settings_friends_json_read(json_api, setting)


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions hexo_circle_of_friends/settings.py
Expand Up @@ -33,7 +33,7 @@

# 配置项友链
# enable:是否启用配置项友链 True/False(针对还未适配主题或者有定制需求的用户)
# json_api:通过api获取配置项友链,返回格式必须为:{"friends":[[友链1],[友链2],[友链3],[友链4]....]},友链内容同LINK字段格式
# json_api:通过json格式配置友链,详见:https://fcircle-doc.js.cool/#/developmentdoc?id=配置项json友链
# list字段填写格式:["name", "link", "avatar","suffix"],其中:
# name:必填,友链的名字
# link:必填,友链主页地址
Expand Down Expand Up @@ -99,7 +99,7 @@

##############################除非您了解本项目,否则请勿修改以下内容################################

VERSION = "4.3.2"
VERSION = "4.3.3"

# debug
# debug模式
Expand Down

0 comments on commit dd59f3c

Please sign in to comment.