bobdu.cc

Follow me on GitHub

xueqiu.py

import json
import requests
from models import session, Fangchan


def save_db(r_dict):
    """Store every item of one response dictionary in the database.

    Each entry of ``r_dict['list']`` holds a JSON-encoded string under the
    ``'data'`` key.  That string is decoded and its ``id``, ``title``,
    ``description`` and ``target`` fields are written as one ``Fangchan``
    row.  All rows of the response are committed together.

    Args:
        r_dict: Decoded response dictionary containing a ``'list'`` key.

    Raises:
        KeyError: If the response or one of its items lacks an expected key.
        Exception: Any other failure (JSON decoding, database error) is
            re-raised after the pending transaction has been rolled back.
    """
    try:
        for entry in r_dict['list']:
            payload = json.loads(entry['data'])

            # merge() rather than add(): an id that is already stored (the
            # script was re-run, or the same item showed up on two pages)
            # updates the existing row instead of failing the whole commit
            # with a duplicate-primary-key error.
            session.merge(
                Fangchan(
                    id=payload['id'],
                    title=payload['title'],
                    description=payload['description'],
                    target=payload['target'],
                )
            )

        session.commit()
    except Exception:
        # The session is shared at module level; without a rollback it
        # would stay in a failed/half-populated state for every later call.
        session.rollback()
        raise


def get_one_ajax(max_id, timeout=10):
    """Fetch one ajax JSON response, store its items, and return it.

    Requests the public timeline endpoint for category 111 with the given
    ``max_id``, decodes the JSON body, hands it to ``save_db`` and returns
    the decoded dictionary so the caller can read ``'next_max_id'``.

    Args:
        max_id: Value sent as the ``max_id`` query parameter.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON response as a dictionary.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection errors or
            timeouts.
    """
    url = f'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={max_id}&count=10&category=111'
    headers = {
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        'Cookie': 'aliyungf_tc=AQAAAGqCOzhkvAQAUhVFeTRBE/skwOZG; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.1962889565.1534299758; _gid=GA1.2.83378188.1534299758; _gat_gtag_UA_16079156_4=1; u=201534299759497; device_id=9eff4f9a1adbe00eecae81a94e931c03; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534299759,1534299771,1534299785; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534299785'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse and store an
    # error page as if it were timeline data.
    response.raise_for_status()

    r_dict = response.json()
    save_db(r_dict)

    return r_dict


if __name__ == '__main__':
    # Issue 100 requests in sequence, starting from max_id -1 and feeding
    # each response's 'next_max_id' into the following request.
    cursor = -1
    for _ in range(100):
        cursor = get_one_ajax(cursor)['next_max_id']

models.py

import os

from sqlalchemy import create_engine, Column, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Connection URL for the database.  It can be overridden through the
# XUEQIU_DATABASE_URL environment variable so that real credentials do not
# have to be written into the source; the fallback is the original
# hard-coded local URL, so existing setups keep working unchanged.
engine = create_engine(
    os.environ.get(
        'XUEQIU_DATABASE_URL',
        'mysql+pymysql://root:123456@127.0.0.1:3306/xueqiu',
    )
)

# Session factory bound to the engine, plus the single module-level session
# that other modules import.
Session = sessionmaker(bind=engine)
session = Session()

# Declarative base class that the models below inherit from.
Base = declarative_base()


class Fangchan(Base):
    """Real-estate data model, mapped to the ``fangchan`` table."""
    __tablename__ = 'fangchan'

    # Integer primary key of the row.
    id = Column(Integer, primary_key=True)
    # Title, stored as a string of at most 255 characters.
    title = Column(String(255))
    # Description, stored as unbounded text.
    description = Column(Text)
    # Target, stored as a string of at most 255 characters.
    target = Column(String(255))


if __name__ == '__main__':
    # Initialise the database: create all tables declared by the models.
    Base.metadata.create_all(bind=engine)