百度统计 API(Python 版)

前言

创建 RSA 公钥

新建 RSA 公钥文件 api_pub.key,然后将以下内容拷贝并保存到该文件中。

1
2
3
4
5
6
-----BEGIN PUBLIC KEY-----
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDHn/hfvTLRXViBXTmBhNYEIJeG
GGDkmrYBxCRelriLEYEcrwWrzp0au9nEISpjMlXeEW4+T82bCM22+JUXZpIga5qd
BrPkjU08Ktf5n7Nsd7n9ZeI0YoAKCub3ulVExcxGeS3RVxFai9ozERlavpoTOdUz
EH6YWHP4reFfpMpLzwIDAQAB
-----END PUBLIC KEY-----

Python2 代码

以下 Python 代码调用了百度统计的 API 接口,默认会获取今天和昨天的网站概况统计数据,然后通过 Server 酱将 Markdown 格式(HTML 表格)的统计数据发送到特定的手机(需绑定 Server 酱的微信公众号)。Linux 系统环境下,配合 Python 脚本 + Crontab 定时任务,即可定时发送统计报表信息到特定的手机上,这样就不再需要频繁登录 Web 版的百度统计管理后台了。请自行替换代码中的 PUBLIC_KEY_FILE、USER_NAME、PASS_WORD、TOKEN、SC_URL 变量值,点击此处可查看移动端的展示效果。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# --*-- coding:utf-8 ---*---

import sys
import json
import math
import StringIO
import gzip
import io
import uuid
import time
import logging
import datetime

import requests
import rsa


# Python 2 idiom: reload sys so that setdefaultencoding is available again,
# then make UTF-8 the default codec for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')


# Identifier generated once per process; sent as the 'uuid' field and the
# 'UUID' header of the requests built below.
UUID = str(uuid.uuid1())
# Path of the RSA public key file; a relative path is resolved against the
# working directory of the running process.
PUBLIC_KEY_FILE = './api_pub.key'
# File that receives the log records written by queryOverviewData().
LOG_FILE = "/tmp/baidu_tongji_report.log"


ACCOUNT_TYPE = '1' # Baidu Tongji account type: ZhanZhang:1, FengChao:2, Union:3, Columbus:4
USER_NAME = 'xxxxxxxxx' # Baidu Tongji user name
PASS_WORD = 'xxxxxxxxxxxxxxxxxx' # Baidu Tongji password
TOKEN = 'xxxxxxxxxxxxxxxxxxxxxxxxxxx' # Baidu Tongji API token


API_URL = 'https://api.baidu.com/json/tongji/v1/ReportService' # Baidu Tongji report (query) endpoint
LOGIN_URL = 'https://api.baidu.com/sem/common/HolmesLoginService' # Baidu Tongji login endpoint
SC_URL = 'https://sc.ftqq.com/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.send' # ServerChan message endpoint


def encrypt(data):
    """Encrypt ``data`` with the RSA public key stored in ``PUBLIC_KEY_FILE``.

    The payload is cut into fixed-size chunks, every chunk is encrypted on its
    own with ``rsa.encrypt`` and the ciphertexts are concatenated in order.

    :param data: byte string to encrypt (callers in this file pass the
        gzip-compressed request body).
    :return: the concatenated ciphertext as a byte string; empty when
        ``data`` is empty.
    """
    # Largest plaintext chunk handed to rsa.encrypt. PKCS#1 v1.5 padding needs
    # 11 bytes, so 117 is the maximum for a 1024-bit (128-byte) key.
    # NOTE(review): assumes api_pub.key holds a 1024-bit key -- confirm.
    chunk_size = 117

    # Load the public key; binary mode hands the PEM parser raw bytes.
    with open(PUBLIC_KEY_FILE, 'rb') as key_file:
        pubkey = rsa.PublicKey.load_pkcs1_openssl_pem(key_file.read())

    # Encrypt chunk by chunk and join once instead of growing a string
    # inside the loop.
    return b''.join(
        rsa.encrypt(data[start:start + chunk_size], pubkey)
        for start in range(0, len(data), chunk_size)
    )


# Decompress gzip data
def gzdecode(data):
    """Return the decompressed content of the gzip byte string ``data``.

    :param data: gzip-compressed bytes.
    :return: the original, uncompressed bytes.
    """
    # io.BytesIO holds raw bytes on Python 2 and 3 alike; the with-block
    # closes the gzip reader even when decompression raises.
    with gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb') as gziper:
        return gziper.read()


# Compress data with gzip
def gzencode(data):
    """Return ``data`` compressed as a gzip byte string (compression level 9).

    :param data: bytes to compress; text is encoded as UTF-8 first.
    :return: the gzip-compressed bytes.
    """
    if not isinstance(data, bytes):
        # gzip works on bytes, so text input is encoded explicitly.
        data = data.encode('utf-8')

    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=9) as gziper:
        gziper.write(data)
    # Read the buffer only after the writer is closed, so the gzip trailer
    # is part of the result.
    return buf.getvalue()


# Date serializer
class DateEncoder(json.JSONEncoder):
    """JSON encoder that renders ``datetime.date`` values as ``YYYY-MM-DD``."""

    def default(self, obj):
        """Serialize dates; hand every other type to the base encoder."""
        if not isinstance(obj, datetime.date):
            return json.JSONEncoder.default(self, obj)
        return obj.strftime('%Y-%m-%d')


# Send a message
def sendMessage(title, content, timeout=10):
    """Push a notification through the ServerChan endpoint ``SC_URL``.

    :param title: message title, sent as the ``text`` query parameter.
    :param content: message body, sent as the ``desp`` query parameter.
    :param timeout: seconds to wait for the server; without a timeout a
        stalled connection would block the calling job indefinitely.
    :return: the raw response body.
    :raises requests.exceptions.RequestException: on connection errors or
        when the timeout expires.
    """
    params = {'text': title, 'desp': content}
    response = requests.get(SC_URL, params=params, timeout=timeout)
    return response.content


class BaiduTongji(object):
    """Small client for the Baidu Tongji login and report services.

    Creating an instance logs in immediately and keeps the returned session
    values (``ucid`` and ``st``), which the report calls send with every
    request.
    """

    # Session values; filled in by __init__ after a successful login.
    ucid = None
    st = None

    # Seconds to wait for any HTTP response, so a stalled connection cannot
    # block the caller forever.
    request_timeout = 30

    def __init__(self, username, password, token):
        """Store the credentials and log in.

        :param username: Baidu Tongji user name.
        :param password: Baidu Tongji password.
        :param token: Baidu Tongji API token.
        :raises RuntimeError: when the login reply reports a non-zero
            ``retcode`` or lacks ``ucid`` / ``st``.
        """
        self.username = username
        self.password = password
        self.token = token

        # prelogin() is available but not called; doLogin alone starts the
        # session.
        ret = self.dologin()
        if ret.get('retcode') != 0 or 'ucid' not in ret or 'st' not in ret:
            # Fail with the server's own message instead of a bare KeyError.
            raise RuntimeError('Baidu Tongji login failed: retcode=%s retmsg=%s'
                               % (ret.get('retcode'), ret.get('retmsg')))
        self.ucid = str(ret['ucid'])
        self.st = ret['st']

    def _login_request(self, function_name, request):
        """POST one call to the login service and return the decoded reply.

        The JSON payload is gzip-compressed, then RSA-encrypted, and sent to
        ``LOGIN_URL``.

        :param function_name: value of the ``functionName`` field.
        :param request: dict sent as the ``request`` field.
        :return: the reply parsed from JSON.
        """
        data = {'username': self.username,
                'token': self.token,
                'functionName': function_name,
                'uuid': UUID,
                'request': request,
                }
        headers = {'UUID': UUID, 'account_type': ACCOUNT_TYPE,
                   'Content-Type': 'data/gzencode and rsa public encrypt;charset=UTF-8'
                   }

        # Compress first, then encrypt.
        post_data = encrypt(gzencode(json.dumps(data)))
        resp = requests.post(LOGIN_URL, data=post_data, headers=headers,
                             timeout=self.request_timeout)
        # The first 8 bytes are skipped before gunzipping -- presumably a
        # fixed-length header in front of the gzip payload; TODO confirm.
        return json.loads(gzdecode(resp.content[8:]))

    def _report_request(self, method, body):
        """POST one call to the report service and return the parsed JSON.

        :param method: service method appended to ``API_URL``.
        :param body: value of the ``body`` field; dates inside it are
            serialized by ``DateEncoder``.
        :return: the full reply parsed from JSON.
        """
        url = API_URL + '/' + method
        headers = {'UUID': UUID, 'USERID': self.ucid, 'Content-Type': 'data/json;charset=UTF-8'}
        # The 'password' header field carries the session value ``st``, not
        # the account password.
        data = {'header': {'username': self.username, 'password': self.st, 'token': self.token,
                           'account_type': ACCOUNT_TYPE, },
                'body': body, }
        post_data = json.dumps(data, cls=DateEncoder)
        resp = requests.post(url, data=post_data, headers=headers,
                             timeout=self.request_timeout)
        return resp.json()

    def prelogin(self):
        """Call ``preLogin`` on the login service and print the reply."""
        ret = self._login_request(
            'preLogin',
            {'osVersion': 'windows', 'deviceType': 'pc', 'clientVersion': '1.0'})
        print('prelogin: %s' % (ret,))

    def dologin(self):
        """Call ``doLogin`` with the stored password.

        :return: the decoded reply, returned whatever its ``retcode`` is;
            a summary is printed only when ``retcode`` is 0.
        """
        ret = self._login_request('doLogin', {'password': self.password})
        if ret['retcode'] == 0:
            # NOTE(review): this echoes the session value ``st`` to stdout;
            # consider dropping it if stdout ends up in shared logs.
            print(u'dologin: %s  ucid: %s  st: %s'
                  % (ret['retmsg'], ret['ucid'], ret['st']))
        return ret

    def dologout(self):
        """Call ``doLogout`` for the current session and print the reply message."""
        ret = self._login_request('doLogout', {'ucid': self.ucid, 'st': self.st, })
        print('logout: %s' % (ret['retmsg'],))

    def getsitelist(self):
        """Return the ``list`` entry of the first data element from ``getSiteList``."""
        return self._report_request('getSiteList', None)['body']['data'][0]['list']

    def getdata(self, para):
        """Run a ``getData`` query.

        :param para: dict of query parameters sent as the request body.
        :return: the ``body`` part of the reply.
        """
        return self._report_request('getData', para)['body']


'''
# 地域分布报告 visit/district/a
# pv_count (浏览量(PV))
# pv_ratio (浏览量占比,%)
# visit_count (访问次数)
# visitor_count (访客数(UV))
# new_visitor_count (新访客数)
# new_visitor_ratio (新访客比率,%)
# ip_count (IP 数)
# bounce_ratio (跳出率,%)
# avg_visit_time (平均访问时长,秒)
# avg_visit_pages (平均访问页数)
# trans_count (转化次数)
# trans_ratio (转化率,%)
# 网站概况 overview/getTimeTrendRpt
# pv_count (浏览量(PV))
# visitor_count (访客数(UV))
# ip_count (IP 数)
# bounce_ratio (跳出率,%)
# avg_visit_time (平均访问时长,秒)
# 趋势分析 trend/time/a
# pv_count (浏览量(PV))
# pv_ratio (浏览量占比,%)
# visit_count (访问次数)
# visitor_count (访客数(UV))
# new_visitor_count (新访客数)
# new_visitor_ratio (新访客比率,%)
# ip_count (IP 数)
# bounce_ratio (跳出率,%)
# avg_visit_time (平均访问时长,秒)
# avg_visit_pages (平均访问页数)
# trans_count (转化次数)
# trans_ratio (转化率,%)
# avg_trans_cost (平均转化成本,元)
# income (收益,元)
# profit (利润,元)
# roi (投资回报率,%)



'''


'''
# Http 请求参数
para = {
'site_id': site_id, # 站点ID
'method': 'trend/time/a', # 趋势分析报告
'start_date': '20170316', # 所查询数据的起始日期
'end_date': '20170320', # 所查询数据的结束日期
'metrics': 'pv_count,visitor_count', # 所查询指标为PV和UV
'max_results': '0', # 返回所有条数
'gran': 'day', # 按天粒度 day/hour/week/month
}
'''


# Format a duration given in seconds
def _format_duration(seconds):
    """Return ``seconds`` as ``M:SS``; non-numeric values are returned as text."""
    try:
        minutes, secs = divmod(int(seconds), 60)
    except (TypeError, ValueError):
        # e.g. a placeholder instead of a number -- show it unchanged.
        return str(seconds)
    # Zero-pad the seconds so that 65 renders as 1:05 rather than 1:5.
    return '%d:%02d' % (minutes, secs)


# Shorten a date cell of the report
def _short_date(cell):
    """Turn a date cell such as ``["2020/01/17"]`` into the label ``01-17``."""
    # Drop the leading "YYYY/" and switch the separator.
    return str(cell[0][5:]).replace('/', '-')


# Query the site overview statistics
def queryOverviewData():
    """Fetch yesterday's and today's overview figures and push them as a report.

    Logs in with ``USER_NAME`` / ``PASS_WORD`` / ``TOKEN``, queries
    ``overview/getTimeTrendRpt`` for the first site of the account, logs the
    returned rows to ``LOG_FILE``, renders them as an HTML table and sends
    the table through ``sendMessage``. The outcome of the send is logged.
    """
    bdtj = BaiduTongji(USER_NAME, PASS_WORD, TOKEN)
    sites = bdtj.getsitelist()
    site_id = sites[0]['site_id']

    # Read the clock once so both ends of the range belong to the same day
    # pair; getdata() serializes the date objects as YYYY-MM-DD.
    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    para = {'site_id': site_id,
            'method': 'overview/getTimeTrendRpt',
            'start_date': yesterday,
            'end_date': today,
            'metrics': 'pv_count,visitor_count,ip_count,bounce_ratio,avg_visit_time',
            'max_results': '0',
            'gran': 'day',
            }

    # Query the data
    data = bdtj.getdata(para)

    # Logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', filename=LOG_FILE)

    # Log the query result
    items = data['data'][0]['result']['items']
    logging.info(json.dumps(items))

    # items[0] holds the date cells and items[1] the metric rows; index 0 is
    # the start date (yesterday) and index 1 the end date (today).
    dates, rows = items[0], items[1]
    today_data, today_date = rows[1], _short_date(dates[1])
    yesterday_data, yesterday_date = rows[0], _short_date(dates[0])

    # Report template
    str_format = ''.join((
        '<form>'
        ' <table>',
        ' <tr>',
        ' <th>统计日期</th>',
        ' <th>今天({today_date})</th>',
        ' <th>昨天({yesterday_date})</th>',
        ' </tr>\n',
        ' <tr>',
        ' <td>浏览量(PV)</td>',
        ' <td>{today_pv_count} </td>',
        ' <td>{yesterday_pv_count}</td>',
        ' </tr>\n\n',
        ' <tr>',
        ' <td>访客数(UV)</td>',
        ' <td>{today_visitor_count} </td>',
        ' <td>{yesterday_visitor_count}</td>',
        ' </tr>\n\n',
        ' <tr>',
        ' <td>IP数 </td>',
        ' <td>{today_ip_count} </td>',
        ' <td>{yesterday_ip_count}</td>',
        ' </tr>\n\n',
        ' <tr>',
        ' <td>跳出率 </td>',
        ' <td>{today_bounce_ratio}% </td>',
        ' <td>{yesterday_bounce_ratio}%</td>',
        ' </tr>\n\n',
        ' <tr>',
        ' <td>平均访问时长</td>',
        ' <td>{today_avg_visit_time} </td>',
        ' <td>{yesterday_avg_visit_time}</td>',
        ' </tr>',
        ' </table>',
        '</form>'))

    # Metric order follows the 'metrics' parameter of the query above.
    report = str_format.format(
        today_date=today_date,
        today_pv_count=today_data[0],
        today_visitor_count=today_data[1],
        today_ip_count=today_data[2],
        today_bounce_ratio=today_data[3],
        today_avg_visit_time=_format_duration(today_data[4]),
        yesterday_date=yesterday_date,
        yesterday_pv_count=yesterday_data[0],
        yesterday_visitor_count=yesterday_data[1],
        yesterday_ip_count=yesterday_data[2],
        yesterday_bounce_ratio=yesterday_data[3],
        yesterday_avg_visit_time=_format_duration(yesterday_data[4]))

    # Send the message
    title = ''.join(('百度统计报表(', time.strftime("%m-%d %H:%M", time.localtime()), ')'))
    msgResp = sendMessage(title, report)
    msgResult = json.loads(msgResp)
    if msgResult.get('errno') == 0:
        logging.info('message send successed!')
    else:
        # Pass the reply as a logging argument: it is a dict and cannot be
        # joined onto a string.
        logging.error('message send faild: %s', msgResult)

# Script entry point: build and send the report when the file is run directly.
if __name__ == '__main__':

    queryOverviewData()

Crontab 定时任务

Linux 系统环境下,配合 Python 脚本 + Crontab 定时任务,即可定时发送统计报表信息。

1
2
# Send the statistics report every day at 23:59
59 23 * * * /usr/bin/python2 /usr/local/baidu-push/baidu_tongji.py

脚本输出的日志信息

1
2
3
4
$ cat /tmp/baidu_tongji_report.log

2020-01-17 22:15:10,563 - www - INFO - [[["2020/01/16"], ["2020/01/17"]], [[218, 65, 65, 76.19, 295], [100, 63, 62, 80.88, 397]], [], []]
2020-01-17 22:15:15,737 - www - INFO - message send successed!

Docker 一键部署统计服务

  • Dockerfile 的内容如下,构建生成 Docker 镜像后,使用命令直接启动 Docker 镜像即可。
  • 使用命令直接启动 Docker 镜像时,需要通过 -v 参数挂载对应的文件(如下)
    • a) 将宿主机里的 RSA 公钥文件挂载到 Docker 容器内的 /usr/local/python_scripts/api_pub.key 位置
    • b) 将宿主机里的 Python 脚本文件挂载到 Docker 容器内的 /usr/local/python_scripts/baidu_tongji.py 位置
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
FROM augurproject/python2-and-3

# MAINTAINER is deprecated; the maintainer is recorded as an image label.
LABEL maintainer="clay<656418510@qq.com>"

# Log directory, the file tailed by CMD, and the script directory.
RUN mkdir -p /tmp/baidu /usr/local/python_scripts \
    && touch /var/log/cron.log

ENV workpath=/usr/local/python_scripts

WORKDIR $workpath

# Use the Asia/Shanghai time zone inside the container.
RUN echo "Asia/Shanghai" > /etc/timezone \
    && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# Keep a backup of the stock APT sources, then switch to the 163 mirror.
RUN cp /etc/apt/sources.list /etc/apt/backup.sources.list \
    && { \
        echo "deb http://mirrors.163.com/debian/ stretch main non-free contrib"; \
        echo "deb http://mirrors.163.com/debian/ stretch-updates main non-free contrib"; \
        echo "deb http://mirrors.163.com/debian/ stretch-backports main non-free contrib"; \
        echo "deb-src http://mirrors.163.com/debian/ stretch main non-free contrib"; \
        echo "deb-src http://mirrors.163.com/debian/ stretch-updates main non-free contrib"; \
        echo "deb-src http://mirrors.163.com/debian/ stretch-backports main non-free contrib"; \
        echo "deb http://mirrors.163.com/debian-security/ stretch/updates main non-free contrib"; \
        echo "deb-src http://mirrors.163.com/debian-security/ stretch/updates main non-free contrib"; \
    } > /etc/apt/sources.list

# Update, install and clean up in ONE layer: a cached "apt-get update" layer
# can otherwise be combined with a later install against stale package
# lists, and cleanup in a separate layer does not shrink the image.
RUN apt-get -y update \
    && apt-get -y upgrade \
    && apt-get -y install python-rsa python-requests cron rsyslog vim htop net-tools telnet apt-utils tree wget curl git make gcc \
    && apt-get -y autoclean \
    && apt-get -y autoremove \
    && rm -rf /var/lib/apt/lists/*

# Uncomment the cron facility line(s) in the rsyslog configuration.
RUN sed -i "s/#cron./cron./g" /etc/rsyslog.conf

# Run the report script every day at 23:59 as root.
RUN echo "59 23 * * * root /usr/bin/python2 /usr/local/python_scripts/baidu_tongji.py" >> /etc/crontab

# Start rsyslog and cron, then keep the container in the foreground by
# following the cron log.
CMD service rsyslog start && service cron start && tail -f -n 20 /var/log/cron.log

若通过 Docker-Compose 来管理 Docker 镜像,那么 YML 配置文件的内容如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
version: '3.5'

services:
  baidu-push:
    image: clay/baidu-push:1.0
    container_name: hexo-baidu-push
    restart: always
    environment:
      TZ: 'Asia/Shanghai'
    volumes:
      # host path : container path
      - /usr/local/baidu-push/logs:/tmp/baidu
      - /usr/local/baidu-push/api_pub.key:/usr/local/python_scripts/api_pub.key
      - /usr/local/baidu-push/baidu_tongji.py:/usr/local/python_scripts/baidu_tongji.py

数据卷挂载:

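  • /usr/local/baidu-push/logs:宿主机里的日志目录(挂载到容器内的 /tmp/baidu;脚本中 LOG_FILE 的默认值为 /tmp/baidu_tongji_report.log,需将其改为 /tmp/baidu/ 目录下的路径,日志才会写入该挂载目录)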
  • /usr/local/baidu-push/api_pub.key:宿主机里 RSA 公钥文件的路径
  • /usr/local/baidu-push/baidu_tongji.py:宿主机里 Python 脚本文件的路径

移动端的展示效果

baidu-tongji-sc

参考资料