Python调用js代码:https://github.com/ZSAIm…
1.pyexecjs
- 安装
pip install PyExecJS
- 用法:
import execjs
execjs.eval(js代码)
或者
js = js代码
ctx = execjs.compile(js)
result = ctx.call(js函数, 参数)
- 存在的问题:
- 执行编码的输入输出操作出现报错
- 解决方案,可以把参数使用base64编码一下
- 执行大型js会慢
- 执行编码的输入输出操作出现报错
2.selenium
- 用法
js = "js代码"
result = browser.execute_script(js)
3.pyppeteer
用法
await page.evaluate("""
() => {
    Object.defineProperties(navigator, {
        webdriver: {
            get: () => false
        }
    })
}
""")
- ```python
# -*- coding: utf-8 -*-
import asyncio
import time
from pyppeteer.launcher import launch
'''
pip uninstall websockets
pip install websockets==6.0
'''
from utils_bag.exe_js import js1, js3, js4, js5
from fake_useragent import UserAgent
# Module-level event loop; screenshot() below schedules main() on it and runs it to completion.
# NOTE(review): calling asyncio.get_event_loop() with no running loop is deprecated in
# recent Python versions -- confirm the target interpreter before relying on it.
loop = asyncio.get_event_loop()
async def main(url, img_addr, index):
    """Open *url* in Chromium, scroll it, save a screenshot, and locate one image.

    The page is loaded twice (the second load happens after any cookies have
    been installed), the project scripts ``js1``/``js3``/``js4``/``js5`` are
    evaluated in it, the element ``#sufei-dialog-close`` is clicked if it can
    be, and the page is scrolled step by step to the bottom before a
    full-page screenshot is written.

    Args:
        url: Address of the page to open.
        img_addr: Path the full-page screenshot is written to.
        index: Zero-based position, within the images matched by the
            ``.photo a img`` selector below, of the image to measure.

    Returns:
        dict with the keys ``left``, ``right``, ``top`` and ``bottom`` taken
        from the selected image's ``getBoundingClientRect()``.

    Raises:
        Any pyppeteer error from navigation, evaluation, or the screenshot
        (for example when no image exists at *index*). The browser is closed
        in every case.
    """
    # Placeholder: put cookie dicts here to have them installed before the reload.
    cookies = []
    # To route traffic through a proxy, add '--proxy-server=<host:port>' to 'args'.
    browser = await launch({'headless': False, 'args': ['--no-sandbox'], 'executablePath': r'chrome-win\chrome.exe', })
    try:
        page = await browser.newPage()
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
        # Must be registered BEFORE navigating: the script only runs for
        # documents created after this call, so registering it after goto()
        # would leave navigator.webdriver untouched on the loaded page.
        await page.evaluateOnNewDocument('() =>{ Object.defineProperties(navigator,'
                                         '{ webdriver:{ get: () => false } }) }')
        await page.goto(url)
        # asyncio.sleep instead of time.sleep: a blocking sleep would stall the
        # event loop that pyppeteer's browser connection runs on.
        await asyncio.sleep(5)
        for cookie in cookies:
            await page.setCookie(cookie)
        # Reload so the page is rendered with the cookies set above.
        await page.goto(url)
        await page.evaluate(js1)
        await page.evaluate(js3)
        await page.evaluate(js4)
        await page.evaluate(js5)
        try:
            await page.click('#sufei-dialog-close')
        except Exception as e:
            # Best effort: the click failing (e.g. no such element) is not fatal.
            print(e)
        await asyncio.sleep(5)
        try:
            print('-------------------')
            # Scroll down in 100px steps every 100ms, then jump back to the top
            # and tag the document title once the bottom has been reached.
            await page.evaluate('''(function () {
                var y = 0;
                var step = 100;
                window.scroll(0, 0);
                function f() {
                    if (y < document.body.scrollHeight) {
                        y += step;
                        window.scroll(0, y);
                        setTimeout(f, 100);
                    } else {
                        window.scroll(0, 0);
                        document.title += "scroll-done";
                    }
                }
                setTimeout(f, 1000);
            })();
            ''')
        except Exception as err:
            print(err)
        # index is passed as an evaluate() argument rather than formatted into
        # the JavaScript source, so it can never be interpreted as code.
        position = await page.evaluate('''
            (index) => {
                const a2 = document.querySelectorAll('.tshop-pbsm-shop-srch-list .grid .item3line1 .item .photo a img, .tshop-pbsm-shop-srch-list .rmd-bd .item3line1 .item .photo a img')
                const p = a2[index].getBoundingClientRect()
                return {
                    left:p["left"],
                    right:p["right"],
                    top:p["top"],
                    bottom:p["bottom"],
                }
            }
        ''', index)
        # Give the timer-driven scrolling above time to finish before capturing.
        await asyncio.sleep(10)
        await page.screenshot({
            "path": '{}'.format(img_addr),
            "fullPage": True
        })
        await asyncio.sleep(10)
        return position
    finally:
        # Always release the Chromium process, even when a step above failed.
        await browser.close()
def callback(task):
    """Return the result of a finished task.

    Intended for use as a done-callback; ``task.result()`` re-raises the
    task's exception if the task failed.
    """
    return task.result()
def screenshot(url, img_addr, index=1):
    """Run ``main(url, img_addr, index)`` to completion and return its result.

    Args:
        url: Address of the page to open.
        img_addr: Path the screenshot is written to.
        index: Forwarded to ``main``; defaults to 1.

    Returns:
        Whatever ``main`` returns. Exceptions raised by ``main`` propagate.
    """
    # run_until_complete already wraps the coroutine in a task and hands back
    # its result, so no extra done-callback is needed; a callback that calls
    # task.result() would only re-raise failures a second time inside the loop.
    return loop.run_until_complete(main(url, img_addr, index))
if __name__ == '__main__':
    # Demo run: capture the sample shop page into ./sqs.png using the default index.
    screenshot(
        'https://shop449187140.taobao.com/search.htm?pageNo=2',
        "./sqs.png")
```
4.nodejs
- 方案
- 所有的python代码直接用js写,不再跨语言
- rpc
- 使用nodejs的Express框架搭建个服务,然后用python调用
- 缺点
- nodejs中没有window对象
- 需要自己创建,或者使用jsdom之类的库
- base64中window.btoa,btoa() 方法用于创建一个base64编码的字符串。
- nodejs中没有window对象
5.pip install PyJSCaller
https://github.com/ZSAIm/PyJSCaller
import jscaller
6.python
直接引用npm包,解决一些加密包的依赖问题。
我的目录:
1 | │ js_aes.py |
代码:
import execjs
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来源 desperado!