Laravel下使用Guzzle编写多线程爬虫实战

中年以后的男人,时常会觉得孤独,因为他一睁开眼睛,周围都是要依靠他的人,却没有他可以依靠的人
### 创建命令

运行命令行创建命令:

```bash
php artisan make:console MultithreadingRequest --command=test:multithreading-request
```

### 注册命令

编辑 app/Console/Kernel.php,在 $commands 数组中增加:

```php
Commands\MultithreadingRequest::class,
```

### 测试命令

修改 app/Console/Commands/MultithreadingRequest.php 文件,在 handle 方法中增加:

```php
$this->info('hello');
```

### 安装 Guzzle

```bash
composer require guzzlehttp/guzzle "6.2"
```

### 代码

<?php namespace App\Console\Commands;

use GuzzleHttp\Client;
use GuzzleHttp\Pool;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Exception\ClientException;
use Illuminate\Console\Command;

/**
 * Artisan command that fetches several GitHub user profiles concurrently
 * through a Guzzle request pool and prints each user's GitHub ID.
 *
 * The concurrency comes from Guzzle's Pool of asynchronous requests; this
 * class itself does not start any threads.
 */
class MultithreadingRequest extends Command
{
    /** @var int Number of requests expected to settle during the current run. */
    private $totalPageCount = 0;

    /** @var int Number of requests that have settled (fulfilled or rejected) so far. */
    private $counter = 0;

    /** @var int Maximum number of requests the pool keeps in flight at once. */
    private $concurrency = 7;

    /** @var string[] GitHub login names whose profiles are fetched. */
    private $users = [
        'CycloneAxe',
        'appleboy',
        'Aufree',
        'lifesign',
        'overtrue',
        'zhengjinghua',
        'NauxLiu',
    ];

    /** @var string Console command signature. */
    protected $signature = 'test:multithreading-request';

    /** @var string Console command description. */
    protected $description = 'Command description';

    /**
     * Create the command instance.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Send one GET request per configured user through a Guzzle pool and
     * block until every request has either been fulfilled or rejected.
     *
     * @return void
     */
    public function handle()
    {
        // Reset per-run state so the completion message is emitted correctly
        // even if this command object is executed more than once.
        $this->totalPageCount = count($this->users);
        $this->counter = 0;

        $client = new Client();

        $pool = new Pool($client, $this->buildRequests($client), [
            'concurrency' => $this->concurrency,
            'fulfilled' => function ($response, $index) {
                $this->reportFulfilled($response, $index);
                $this->countedAndCheckEnded();
            },
            'rejected' => function ($reason, $index) {
                $this->reportRejected($reason);
                $this->countedAndCheckEnded();
            },
        ]);

        // Start sending the requests and wait for the whole pool to settle.
        $pool->promise()->wait();
    }

    /**
     * Record that one request has settled and announce completion once all
     * expected requests of the current run are done.
     *
     * @return void
     */
    public function countedAndCheckEnded()
    {
        $this->counter++;

        // Strict equality so the message is printed exactly once per run.
        if ($this->counter === $this->totalPageCount) {
            $this->info("请求结束!");
        }
    }

    /**
     * Lazily produce one request factory per configured user.
     *
     * Each factory is yielded under the user's own key in $this->users, so
     * the index passed to the pool callbacks can be used to look the user up
     * directly, whatever the keys of the list are.
     *
     * @param Client $client HTTP client used to issue the asynchronous requests.
     *
     * @return \Generator Yields callables that return a promise when invoked.
     */
    private function buildRequests(Client $client)
    {
        foreach ($this->users as $index => $user) {
            $uri = 'https://api.github.com/users/' . $user;

            yield $index => function () use ($client, $uri) {
                return $client->getAsync($uri);
            };
        }
    }

    /**
     * Print the GitHub ID found in a successful response, or an error when
     * the body is not a JSON object carrying an "id" field.
     *
     * @param mixed $response Response object handed over by the pool; it is
     *                        only required to expose getBody().
     * @param mixed $index    Key of the corresponding entry in $this->users.
     *
     * @return void
     */
    private function reportFulfilled($response, $index)
    {
        $user = $this->users[$index];

        // Cast the body to a string instead of calling getContents(), which
        // only returns what is left after the current stream position.
        $res = json_decode((string) $response->getBody());

        if (!is_object($res) || !isset($res->id)) {
            $this->error("Unexpected response for user " . $user . " (request $index): no \"id\" field found");
            return;
        }

        $this->info("请求第 $index 个请求,用户 " . $user . " 的 Github ID 为:" . $res->id);
    }

    /**
     * Print why a request was rejected.
     *
     * @param mixed $reason Rejection reason handed over by the pool,
     *                      presumably an exception.
     *
     * @return void
     */
    private function reportRejected($reason)
    {
        // Print only the exception message; concatenating the exception
        // object itself would dump its full stack trace to the console.
        $message = $reason instanceof \Exception ? $reason->getMessage() : (string) $reason;

        $this->error("rejected");
        $this->error("rejected reason: " . $message);
    }
}

https://laravel-china.org/topics/2130/laravel-under-the-use-of-guzzle-to-prepare-multi-threaded-crawler-combat