Social media scraping
Here you can find code integration examples for basic social media scraping, such as posts, profiles, etc.
Instagram
Scraping a post
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"headless": "html",
"url": "https://www.instagram.com/p/Ch2hW9-JHTT/"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://www.instagram.com/p/Ch2hW9-JHTT/"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
headless: 'html',
url: 'https://www.instagram.com/p/Ch2hW9-JHTT/'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Instagram page content</html>",
"status_code": 200,
"url": "https://www.instagram.com/p/Ch2hW9-JHTT/",
"task_id": "6971442143109891073",
"created_at": "2022-09-02 12:22:10",
"updated_at": "2022-09-02 12:22:30"
}
]
}
Scraping a profile
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"url": "https://www.instagram.com/eminem/",
"headless": "html"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"url":"https://www.instagram.com/eminem/","headless":"html"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
url: 'https://www.instagram.com/eminem/',
headless: 'html'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Instagram page content</html>",
"status_code": 200,
"url": "https://www.instagram.com/eminem/",
"task_id": "6971440655478339585",
"created_at": "2022-09-02 12:16:15",
"updated_at": "2022-09-02 12:16:37"
}
]
}
Facebook
Scraping a post
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"headless": "html",
"url": "https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
headless: 'html',
url: 'https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Facebook page content</html>",
"status_code": 200,
"url": "https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl",
"task_id": "6972484278999372801",
"created_at": "2022-09-05 09:23:14",
"updated_at": "2022-09-05 09:23:32"
}
]
}
Scraping a page
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"url": "https://www.facebook.com/ladygaga"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"url":"https://www.facebook.com/ladygaga"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({target: 'universal', parse: false, url: 'https://www.facebook.com/ladygaga'})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Facebook page content</html>",
"status_code": 200,
"url": "https://www.facebook.com/ladygaga",
"task_id": "6972452679540839425",
"created_at": "2022-09-05 07:17:40",
"updated_at": "2022-09-05 07:17:45"
}
]
}
Scraping a group
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"url": "https://www.facebook.com/groups/1394454774138066"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"url":"https://www.facebook.com/groups/1394454774138066"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
url: 'https://www.facebook.com/groups/1394454774138066'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Facebook page content</html>",
"status_code": 200,
"url": "https://www.facebook.com/groups/1394454774138066",
"task_id": "6972486765374350337",
"created_at": "2022-09-05 09:33:07",
"updated_at": "2022-09-05 09:33:33"
}
]
}
Twitter
Scraping a post
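import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"headless": "html",
"url": "https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA",
"device_type": "desktop"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)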
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA","device_type":"desktop"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
headless: 'html',
url: 'https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA',
device_type: 'desktop'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Twitter page content</html>",
"status_code": 200,
"url": "https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA",
"task_id": "6973211115165632513",
"created_at": "2022-09-07 09:31:25",
"updated_at": "2022-09-07 09:31:55"
}
]
}
Scraping a profile
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"headless": "html",
"url": "https://twitter.com/elonmusk",
"device_type": "desktop"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://twitter.com/elonmusk","device_type":"desktop"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
headless: 'html',
url: 'https://twitter.com/elonmusk',
device_type: 'desktop'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> Twitter page content</html>",
"status_code": 200,
"url": "https://twitter.com/elonmusk",
"task_id": "6973191788077014017",
"created_at": "2022-09-07 08:14:37",
"updated_at": "2022-09-07 08:15:02"
}
]
}
TikTok
Scraping a post
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"url": "https://www.tiktok.com/@soukainasing1/video/7139227399035571462"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"url":"https://www.tiktok.com/@soukainasing1/video/7139227399035571462"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
url: 'https://www.tiktok.com/@soukainasing1/video/7139227399035571462'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> TikTok page content</html>",
"status_code": 200,
"url": "https://www.tiktok.com/@soukainasing1/video/7139227399035571462",
"task_id": "6972492376870795265",
"created_at": "2022-09-05 09:55:25",
"updated_at": "2022-09-05 09:55:29"
}
]
}
Scraping a profile
import requests
url = "https://scrape.smartproxy.com/v1/tasks"
payload = {
"target": "universal",
"parse": False,
"url": "https://www.tiktok.com/@soukainasing1"
}
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}
response = requests.post(url, json=payload, headers=headers)
print(response.text)
<?php
require_once('vendor/autoload.php');
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
'body' => '{"target":"universal","parse":false,"url":"https://www.tiktok.com/@soukainasing1"}',
'headers' => [
'Accept' => 'application/json',
'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
'Content-Type' => 'application/json',
],
]);
echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');
sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
target: 'universal',
parse: false,
url: 'https://www.tiktok.com/@soukainasing1'
})
.then(res => console.log(res))
.catch(err => console.error(err));
{
"results": [
{
"content": "<html> TikTok page content</html>",
"status_code": 200,
"url": "https://www.tiktok.com/@soukainasing1",
"task_id": "6972490335716924417",
"created_at": "2022-09-05 09:47:18",
"updated_at": "2022-09-05 09:47:22"
}
]
}
You can check out our GitHub page on social media scraping here.
Updated about 1 year ago