Social media scraping

Here you can find code integration examples for basic social media scraping, such as posts, profiles, etc.

Instagram

Scraping a post

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "headless": "html",
    "url": "https://www.instagram.com/p/Ch2hW9-JHTT/"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://www.instagram.com/p/Ch2hW9-JHTT/"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  headless: 'html',
  url: 'https://www.instagram.com/p/Ch2hW9-JHTT/'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Instagram page content</html>"
      "status_code": 200,
      "url": "https://www.instagram.com/p/Ch2hW9-JHTT/",
      "task_id": "6971442143109891073",
      "created_at": "2022-09-02 12:22:10",
      "updated_at": "2022-09-02 12:22:30"
    }
  ]
}

Scraping a profile

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "url": "https://www.instagram.com/eminem/",
    "headless": "html"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"url":"https://www.instagram.com/eminem/","headless":"html"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  url: 'https://www.instagram.com/eminem/',
  headless: 'html'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Instagram page content</html>"
      "status_code": 200,
      "url": "https://www.instagram.com/eminem/",
      "task_id": "6971440655478339585",
      "created_at": "2022-09-02 12:16:15",
      "updated_at": "2022-09-02 12:16:37"
    }
  ]
}

Facebook

Scraping a post

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "headless": "html",
    "url": "https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  headless: 'html',
  url: 'https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Facebook page content</html>"
      "status_code": 200,
      "url": "https://www.facebook.com/zuck/posts/pfbid0HeY54v4LMcv2EMxDz5RvnWaR6swsGFWikzUbrsEFtvxu9n4GCx7zA2YTM69XdiYnl",
      "task_id": "6972484278999372801",
      "created_at": "2022-09-05 09:23:14",
      "updated_at": "2022-09-05 09:23:32"
    }
  ]
}

Scraping a page

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "url": "https://www.facebook.com/ladygaga"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"url":"https://www.facebook.com/ladygaga"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({target: 'universal', parse: false, url: 'https://www.facebook.com/ladygaga'})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Facebook page content</html>"
      "status_code": 200,
      "url": "https://www.facebook.com/ladygaga",
      "task_id": "6972452679540839425",
      "created_at": "2022-09-05 07:17:40",
      "updated_at": "2022-09-05 07:17:45"
    }
  ]
}

Scraping a group

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "url": "https://www.facebook.com/groups/1394454774138066"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"url":"https://www.facebook.com/groups/1394454774138066"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  url: 'https://www.facebook.com/groups/1394454774138066'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Facebook page content</html>"
      "status_code": 200,
      "url": "https://www.facebook.com/groups/1394454774138066",
      "task_id": "6972486765374350337",
      "created_at": "2022-09-05 09:33:07",
      "updated_at": "2022-09-05 09:33:33"
    }
  ]
}

Twitter

Scraping a post

const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  url: 'https://www.facebook.com/groups/1394454774138066'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA","device_type":"desktop"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  headless: 'html',
  url: 'https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA',
  device_type: 'desktop'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Twitter page content</html>"
      "status_code": 200,
      "url": "https://twitter.com/elonmusk/status/1552317587694010368?cxt=HHwWgIC-keOn-IorAAAA",
      "task_id": "6973211115165632513",
      "created_at": "2022-09-07 09:31:25",
      "updated_at": "2022-09-07 09:31:55"
    }
  ]
}

Scraping a profile

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "headless": "html",
    "url": "https://twitter.com/elonmusk",
    "device_type": "desktop"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"headless":"html","url":"https://twitter.com/elonmusk","device_type":"desktop"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  headless: 'html',
  url: 'https://twitter.com/elonmusk',
  device_type: 'desktop'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> Twitter page content</html>"
      "status_code": 200,
      "url": "https://twitter.com/elonmusk",
      "task_id": "6973191788077014017",
      "created_at": "2022-09-07 08:14:37",
      "updated_at": "2022-09-07 08:15:02"
    }
  ]
}

Tiktok

Scraping a post

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "url": "https://www.tiktok.com/@soukainasing1/video/7139227399035571462"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"url":"https://www.tiktok.com/@soukainasing1/video/7139227399035571462"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  url: 'https://www.tiktok.com/@soukainasing1/video/7139227399035571462'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> TikTok page content</html>"
      "status_code": 200,
      "url": "https://www.tiktok.com/@soukainasing1/video/7139227399035571462",
      "task_id": "6972492376870795265",
      "created_at": "2022-09-05 09:55:25",
      "updated_at": "2022-09-05 09:55:29"
    }
  ]
}

Scraping a profile

import requests

url = "https://scrape.smartproxy.com/v1/tasks"

payload = {
    "target": "universal",
    "parse": False,
    "url": "https://www.tiktok.com/@soukainasing1"
}
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://scrape.smartproxy.com/v1/tasks', [
  'body' => '{"target":"universal","parse":false,"url":"https://www.tiktok.com/@soukainasing1"}',
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
]);

echo $response->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

sdk.auth('SPusername', 'SPpassword');
sdk.realTimeExample({
  target: 'universal',
  parse: false,
  url: 'https://www.tiktok.com/@soukainasing1'
})
  .then(res => console.log(res))
  .catch(err => console.error(err));
{
  "results": [
    {
      "content": "<html> TikTok page content</html>"
      "status_code": 200,
      "url": "https://www.tiktok.com/@soukainasing1",
      "task_id": "6972490335716924417",
      "created_at": "2022-09-05 09:47:18",
      "updated_at": "2022-09-05 09:47:22"
    }
  ]
}

You can check out our Github page on social media scraping here