Skip to content

[Feature request] Make async: True do everything under the hood #51

@dimitryzub

Description

@dimitryzub

From a user's perspective, the less setup required, the better. I personally find the second example (example.py) more user-friendly, especially for less technical users.

The user just has to add `async: True` and doesn't have to spend another ~hour tinkering with and figuring out how `Queue` or anything else works.

@jvmvik @ilyazub @hartator what do you guys think?

@aliayar @marm123 @schaferyan have you guys noticed similar issues for the users or have any users requested similar things?


What if instead of this:

# async batch requests: https://github.com/serpapi/google-search-results-python#batch-asynchronous-searches

from serpapi import YoutubeSearch
from queue import Queue
import os, re, json

# Search terms; one search request is submitted per entry.
queries = [
    'burly',
    'creator',
    'doubtful'
]

# Holds the submission response of every search that still has to be collected.
search_queue = Queue()

# Phase 1: submit one search per query and remember each response.
for query in queries:
    params = {
        'api_key': '...',                 
        'engine': 'youtube',              
        'device': 'desktop',              
        'search_query': query,          
        'async': True,                   # ❗ the flag this feature request is about
        'no_cache': 'true'
    }
    search = YoutubeSearch(params)       
    results = search.get_dict()         
    
    # Stop submitting as soon as one response carries an 'error' key;
    # responses queued before this point are still processed below.
    if 'error' in results:
        print(results['error'])
        break

    # NOTE(review): the message says "ID" but interpolates the whole
    # 'search_metadata' dict, not results['search_metadata']['id'].
    print(f"Add search to the queue with ID: {results['search_metadata']}")
    search_queue.put(results)

# Flattened video entries gathered from every finished search.
data = []

# Phase 2: poll the archive until every queued search has been collected.
while not search_queue.empty():
    result = search_queue.get()
    search_id = result['search_metadata']['id']

    print(f'Get search from archive: {search_id}')
    # NOTE(review): `search` is whatever instance the submission loop created
    # last; it is reused here to fetch every queued search ID.
    search_archived = search.get_search_archive(search_id)
    
    print(f"Search ID: {search_id}, Status: {search_archived['search_metadata']['status']}")

    # A status containing 'Cached' or 'Success' is treated as finished.
    if re.search(r'Cached|Success', search_archived['search_metadata']['status']):
        for video_result in search_archived.get('video_results', []):
            data.append({
                'title': video_result.get('title'),
                'link': video_result.get('link'),
                # NOTE(review): raises AttributeError when an entry has no
                # 'channel' key, because .get('channel') then returns None.
                'channel': video_result.get('channel').get('name'),
            })
    else:
        # Any other status puts the search straight back on the queue.
        # NOTE(review): there is no delay before the next poll and no exit
        # for a status that never becomes Cached/Success.
        print(f'Requeue search: {search_id}')
        search_queue.put(result)

Users can do something like this and we handle everything under the hood:

# example.py
# testable example
# example import: from serpapi import async_search

from async_search import async_search
import json

# Search terms handed to the helper in a single call.
queries = ['burly', 'creator', 'doubtful', 'minecraft']

# Settings are passed as keyword arguments here; a params dict, as we
# typically pass, would be the alternative.
data = async_search(
    queries=queries,
    api_key='...',
    engine='youtube',
    device='desktop',
)

# Show the collected results as pretty-printed JSON, then a completion note.
print(json.dumps(data, indent=2))
print('All searches completed')

Under the hood code example:

# async_search.py
# testable example

from serpapi import YoutubeSearch
from queue import Queue
import os, re

# Retained only so that code importing this name keeps working;
# async_search() keeps its pending searches in a per-call queue instead, so an
# interrupted call cannot leak leftovers into the next one.
search_queue = Queue()


def async_search(queries, api_key, engine, device, poll_interval=1.0):
    """Submit one async YouTube search per query and collect the video results.

    Every query is first submitted with ``'async': True``; the submitted
    searches are then polled through ``get_search_archive`` until each one has
    either finished or failed.

    Args:
        queries: Iterable of search strings; each becomes ``search_query``.
        api_key: Value sent as ``api_key`` in the request parameters.
        engine: Value sent as ``engine`` in the request parameters.
            NOTE(review): the client class is always ``YoutubeSearch`` and the
            result parsing reads ``video_results``, so other engines are
            presumably not usable here yet.
        device: Value sent as ``device`` in the request parameters.
        poll_interval: Seconds to wait after a search is found unfinished
            before polling continues. Values <= 0 disable the wait.

    Returns:
        A list of ``{'title', 'link', 'channel'}`` dicts, one per entry found
        in the ``video_results`` of every finished search. ``channel`` is
        ``None`` when an entry carries no channel information.

    If a submission response contains an ``'error'`` key, the error is printed
    and no further queries are submitted; searches submitted before that point
    are still collected.
    """
    import time  # local import: only needed for the polling delay

    pending = Queue()
    data = []

    for query in queries:
        params = {
            'api_key': api_key,
            'engine': engine,
            'device': device,
            'search_query': query,
            'async': True,
            'no_cache': 'true'
        }
        search = YoutubeSearch(params)
        results = search.get_dict()

        if 'error' in results:
            print(results['error'])
            break

        print(f"Add search to the queue with ID: {results['search_metadata']['id']}")
        # Keep the client next to its response so polling never depends on
        # whichever instance the loop happened to create last.
        pending.put((search, results))

    while not pending.empty():
        search, result = pending.get()
        search_id = result['search_metadata']['id']

        print(f'Get search from archive: {search_id}')
        search_archived = search.get_search_archive(search_id)

        status = search_archived['search_metadata']['status']
        print(f"Search ID: {search_id}, Status: {status}")

        if re.search(r'Cached|Success', status):
            for video_result in search_archived.get('video_results', []):
                # 'channel' may be absent or None; fall back to an empty dict
                # so the name lookup yields None instead of raising.
                channel = video_result.get('channel') or {}
                data.append({
                    'title': video_result.get('title'),
                    'link': video_result.get('link'),
                    'channel': channel.get('name'),
                })
        elif re.search(r'Error', status):
            # A failed search never turns into Success; requeueing it would
            # loop forever, so report it and move on.
            print(search_archived.get('error', f'Search failed: {search_id}'))
        else:
            print(f'Requeue search: {search_id}')
            pending.put((search, result))
            # Give the search time to finish instead of polling in a tight loop.
            if poll_interval > 0:
                time.sleep(poll_interval)

    return data

Is there a specific reason we haven't done it before?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions