Functions

Useful functions
Author

Benedict Thekkel

!pip list | grep fast
fastai                    2.7.14
fastbook                  0.0.29
fastcore                  1.5.29
fastdownload              0.0.7
fastjsonschema            2.19.1
fastprogress              1.0.3

Reading a list from a text file


source

read_from_file

 read_from_file (file_path)
Exported source
def read_from_file(file_path):
    'Read a text file and return its lines, whitespace-stripped, as a tuple'
    # Only the standard library is needed here; the previous function-local
    # imports of fastbook / fastdownload / PIL were unused and made the
    # function fail on machines without those packages installed.
    with open(file_path, 'r') as file:
        # One entry per line, in file order. Blank lines are kept as empty
        # strings, exactly as before.
        return tuple(line.strip() for line in file)

Download single pictures


source

download_pic

 download_pic (image:str, n_images:int=1, name:str='', folder:str='',
               show_progress:bool=False, recreate:bool=False)

Downloads the image into the folder provided and displays it

Type Default Details
image str image description
n_images int 1
name str image name
folder str File path of the image
show_progress bool False
recreate bool False
Exported source
def download_pic(
    image:str, #image description
    n_images:int=1, # number of search results to download
    name:str='', #image name
    folder:str='',   # File path of the image
    show_progress:bool=False, # print each target path and show the download progress
    recreate:bool=False # download again even if `{name}0.jpg` already exists
): 
    'Downloads the image into the folder provided and displays it'
    assert isinstance(image, str), "image must be a str."
    assert isinstance(name, str), "name must be a str."
    assert isinstance(folder, str), "folder must be a str."
    assert isinstance(n_images, int), "n_images must be an integer."
    assert isinstance(show_progress, bool), "show_progress must be a bool."
    assert isinstance(recreate, bool), "recreate must be a bool."

    from fastbook import search_images_ddg
    from fastdownload import download_url
    import os
    from PIL import Image    
    from pathlib import Path
    from tqdm.notebook import tqdm

    # An empty folder means "current directory"
    folder = Path(folder) if folder != '' else Path()

    # Default the file stem to the search text
    if name == '': name = image

    # Image number 0 doubles as the "already downloaded" marker
    image_path = f'{folder}/{name}{0}.jpg'
    if recreate is False and os.path.exists(image_path):
        print("Image file exists.")
    else:
        search_links = search_images_ddg(
                        f'{image}',
                        max_images=n_images)

        # The search may return fewer links than requested, so iterate over
        # the links actually received instead of indexing up to n_images.
        links = list(search_links)[:max(n_images, 0)]

        last_saved = None
        for i, link in enumerate(tqdm(links)):
            image_path = f'{folder}/{name}{i}.jpg'
            try:
                # Report the real destination (previously the literal text
                # "image_path" was printed instead of the path).
                if show_progress: print(f"Downloading {image_path}")
                download_url(
                    link, image_path,
                    show_progress=show_progress
                )
                last_saved = image_path
            except Exception as e:
                # Best effort: report the failure and carry on with the next link
                print("An error occurred:", e)

        # Show the last image that was actually saved rather than the last
        # one attempted, which may not exist on disk.
        if last_saved is not None: image_path = last_saved

    # NOTE(review): `to_thumb` is not a stock PIL method; presumably it is
    # patched onto PIL images by fastai when fastbook is imported - confirm.
    return Image.open(image_path).to_thumb(256,256)
# Example: fetch one 'bird' image into ./Data; recreate=True skips the
# "Image file exists." shortcut and downloads again.
download_pic('bird',
             n_images = 1,
             folder = './Data',
             show_progress=False,
             recreate = True)

Downloading Large Dataset of Images


source

create_searches_folder

 create_searches_folder (folder_path, searches, show_progress:bool=False)
Exported source
def create_searches_folder(folder_path, searches, show_progress:bool=False):
    'Create one sub-folder of `folder_path` per entry in `searches`'
    from pathlib import Path
    from tqdm.notebook import tqdm

    # Accept a plain string as well as a Path; the `/` join below needs a Path
    folder_path = Path(folder_path)

    print("Create folder")
    for i in tqdm(searches):
        dest = folder_path/i
        # Missing parents are created and existing folders are left untouched
        dest.mkdir(exist_ok=True, parents=True)
        if show_progress: print(f'created {i} folder')

source

download_search_image

 download_search_image (folder_path, item, before, after, amount,
                        show_progress:bool=False)
Exported source
def download_search_image(folder_path,
                          item,
                          before,
                          after,
                          amount,
                          show_progress:bool=False,
                         ):
    'Download search results for `item` into `folder_path/item`, retrying with fewer images on failure'
    from fastbook import search_images_ddg
    from fastai.vision.all import download_images
    from pathlib import Path

    # Accept a plain string as well as a Path; the `/` join below needs a Path
    folder_path = Path(folder_path)

    # The search text is the item wrapped in the caller-supplied prefix/suffix
    query = f'{before}{item}{after}'

    # On any failure the request is retried with 20 fewer images until the
    # count is no longer positive. A loop replaces the previous recursive
    # retry, which also dropped `show_progress` and so went silent.
    imgAmount = amount
    while True:
        try:
            urls=search_images_ddg(query, imgAmount)
            if show_progress: print(f"downloading {imgAmount} images for:{query}")

            download_images(
            folder_path/item,
            urls=urls,
            n_workers=16
            )
            return

        except Exception as e:
            # Best effort: report (when asked to) and retry with a smaller request
            if show_progress: print(f"Error with {imgAmount} images of {query}:", e)
            imgAmount -= 20
            if imgAmount <= 0: return

source

download_search_images

 download_search_images (folder_path, searches, before, after, amount,
                         show_progress:bool=False)
Exported source
def download_search_images(folder_path,
                           searches,
                           before,
                           after,
                           amount,
                           show_progress:bool=False,
                          ):
    'Run `download_search_image` for every entry in `searches`, showing a progress bar'
    from tqdm.notebook import tqdm
    for item in tqdm(searches):
        # Every item is requested with the same `amount`
        # (the unused local copy of `amount` has been removed).
        download_search_image(folder_path, item, before, after, amount, show_progress)

source

verify_pics

 verify_pics (folder_path)
Exported source
def verify_pics(folder_path):
    'Delete every image under `folder_path` that `verify_images` reports as failed and print how many'
    from fastai.vision.all import verify_images, get_image_files
    from pathlib import Path

    # Collect the image files, then ask fastai which ones fail verification
    image_files = get_image_files(folder_path)
    failed = verify_images(image_files)

    # Remove each failed file from disk
    for bad_file in failed:
        Path.unlink(bad_file)

    print(f"Number of images failed: {len(failed)}")

source

resize_pics

 resize_pics (folder_path, searches, max_size=400, show:bool=True)
Exported source
def resize_pics(folder_path, searches, max_size=400, show:bool=True):
    'Resize the images of every `searches` sub-folder in place, capped at `max_size`'
    from fastai.vision.all import resize_images
    from tqdm.notebook import tqdm

    for category in tqdm(searches):
        # Source and destination are the same folder, so images are resized in place
        category_dir = folder_path/category
        resize_images(category_dir, max_size=max_size, dest=category_dir, max_workers=16)
        if show == True:
            print(f"resizing images for: {category}")

source

create_data_folder

 create_data_folder (folder_path:str, searches:tuple, before:str='',
                     after:str='', amount:int=200, recreate:bool=False,
                     show_progress:bool=False)

generate image data

Exported source
def create_data_folder(
    folder_path:str, # root folder that receives one sub-folder per search term
    searches:tuple, # search terms; each one becomes a sub-folder
    before:str='', # text put in front of every search term
    after:str='', # text put after every search term
    amount:int=200, # number of images requested per search term
    recreate:bool=False, # delete and rebuild the folder if it already exists
    show_progress:bool=False, # print progress messages from each step
):
    'generate image data'
    # The check requires a tuple, so the message now says so (it said "list")
    assert isinstance(searches, tuple), "searches must be a tuple."
    assert isinstance(amount, int), "amount must be an int."
    assert isinstance(recreate, bool), "recreate must be a bool."
    assert isinstance(before, str), "before must be a str."
    assert isinstance(after, str), "after must be a str."

    # Only standard-library modules are used directly here; the fastai /
    # fastdownload / PIL imports that used to be here were unused.
    import os
    import shutil
    from pathlib import Path

    folder_path = Path(folder_path)
    already_there = os.path.exists(folder_path)

    # Keep an existing data set unless the caller asked to rebuild it
    if already_there and recreate is False:
        print(f"Folder already exists: {folder_path}") 
        return

    # Rebuilding: start from an empty folder
    if already_there:
        shutil.rmtree(folder_path)

    create_searches_folder(folder_path, searches, show_progress)
    download_search_images(folder_path, searches, before, after, amount, show_progress)
    verify_pics(folder_path)
    # Forward show_progress so resizing is silent when the caller asked for
    # no progress output (it used to print regardless).
    resize_pics(folder_path, searches, show=show_progress)
# Example: build a two-class image folder, one sub-folder per search term.
searches = ('forests','birds')
path = 'Data/bird_or_not'

# recreate=True deletes an existing folder first; amount=1 requests a single
# image per search term.
create_data_folder(folder_path=path,
                             searches=searches,
                             amount=1,
                             recreate= True,
                             show_progress=True)
Create folder
created forests folder
created birds folder
downloading 1 images for:forests
downloading 1 images for:birds
Number of images failed: 0
resizing images for: forests
resizing images for: birds

Classifying Images


source

classify_images

 classify_images (learn, img)

image classifier

Exported source
def classify_images(learn, img):
    'Predict `img` with `learn` and return a dict mapping each vocab entry to its probability in percent'
    from fastai.vision.all import PILImage

    labels = learn.dls.vocab
    # Only the probabilities of the prediction triple are needed
    _, _, probabilities = learn.predict(PILImage.create(img))

    # Percentages are rounded to five decimal places
    return {
        label: round(float(probability*100), 5)
        for label, probability in zip(labels, probabilities)
    }

Kaggle Shortcut


source

kaggle_competition_download

 kaggle_competition_download (name:str, folderpath:str='./Data')

download competition files from kaggle

Exported source
def kaggle_competition_download(
    name:str, # competition name
    folderpath:str = './Data' # local folder that receives a `name` sub-folder
):
    'download competition files from kaggle'
    import os
    from pathlib import Path

    # When KAGGLE_KERNEL_RUN_TYPE is set, nothing is downloaded and the
    # ../input/<name> location is used (presumably where Kaggle kernels
    # expose the data - confirm).
    if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', ''):
        return Path(f'../input/{name}')

    path = Path(f'{folderpath}/{name}')
    if path.exists():
        print("file exists")
        return path

    # Imported lazily so the two early-return paths work without the kaggle package
    import zipfile,kaggle
    kaggle.api.competition_download_cli(competition = name, path = path)
    # Context manager so the archive handle is closed after extraction
    with zipfile.ZipFile(f'{path}/{name}.zip') as archive:
        archive.extractall(path)

    # Return the data location so callers can use it (nothing was returned before)
    return path

source

kaggle_dataset_download

 kaggle_dataset_download (user:str, name:str, folderpath:str='./Data')

download dataset files from kaggle

Exported source
def kaggle_dataset_download(user:str, # owner of the dataset
                            name:str, # dataset name
                            folderpath:str = './Data' # local folder that receives a `name` sub-folder
                           ):
    'download dataset files from kaggle'
    import os
    from pathlib import Path

    # When KAGGLE_KERNEL_RUN_TYPE is set, nothing is downloaded and the
    # ../input/<name> location is used (presumably where Kaggle kernels
    # expose the data - confirm).
    if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', ''):
        return Path(f'../input/{name}')

    path = Path(f'{folderpath}/{name}')
    if path.exists():
        print("file exists")
        return path

    # Imported lazily so the two early-return paths work without the kaggle package
    import zipfile,kaggle
    kaggle.api.dataset_download_files(dataset = f'{user}/{name}', path = path)
    # Context manager so the archive handle is closed after extraction
    with zipfile.ZipFile(f'{path}/{name}.zip') as archive:
        archive.extractall(path)

    # Return the data location so callers can use it (nothing was returned before)
    return path

Graphviz


source

graph

 graph (*args, **kwargs)

*Directed graph source code in the DOT language.

Args: name: Graph name used in the source code. comment: Comment added to the first line of the source. filename: Filename for saving the source (defaults to name + '.gv'). directory: (Sub)directory for source saving and rendering. format: Rendering output format ('pdf', 'png', …). engine: Layout command used ('dot', 'neato', …). renderer: Output renderer used ('cairo', 'gd', …). formatter: Output formatter used ('cairo', 'gd', …). encoding: Encoding for saving the source. graph_attr: Mapping of (attribute, value) pairs for the graph. node_attr: Mapping of (attribute, value) pairs set for all nodes. edge_attr: Mapping of (attribute, value) pairs set for all edges. body: Iterable of verbatim lines (including their final newline) to add to the graph body. strict (bool): Rendering should merge multi-edges.

Note: All parameters are optional and can be changed under their corresponding attribute name after instance creation.*

Exported source
from graphviz import Digraph
Exported source
class graph(Digraph):
    'Digraph with a fixed colour palette and default graph, node and edge styling applied'
    def __init__(self, *args, **kwargs):
        # Build the underlying Digraph with whatever the caller supplied
        super().__init__(*args, **kwargs)

        # Colour palette, kept on the instance
        (self.primary,
         self.secondary,
         self.third,
         self.fourth,
         self.fifth) = ('#fdfcdc', '#fcbf49', '#f77f00', '#d62828', '#003049')

        # One font stack shared by graph, nodes and edges
        font_stack = "Helvetica,Arial,sans-serif"

        # Graph-level defaults
        self.graph_attr.update({
            'style': 'rounded,filled',
            'rankdir': 'LR',
            'compound': 'true',
            'fillcolor': self.fourth,
            'fontcolor': self.fifth,
            'penwidth': '0',
            'fontname': font_stack,
        })

        # Defaults for every node
        self.node_attr.update({
            'style': 'rounded,filled',
            'size': '8,5',
            'shape': 'box',
            'width': '1.5',
            'fillcolor': self.secondary,
            'fontcolor': self.fifth,
            'penwidth': '0',
            'fontname': font_stack,
        })

        # Defaults for every edge
        self.edge_attr.update({
            'arrowhead': 'vee',
            'arrowsize': '1',
            'color': self.fifth,
            'len': '1.00',
            'fontname': font_stack,
            'penwidth': '1',
        })
# Example: a five-node chain x -> a(x) -> y -> b(y) -> z using the themed graph class.
dot = graph()
# Add nodes; 'a' and 'b' override the default node shape with a circle
dot.node('x', 'x')
dot.node('a', 'a(x)', shape='circle')
dot.node('y', 'y')
dot.node('b', 'b(y)', shape='circle')
dot.node('z', 'z')

# Connect the nodes in order (no edge labels are set)
dot.edge('x', 'a')
dot.edge('a', 'y')
dot.edge('y', 'b')
dot.edge('b', 'z')

# Render the graph
dot

Back to top