Useful snippets

Posted: Updated: 3 minute read

I will not update this post over time as planned. I have rather started a new collection to be the home for these snippets. Click below to navigate there.

Learn

Here are some snippets I like to have laying around for a rainy day :)

Import

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import chart_studio.plotly as py
import cufflinks as cf
import dash_core_components as dcc
import dash_html_components as html
import country_converter as coco
import geopy

from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output
from plotly.offline import init_notebook_mode, download_plotlyjs, plot, iplot
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

Setup

Matplotlib

pd.set_option("display.max_rows", 85)
pd.set_option("display.max_columns", 85)

Plotly Offline

init_notebook_mode(connected=True)
cf.go_offline()

Templates

Decorator

import functools

def decorator(func):
    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        # Do something before
        value = func(*args, **kwargs)
        # Do something after
        return value
    return wrapper_decorator

Misc

Check operating system

def os_check():
    from sys import platform

    if platform == "darwin":
        print("macOS")
    elif platform == "win32":
        print("Windows")
    elif platform == "linux" or platform == "linux2":
        print("Linux")
    else:
        print("Oops! Something went wrong.")

Check if path is a file or directory

def path_checker(path):
    from pathlib import Path
    path = Path(path)

    if path.exists():
        if path.is_dir():
            print(f"'{path}' is directory")
        else:
            if path.is_file():
                print(f"'{path}' is a file")
        return True

    else:
        print(f"'{path}' does not exist.")
        return False

Unpack list and return with Oxford comma

def unpack_list(lst):  # Oxford comma
    if not isinstance(lst, str):
        lst = [str(item) for item in lst]
    if len(lst) == 0:
        return
    if len(lst) == 1:
        return ", ".join(lst)
    if len(lst) == 2:
        return ", and ".join(lst) 
    else:
        first_part = lst[:-1]
        last_part = lst[-1]
        return ", ".join(first_part) + ", and " + last_part

Combine two lists to a dictionary

months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
months_alpha = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Des']
dictionary = [{'label': x,'value': y} for x, y in zip(months, months_alpha)]

Pandas

Word search in a DataFrame

def word_search(df, *words):
    if not words or len(words[0]) < 1:
        return

    col_count: int = 0
    sum_words: int = 0
    found_words: str = []

    if isinstance(words[0], str):
        words = [word for word in words]
    else:
        words = list(*words)
        print(words)

    for word in words:
        col_count = 0
        sum_word = 0
        for column in df:
            if df[column].dtype == object or df[column].dtype == str:
                col_count += 1
                sum_word += df[column].str.contains(f"^{word}$").sum()
                if df[column].str.contains(f"^{word}$").any():
                    if word not in found_words:
                        found_words.append(word)
        sum_words += sum_word
    if len(found_words) == 0:
        found_words = words
    print("Columns of dtype str or object:", col_count)
    print(
        f"Instances of {unpack_list(found_words)} in the dataframe: {sum_words}"
    )

Find missing values in a DataFrame

def find_missing_values(df):
    column_names = (df.columns[df.isnull().any() == True]).format()
    miss_columns = df.isna().any().sum()
    miss_values = df.isna().sum().sum()
    print(f"Instances of missing data: {miss_values}")
    print(f"Columns with missing data: {miss_columns}")
    print(f"Column names with missing data: {unpack_list(column_names)}")

Find location of missing values in a DataFrame

def missing_location(df):
    col_criteria = df.isnull().any(axis=0)
    miss_col = df[col_criteria.index[col_criteria]]

    miss_only = miss_col[miss_col.isnull().any(axis=1)]

    row_criteria = df.isnull().any(axis=1)
    miss_row = df[row_criteria]
    
    return miss_col, miss_row, miss_only

Find location data from a DataFrame column

def df_locatinon_data(df, search_col):
    from geopy.geocoders import Nominatim
    from geopy.extra.rate_limiter import RateLimiter
    geolocator = Nominatim(user_agent="my_geocoder")
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=.1)
    # Find the location.
    df['location'] = df[search_col].apply(geocode)
    # Extract point to its own columns.
    df['point'] = df['location'].apply(lambda loc: tuple(loc.point)
                                       if loc else None)
    # split point column into latitude, longitude and altitude columns.
    df[['latitude', 'longitude',
        'altitude']] = pd.DataFrame(df['point'].tolist(), index=df.index)

    return df

Change name of index or row items, or to datetime.

def replace_df_ax_name(df, find, replace_with="", axis=0):

    dff = df.copy()

    if axis == 1:  # <-- Columns
        dff = dff.T

    dff_row = dff.index.to_list()
    dff_dict = {i: dff_row[i] for i in range(len(dff_row))}

    change_index: list = []
    change_dict: dict = {}

    for i, v in dff_dict.items():
        if find in v:
            change_index.append(i)
            if replace_with == "d_to_datetime":
                v = pd.to_datetime(v)
            else:
                v = v.replace(find, replace_with)
            change_dict[i] = v

    dff_dict.update(change_dict)
    dff.index = list(dff_dict.values())

    if axis == 1:  # <-- Columns
        dff = dff.T

    return dff

Comprehensions

Real Python

List

squares = [i * i for i in range(10)]
sentence = 'the rocket came back from mars'
vowels = [i for i in sentence if i in 'aeiou']

def is_consonant(letter):
    vowels = 'aeiou'
    return letter.isalpha() and letter.lower() not in vowels
consonants = [i for i in sentence if is_consonant(i)]
original_prices = [1.25, -9.45, 10.22, 3.78, -5.92, 1.16]
prices = [i if i > 0 else 0 for i in original_prices]

Set

quote = "life, uh, finds a way"
unique_vowels = {i for i in quote if i in 'aeiou'}

Dictionary

squares = {i: i * i for i in range(10)}
squares

Leave a comment