Monday, February 27, 2017

User Defined kNN Prediction (Python)

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 27 15:45:28 2017

@author: varun
"""
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss

def plotting_points(points,p,n, predicted_outcome):
    """Draw the two groups of points in a fresh matplotlib figure.

    The first ``n`` rows of ``points`` are plotted as red circles and the
    remaining rows as blue circles, using column 0 as x and column 1 as y.
    ``p`` and ``predicted_outcome`` are accepted but not used here.
    """
    plt.figure()
    # Style string per group: rows before index n are red, the rest blue.
    for rows, marker in ((slice(None, n), "ro"), (slice(n, None), "bo")):
        plt.plot(points[rows, 0], points[rows, 1], marker)
 
def distance (p1,p2):
    """Return the Euclidean distance between two points.

    Args:
        p1: Coordinates of the first point (NumPy array or array-like).
        p2: Coordinates of the second point; must have the same shape as p1.

    Returns:
        The square root of the sum of squared coordinate differences.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Accept plain lists/tuples as well as arrays.
    p1 = np.asarray(p1)
    p2 = np.asarray(p2)

    # Fail loudly: returning None here would end up stored as NaN by callers
    # that write the result into a float array.
    if p1.shape != p2.shape:
        raise ValueError(
            'Distance cannot be found, both arrays have different dimensions: '
            '{} vs {}'.format(p1.shape, p2.shape)
        )

    return np.sqrt(np.sum(np.power(p2 - p1, 2)))
     
def majority_vote(votes):
    """Return the most frequent element of ``votes``.

    When several elements share the highest frequency, one of them is
    picked at random with ``random.choice``.
    """
    import random

    # Tally how often each distinct vote appears.
    tally = {}
    for ballot in votes:
        tally[ballot] = tally.get(ballot, 0) + 1

    top_count = max(tally.values())

    # Every vote that reached the top count is a candidate winner.
    leaders = [ballot for ballot, seen in tally.items() if seen == top_count]
    return random.choice(leaders)
     
def kNeareastNeighbor(p, points, k = 5):
    """Return the indices of the ``k`` rows of ``points`` closest to ``p``.

    Distances are computed with ``distance`` and the indices are ordered
    from nearest to farthest.  If ``points`` has fewer than ``k`` rows, all
    indices are returned.
    """
    gaps = np.zeros(points.shape[0])
    for row, candidate in enumerate(points):
        gaps[row] = distance(p, candidate)

    # argsort gives row indices ordered by increasing distance.
    return np.argsort(gaps)[:k]
 
def kNN_Predict_userdefined(p, points, outcomes, k=5):
    """Predict the outcome for point ``p`` from its ``k`` nearest neighbours.

    The neighbours are located with ``kNeareastNeighbor`` and their entries
    in ``outcomes`` are passed to ``majority_vote``, which returns the most
    common one (ties are broken at random).
    """
    nearest = kNeareastNeighbor(p, points, k)
    neighbour_outcomes = outcomes[nearest]
    return majority_vote(neighbour_outcomes)

def generate_synthetic_data(n=50):
    """Generate two labelled clouds of ``n`` random 2-D points each.

    The first ``n`` points are drawn from a normal distribution with mean 0
    and standard deviation 1, the next ``n`` from one with mean 1 and
    standard deviation 1.

    Returns:
        A tuple ``(points, outcomes)``: ``points`` stacks the two (n, 2)
        samples row-wise and ``outcomes`` holds ``n`` zeros followed by
        ``n`` ones.
    """
    shape = (n, 2)
    # Draw order matters for reproducibility under a fixed seed:
    # the class-0 sample is generated before the class-1 sample.
    class_zero = ss.norm(0, 1).rvs(shape)
    class_one = ss.norm(1, 1).rvs(shape)

    points = np.concatenate((class_zero, class_one), axis=0)
    outcomes = np.repeat([0, 1], n)
    return points, outcomes


--------------------------------------------------------------------------------------------------------------------------
Order of execution

# Number of synthetic points generated per class.
n = 200

points, outcomes = generate_synthetic_data(n)

# Example query point to classify; replace with the point of interest.
p = np.array([0.5, 0.5])

# The predictor defined above is named kNN_Predict_userdefined.
predicted_outcome = kNN_Predict_userdefined(p, points, outcomes)

plotting_points(points, p, n, predicted_outcome)

Friday, February 24, 2017

Counting the winner in candidates with specific list of votes (Python)

def majority_vote(votes):
    """Return the most frequent element of ``votes``.

    When several elements share the highest frequency, one of them is
    picked at random with ``random.choice``.
    """
    import random

    # Tally how often each distinct vote appears.
    tally = {}
    for ballot in votes:
        tally[ballot] = tally.get(ballot, 0) + 1

    top_count = max(tally.values())

    # Every vote that reached the top count is a candidate winner.
    leaders = [ballot for ballot, seen in tally.items() if seen == top_count]
    return random.choice(leaders)

Wednesday, February 15, 2017

Counting Words in Text (Python)

def count_words_fast(text):
    """Count how many times each word occurs in ``text``.

    Words are maximal runs of non-whitespace characters, so spaces, tabs
    and line breaks all act as separators.  Line breaks therefore never
    glue two words together, and repeated separators or empty text never
    produce an empty-string "word".

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each word to its number of occurrences.
    """
    counts = {}
    # split() with no argument splits on any whitespace run and drops empties.
    for word in text.split():
        counts[word] = counts.get(word, 0) + 1
    return counts
def word_count_distribution(text):
    """Return how many distinct words share each occurrence count.

    The words of ``text`` are counted with ``count_words_fast``; the result
    maps an occurrence count to the number of words that appear exactly
    that many times.
    """
    histogram = {}

    for occurrences in count_words_fast(text).values():
        histogram[occurrences] = histogram.get(occurrences, 0) + 1
    return histogram

Monday, December 19, 2016

Running Average Query (Python)

# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import random

# Seed Python's module-level random generator with a fixed value (1).
random.seed(1)

def moving_window_average(x, n_neighbors=1, ndigits=2):
    """Compute the running average of ``x`` over a centred window.

    Each output value is the mean of the element at that position and its
    ``n_neighbors`` neighbours on either side.  Positions that fall outside
    the sequence are filled with the first or last element, so the output
    has the same length as the input.

    Args:
        x: Sequence of numbers.
        n_neighbors: Number of neighbours on each side included in the
            window (window width is ``2 * n_neighbors + 1``).
        ndigits: Number of decimal places each average is rounded to.
            Pass ``None`` to keep the unrounded values.

    Returns:
        The list of averages.  It is also printed, as before.
    """
    n = len(x)
    width = n_neighbors * 2 + 1
    y = []
    for centre in range(n):
        # Clamp every window index into [0, n - 1] to pad with edge values.
        total = sum(
            x[min(max(k, 0), n - 1)]
            for k in range(centre - n_neighbors, centre + n_neighbors + 1)
        )
        mean = total / width
        y.append(mean if ndigits is None else round(mean, ndigits))
    print(y)
    return y

Note: if you check this solution in DataCamp, it will be marked as incorrect unless the call to the round function is removed. :)