Monday, February 27, 2017

User Defined kNN Prediction (Python)

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 27 15:45:28 2017

@author: varun
"""
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss

def plotting_points(points,p,n, predicted_outcome):
    """Draw the two groups of points on a fresh figure.

    The first ``n`` rows of ``points`` are drawn as red circles and the
    remaining rows as blue circles, with column 0 on the x axis and
    column 1 on the y axis. ``p`` and ``predicted_outcome`` are accepted
    but are not used by this function.
    """
    plt.figure()
    groups = ((points[:n], "ro"), (points[n:], "bo"))
    for group, marker in groups:
        plt.plot(group[:, 0], group[:, 1], marker)
 
def distance(p1, p2):
    """Return the Euclidean distance between two points.

    Args:
        p1: Coordinates of the first point (NumPy array or array-like).
        p2: Coordinates of the second point; must have the same shape
            as ``p1``.

    Returns:
        The square root of the sum of squared coordinate differences.

    Raises:
        ValueError: If ``p1`` and ``p2`` do not have the same shape.
    """
    p1 = np.asarray(p1)
    p2 = np.asarray(p2)

    # Fail loudly: silently returning None here only surfaces later as a
    # confusing error in whatever code consumes the distance.
    if p1.shape != p2.shape:
        raise ValueError(
            "Distance cannot be found, both arrays have different dimensions: "
            f"{p1.shape} vs {p2.shape}"
        )

    return np.sqrt(np.sum(np.power(p2 - p1, 2)))
     
def majority_vote(votes):
    """Return the most frequent vote, breaking ties with a random pick.

    Args:
        votes: Iterable of hashable votes.

    Returns:
        One of the votes that occurs the maximum number of times. When
        several votes share the maximum count, one of them is chosen
        with ``random.choice``.

    Raises:
        ValueError: If ``votes`` is empty.
    """
    import random
    from collections import Counter

    vote_counts = Counter(votes)
    if not vote_counts:
        raise ValueError("majority_vote() needs at least one vote")

    max_count = max(vote_counts.values())

    # Collect every vote tied for first place so the tie-break is fair.
    winners = [vote for vote, count in vote_counts.items() if count == max_count]
    return random.choice(winners)
     
def kNeareastNeighbor(p, points, k = 5):
    """Return the indices of the ``k`` entries of ``points`` closest to ``p``.

    Args:
        p: The query point (NumPy array or array-like).
        points: Collection of points, one per entry along the first axis;
            each entry must have the same shape as ``p``.
        k: Number of nearest neighbours to return.

    Returns:
        Array of indices into ``points``, ordered from nearest to
        farthest by Euclidean distance and truncated to ``k`` entries
        (fewer if ``points`` has fewer than ``k`` entries).

    Raises:
        ValueError: If the entries of ``points`` do not have the same
            shape as ``p``.
    """
    p = np.asarray(p)
    points = np.asarray(points)

    if points.ndim < 1 or points.shape[1:] != p.shape:
        raise ValueError(
            "Distance cannot be found, both arrays have different dimensions: "
            f"p has shape {p.shape}, points has shape {points.shape}"
        )

    # Compute every distance in one vectorised pass; summing over all axes
    # except the first gives one squared distance per point.
    coordinate_axes = tuple(range(1, points.ndim))
    distances = np.sqrt(np.sum(np.power(points - p, 2), axis=coordinate_axes))

    return np.argsort(distances)[:k]
 
def kNN_Predict_userdefined(p, points, outcomes, k=5):
    """Predict the outcome of ``p`` from its ``k`` nearest neighbours.

    Looks up the indices of the nearest entries of ``points`` with
    ``kNeareastNeighbor``, selects the matching entries of ``outcomes``
    and returns the result of ``majority_vote`` on them.
    """
    neighbour_outcomes = outcomes[kNeareastNeighbor(p, points, k)]
    return majority_vote(neighbour_outcomes)

def generate_synthetic_data(n=50):
    """Create a labelled two-class sample of ``2 * n`` two-dimensional points.

    The first ``n`` rows are drawn from a normal distribution with mean 0
    and standard deviation 1, the last ``n`` rows from a normal
    distribution with mean 1 and standard deviation 1.

    Returns a tuple ``(points, outcomes)`` where ``points`` has shape
    ``(2 * n, 2)`` and ``outcomes`` holds ``n`` zeros followed by ``n`` ones.
    """
    # Draw class 0 before class 1 so the random stream is consumed in a
    # fixed order.
    class_zero = ss.norm(0, 1).rvs((n, 2))
    class_one = ss.norm(1, 1).rvs((n, 2))

    points = np.vstack((class_zero, class_one))
    outcomes = np.repeat((0, 1), n)
    return points, outcomes


--------------------------------------------------------------------------------------------------------------------------
Order of execution

# Number of synthetic points generated per class.
n = 200

points, outcomes = generate_synthetic_data(n)

# Query point to classify. The original snippet used ``p`` without ever
# defining it; this value is only an example and can be changed freely.
p = np.array([0.5, 0.5])

# The predictor defined above is named kNN_Predict_userdefined.
predicted_outcome = kNN_Predict_userdefined(p, points, outcomes)

plotting_points(points, p, n, predicted_outcome)

Friday, February 24, 2017

Finding the winning candidate from a list of votes (Python)

def majority_vote(votes):
    """Pick the vote that occurs most often; ties are settled by a random draw."""
    import random

    # Tally how many times each distinct vote appears.
    tally = {}
    for ballot in votes:
        tally[ballot] = tally.get(ballot, 0) + 1

    highest = max(tally.values())

    # Every vote that reached the highest tally is a candidate winner.
    leaders = [ballot for ballot, seen in tally.items() if seen == highest]
    return random.choice(leaders)

Wednesday, February 15, 2017

Counting words in text (Python)

def count_words_fast(text):
    """Count how often each whitespace-separated word occurs in ``text``.

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each word to the number of times it appears.
        Empty or whitespace-only text gives an empty dict.
    """
    from collections import Counter

    # split() with no argument splits on any run of whitespace, so words on
    # adjacent lines stay separate and repeated spaces never produce an
    # empty-string "word".
    return dict(Counter(text.split()))
def word_count_distribution(text):
    """Summarise how many words share each occurrence count in ``text``.

    Counts the words with ``count_words_fast`` and returns a dict whose
    keys are occurrence counts and whose values are the number of
    distinct words that have that count.
    """
    distribution = {}
    for frequency in count_words_fast(text).values():
        distribution[frequency] = distribution.get(frequency, 0) + 1
    return distribution