# import required libraries

import pandas as pd 

# for visualization

import plotly.express as px


# CRISP-DM Model
# Step1: Business Understanding

# Step2: Data Understanding

# Step3: Data Preparation

# Load the dataset; the file uses '#' as its field separator.
# NOTE: line 11 of the original dataset contained a data entry error: a stray
# slash ('/') in the 'activity' column prevented the file from being read by
# Python. It had to be removed manually before loading.

df_kiva = pd.read_csv('data_abschlussprojekt.csv', sep='#')
df_kiva


# Check for duplicates: select every row that exactly repeats an earlier row
# across all columns.
# NOTE(review): the comparison includes the 'Unnamed: 0' column; if that column
# is a unique row id, no row can ever be flagged here - confirm.
duplicate_rows = df_kiva[df_kiva.duplicated()]

# Print the duplicated rows (an empty DataFrame means none were found)
print("Duplicate rows:")
print(duplicate_rows)

Duplicate rows:
Empty DataFrame
Columns: [Unnamed: 0,  funded_amount,  loan_amount,  activity,  sector,  use,  country_code,  country,  region,  currency,  term_in_months,  lender_count,  borrower_genders,  repayment_interval]
Index: []


# Count the missing (null) values in each column
df_kiva.isnull().sum()

# According to the result, four columns contain missing values:
# 'use', 'country_code', 'region' and 'borrower_genders'.

Unnamed: 0                 0
 funded_amount             0
 loan_amount               0
 activity                  0
 sector                    0
 use                    4232
 country_code              8
 country                   0
 region                56800
 currency                  0
 term_in_months            0
 lender_count              0
 borrower_genders       4221
 repayment_interval        0
dtype: int64


# Inspect the raw column names as they were read from the file
df_kiva.columns

Index(['Unnamed: 0', ' funded_amount', ' loan_amount', ' activity', ' sector',
       ' use', ' country_code', ' country', ' region', ' currency',
       ' term_in_months', ' lender_count', ' borrower_genders',
       ' repayment_interval'],
      dtype='object')


# Most column names start with a space. Strip the surrounding whitespace first
# so that later column lookups by name do not fail.
df_kiva.columns = df_kiva.columns.str.strip()
df_kiva.columns

Index(['Unnamed: 0', 'funded_amount', 'loan_amount', 'activity', 'sector',
       'use', 'country_code', 'country', 'region', 'currency',
       'term_in_months', 'lender_count', 'borrower_genders',
       'repayment_interval'],
      dtype='object')


# Select the rows whose 'country_code' is missing so they can be inspected
# and then filled with the correct value.
missing_country_code_rows = df_kiva[df_kiva['country_code'].isnull()]

# Display the resulting DataFrame
missing_country_code_rows


# The rows with a missing country_code belong to Namibia. Its ISO 3166-1
# alpha-2 code is "NA", which pandas' read_csv treats as a missing-value marker
# by default - presumably why the code was lost on load. Restore the real
# code ("NM" is not Namibia's country code).
df_kiva.loc[missing_country_code_rows.index, 'country_code'] = "NA"

# Verify the 'country_code' replacement
df_kiva[df_kiva['country'] == 'Namibia']


# 'borrower_genders' holds one comma-separated gender entry per borrower,
# so the column has a large number of distinct values.
df_kiva['borrower_genders'].unique()

array(['female', 'female, female', 'female, female, female', ...,
       'female, female, male, female, female, female, female, female, female, female, male, male, female, female, male, female, female, female, female, female, female, female',
       'male, female, female, female, female, female, female, female, male, male, female, male, female, male, male, male',
       'female, female, female, male, female, female, female, male, female, female, female, male, female, male, female, female, female, female, female, female, female, female, female, female, female, female, female, female, male'],
      dtype=object)


# we choose four unique values for this column: male, female, mixed (when there are both male and female), unknown for 
# missing values

def simplify_genders(gender_string):
    """Collapse a comma-separated list of borrower genders into one category.

    Args:
        gender_string: Raw ``borrower_genders`` value such as
            ``'female, male, female'``, or a missing value (``None``/NaN).

    Returns:
        ``'male'`` if only male borrowers are listed, ``'female'`` if only
        female borrowers are listed, ``'mixed'`` if both appear, and
        ``'unknown'`` for missing values or entries naming neither gender.
    """
    if pd.isna(gender_string):
        return 'unknown'

    # Split on the bare comma and normalise each entry so that irregular
    # spacing or capitalisation ('male,male', ' Female') is still recognised.
    genders = {entry.strip().lower() for entry in gender_string.split(',')}

    has_male = 'male' in genders
    has_female = 'female' in genders

    if has_male and has_female:
        return 'mixed'
    if has_male:
        return 'male'
    if has_female:
        return 'female'
    # Neither gender was found (e.g. an empty string): that is not a mixed
    # group, so report it as unknown instead.
    return 'unknown'

# Replace each raw 'borrower_genders' value with its simplified category
# (the column is overwritten in place).
df_kiva['borrower_genders'] = df_kiva['borrower_genders'].apply(simplify_genders)

# Display the unique values in the simplified 'borrower_genders' column
print(df_kiva['borrower_genders'].unique())

['female' 'male' 'mixed' 'unknown']


# Many entries in the 'use' column are near-duplicates that represent the same
# purpose (e.g. they differ only in capitalisation or a trailing period).
# They could be merged into a single value with text-mining methods.

# Most frequent 'use' value(s)
df_kiva['use'].mode()

# Frequency of every distinct 'use' value
value_counts = df_kiva['use'].value_counts()

# Print the result
print("Value counts:")
value_counts

Value counts:

use
to buy a water filter to provide safe drinking water for their family.                              5217
to buy a water filter to provide safe drinking water for her family.                                4082
To buy a water filter to provide safe drinking water for their family.                              2141
to build a sanitary toilet for her family.                                                          1708
to build a sanitary toilet for her family                                                           1599
                                                                                                    ... 
to acquire furniture, equipment and beauty products (makeup, an iron, sprays, hair creams, etc.)       1
to purchase materials like cloth, needles, zippers, and duck feed                                      1
to buy farm tools as well as manure and fertilizer to maintain his crops.                              1
to launch her own shoe store in her village                                                            1
to buy differently sized pots                                                                          1
Name: count, Length: 424912, dtype: int64


# Replace missing values in the 'use' column with the placeholder "unknown"
df_kiva['use'] = df_kiva['use'].fillna("unknown")


# Replace missing values in the 'region' column with the same placeholder
df_kiva['region'] = df_kiva['region'].fillna("unknown")


# Re-count the missing values per column to confirm none remain
df_kiva.isnull().sum()

Unnamed: 0            0
funded_amount         0
loan_amount           0
activity              0
sector                0
use                   0
country_code          0
country               0
region                0
currency              0
term_in_months        0
lender_count          0
borrower_genders      0
repayment_interval    0
dtype: int64


# Work on an independent deep copy so that later changes (e.g. added columns)
# do not touch the cleaned df_kiva.
df_kiva_s = df_kiva.copy(deep=True)
df_kiva_s


# Distinct repayment schedules present in the data
df_kiva_s['repayment_interval'].unique()

array(['irregular', 'bullet', 'monthly', 'weekly'], dtype=object)


# Scatter plot of lender count against funded amount, coloured by repayment
# interval. This figure shows that there is an outlier in our data.

fig_sct = px.scatter(df_kiva_s, x="lender_count", y="funded_amount", color="repayment_interval", 
                     title="Relation between the number of lender and requested funding")

# If you print the figure object, you'll see that it is just a regular
# structure holding data and layout:
# print(fig_sct)

fig_sct.show()


# The same figure without the outlier.
# Author's observation: the figure shows no project with an amount above
# 10K USD that has no lenders at all, so lenders tend to invest in the more
# expensive projects.

# Drop the outlier by excluding rows whose lender_count equals 2986.
# NOTE(review): the value is hard-coded from the previous plot; it has to be
# revisited if the dataset changes.
df_kiva_no_outlier = df_kiva_s[df_kiva_s['lender_count'] != 2986]
fig_sct2 = px.scatter(df_kiva_no_outlier, 
                     x="lender_count", 
                     y="funded_amount", 
                     color="repayment_interval", 
                     title="Relation between the number of lender and requested funding"
                    )

# If you print the figure object, you'll see that it is just a regular
# structure holding data and layout:
# print(fig_sct2)

fig_sct2.show()


# Business question: was the target amount of each project (loan_amount)
# actually funded (funded_amount)?
# The plot puts loan_amount on the x axis ("Target amount") and funded_amount
# on the y axis, coloured by the number of lenders; the outlier is excluded.

fig_sct3 = px.scatter(df_kiva_no_outlier, 
                      x="loan_amount", 
                      y="funded_amount", 
                      
                      color="lender_count", 
                      title="Was the target amount funded as desired?"
                     ).update_layout(xaxis_title="Target amount (USD)", yaxis_title="Funded Amount")


# If you print the figure object, you'll see that it is just a regular
# structure holding data and layout:
# print(fig_sct3)

fig_sct3.show()


# Total funded amount per (country, repayment_interval) pair, kept as a flat
# DataFrame (as_index=False) so it can be passed straight to plotly.
df_aggregated = df_kiva_s.groupby(['country', 'repayment_interval'], as_index=False)['funded_amount'].sum()

# Bar chart with one bar per country, coloured by repayment interval;
# category_orders lists the countries in ascending alphabetical order for the x axis.
fig_bar = px.bar(df_aggregated, x='country', y='funded_amount', color='repayment_interval',
             title='Total Funded Amount by Country and Repayment Interval',
             labels={'funded_amount': 'Total Funded Amount (USD)', 'repayment_interval': 'Repayment Interval'},
             height=700,
             category_orders={'country': df_aggregated['country'].sort_values(ascending=True).unique()}
            )

fig_bar.show()


# A heatmap is used to show the funded amount allocated to each country in
# each sector. A heatmap needs matrix-shaped data, which is built here with a
# pivot table.


# Create the pivot table: one row per sector, one column per country code,
# each cell holding the summed funded_amount (0 where a combination is absent).
pivot_table_kiva = df_kiva_s.pivot_table(columns='country_code', index='sector', 
                                         values='funded_amount', aggfunc='sum', fill_value=0
                                        )

# Display the resulting pivot table
print(pivot_table_kiva)

country_code       AF      AL       AM       AZ       BF       BI      BJ  \
sector                                                                      
Agriculture         0  976925  6607450  1556375   614025    95825   50375   
Arts            14000    8375    63225     8450    47550     4700     300   
Clothing            0  153925   288725    49900   368550   299875    7625   
Construction        0   35325   146725    66450     8950    40675       0   
Education           0  117075  1494725    55325        0        0       0   
Entertainment       0    5225    18675        0        0        0       0   
Food                0  107375   449800   374825  1215600  1439675  185625   
Health              0  329400   855950    92050     1825     1750       0   
Housing             0  436175    40725    49500        0        0       0   
Manufacturing       0   33675   119550    33150    18975     7925    2625   
Personal Use        0   87550   147900    20750     1975        0       0   
Retail              0   52250   314250   118525   515975   613900  248850   
Services            0   83100   445975   155600   115550    54225   21425   
Transportation      0   50875   170925   115825     1000        0       0   
Wholesale           0   12750    22075     2850        0        0       0   

country_code         BO      BR     BT  ...     VC  VI       VN    VU  \
sector                                  ...                             
Agriculture     2050000   32625      0  ...   6275   0  4714375     0   
Arts             958975   30925  15625  ...   1275   0    11400     0   
Clothing        2127025  112400      0  ...   3900   0   221625     0   
Construction     284200   31925      0  ...      0   0   112625     0   
Education        499775       0      0  ...      0   0  1286325     0   
Entertainment     41425       0      0  ...   3725   0    12950     0   
Food            5823575   97625      0  ...  14400   0  2589775     0   
Health           602975    3550      0  ...      0   0   113625     0   
Housing          434050       0      0  ...      0   0  2860700     0   
Manufacturing    319250     600      0  ...   4000   0   248750     0   
Personal Use      93550       0      0  ...      0   0    67650  9250   
Retail          2991775  196525      0  ...   3650   0   920025     0   
Services        1711550  149575      0  ...  12000   0   472275     0   
Transportation   334675    1200      0  ...      0   0    18950     0   
Wholesale          3400    4075      0  ...      0   0    10650     0   

country_code         WS      XK      YE      ZA      ZM      ZW  
sector                                                           
Agriculture     1870975  616100   75700   28850   93450  828450  
Arts             458575  110300    4075    6850       0   22400  
Clothing          37500   26950  134700    5700    1250  814450  
Construction      13800  100200  100750   10150       0   22700  
Education         20550    3800    2450  302225  109925   40500  
Entertainment     32875   20225       0       0       0       0  
Food            1766150  113400  248275   72175   61225  560475  
Health                0    6275   32925    1250    9400    6700  
Housing           14625  306750  303075   13475       0       0  
Manufacturing      1275   29175    8425   11975   12750   66925  
Personal Use      26400    4700  399775       0   68800    1400  
Retail           826975   66400  209300   29575   61675  765000  
Services         341550  280425  123000   55550  700650  209875  
Transportation   230175   89725  141000   33500       0   11550  
Wholesale           400    4175     625    2750   28825   22300  

[15 rows x 87 columns]


# There are 87 columns in the above matrix.
# To make the heatmap easier to read, the pivot table is divided into three,
# so that one third of the countries is shown in each pivot table.

country_codes = pivot_table_kiva.columns.tolist()

# Split the country codes into three parts; integer division means any
# remainder columns end up in the last part.
third_length = len(country_codes) // 3
first_third_countries = country_codes[:third_length]
second_third_countries = country_codes[third_length:2*third_length]
third_third_countries = country_codes[2*third_length:]

# Create three smaller pivot tables by selecting each group of columns
pivot_table_kiva1 = pivot_table_kiva[first_third_countries]
pivot_table_kiva2 = pivot_table_kiva[second_third_countries]
pivot_table_kiva3 = pivot_table_kiva[third_third_countries]

# Display the resulting pivot tables
print("************* pivot_table_kiva1:***************")
print(pivot_table_kiva1)

print("\n************* pivot_table_kiva2:***************")
print(pivot_table_kiva2)

print("\n************* pivot_table_kiva3:***************")
print(pivot_table_kiva3)

************* pivot_table_kiva1:***************
country_code       AF      AL       AM       AZ       BF       BI      BJ  \
sector                                                                      
Agriculture         0  976925  6607450  1556375   614025    95825   50375   
Arts            14000    8375    63225     8450    47550     4700     300   
Clothing            0  153925   288725    49900   368550   299875    7625   
Construction        0   35325   146725    66450     8950    40675       0   
Education           0  117075  1494725    55325        0        0       0   
Entertainment       0    5225    18675        0        0        0       0   
Food                0  107375   449800   374825  1215600  1439675  185625   
Health              0  329400   855950    92050     1825     1750       0   
Housing             0  436175    40725    49500        0        0       0   
Manufacturing       0   33675   119550    33150    18975     7925    2625   
Personal Use        0   87550   147900    20750     1975        0       0   
Retail              0   52250   314250   118525   515975   613900  248850   
Services            0   83100   445975   155600   115550    54225   21425   
Transportation      0   50875   170925   115825     1000        0       0   
Wholesale           0   12750    22075     2850        0        0       0   

country_code         BO      BR     BT  ...       DO       EC      EG      GE  \
sector                                  ...                                     
Agriculture     2050000   32625      0  ...     6825  5020525  420175  752625   
Arts             958975   30925  15625  ...        0   231875    4500    7650   
Clothing        2127025  112400      0  ...   235600  1652100  108725  424400   
Construction     284200   31925      0  ...     2225   122725   61950   30800   
Education        499775       0      0  ...  1055825    58100       0   61650   
Entertainment     41425       0      0  ...        0     7350     600    3125   
Food            5823575   97625      0  ...   350050  3124675  272525  733250   
Health           602975    3550      0  ...        0    77100     350   29350   
Housing          434050       0      0  ...     8875    50950       0  168175   
Manufacturing    319250     600      0  ...        0   182200    7150   46575   
Personal Use      93550       0      0  ...        0    23425       0     675   
Retail          2991775  196525      0  ...   276000  2954725  121500  379200   
Services        1711550  149575      0  ...   148100   944625   81050  241925   
Transportation   334675    1200      0  ...        0   141550       0  490100   
Wholesale          3400    4075      0  ...        0     6975    6400       0   

country_code         GH       GT   GU       HN      HT       ID  
sector                                                           
Agriculture      538950  2506750    0  2839350  251700  1076075  
Arts             157275  1721900    0    19900   86150   308100  
Clothing         469600  1569975    0   270850  187350    15125  
Construction      40925    87525    0    40875   64225    14075  
Education        127825    26175    0   255275   10875   155000  
Entertainment      2625     6475    0      475       0     1600  
Food            1695050  1919575    0  1191300  735975   273275  
Health           113400    99675    0    20500   36050     5475  
Housing           16800   561125    0    51150       0   836775  
Manufacturing     81850    82225    0    23025   62575     6200  
Personal Use      31575    79375    0   438025  128525   244500  
Retail          1209600  1731050  395   352100  865300  1006900  
Services         299300   483225    0   132450  111700   591675  
Transportation     3600    49975    0    30650    1700    11300  
Wholesale          4100     9950    0     2000  107350      950  

[15 rows x 29 columns]

************* pivot_table_kiva2:***************
country_code        IL       IN       IQ       JO        KE       KG       KH  \
sector                                                                          
Agriculture       7450  1973775    11725   215150  16484185  5558375  9061800   
Arts             10450   503325    20000   107950    106100    23400   170300   
Clothing        139800   276850    56375   345675   1547850   142800    35700   
Construction         0    47575    50500    37250    305765     3175   151475   
Education        32450   325050   651200  1975975    905825   375975  1685875   
Entertainment    22425     3375     7300     4125     14450        0     5925   
Food            128850  1249100   295350   570475   4603345   191525  1017875   
Health           13775    26100    13000     5550    682550        0    29025   
Housing              0   651950     3000        0    130550   221225  1755925   
Manufacturing        0   174200    47100    52625    232325     7850    33000   
Personal Use      5750   134950    14400     1150    362700     3675  4191275   
Retail           94050   515150   151925   619050   4232640   138825   254725   
Services        262450   536200  1289350   485150   1896535    58725   181475   
Transportation    2000    44850        0     1025    729835     3000   236450   
Wholesale            0     4400        0     4375     13750        0     6275   

country_code         LA       LB      LR  ...      MZ      NG       NI     NM  \
sector                                    ...                                   
Agriculture           0   298200    1225  ...  224675  991450  1151900      0   
Arts                  0   203700       0  ...     725       0    83650      0   
Clothing              0   933975  125575  ...   60500       0   979600      0   
Construction          0   239750   22025  ...   37150    1450   133350      0   
Education             0  3081000       0  ...    9775  278875  1034925      0   
Entertainment         0    39150    5800  ...       0       0    12125      0   
Food                  0  1551875  569450  ...  103000   90725  2509375      0   
Health            15950   777350   20375  ...    2600       0   112825      0   
Housing            1175   475700       0  ...  757625       0  1158950      0   
Manufacturing         0   177375    5675  ...    2900       0    95325      0   
Personal Use    1073500   658525    2275  ...   62225  505500   459750      0   
Retail            10800  1171300  394525  ...  529850   35450  1683625   5100   
Services              0  1743575   20200  ...  116075    1875   377050      0   
Transportation    59000   193600       0  ...    9925       0    54475      0   
Wholesale             0    11275    1100  ...    1800       0     7450  27275   

country_code        NP      PA       PE        PH       PK     PR  
sector                                                             
Agriculture      88700  108625  7491100  11815050  1519250  78900  
Arts             11225    1100  1175050    582150   916175  15000  
Clothing           850    1500  1488425   1071800   655175  19725  
Construction      4925    3950   538525    331850   137175      0  
Education        15375    6775  1596075    706500  1362250  24800  
Entertainment      200       0    35200     62275    11650  17000  
Food            116375   21500  8942525  14559250  1294925  82775  
Health           33425       0   241950    286700    63875    275  
Housing              0   30975   354975   1808700     2750      0  
Manufacturing     3675     500   214800    547000   552350   2500  
Personal Use         0    7450   821150    321900    13050      0  
Retail            6625   37500  4996750  18848550  2009125  24850  
Services         26100   46100  1477925   1824225  2479750  34000  
Transportation     150    7300   968150   1659825  1376200      0  
Wholesale            0       0    52250     50600    73400      0  

[15 rows x 29 columns]

************* pivot_table_kiva3:***************
country_code         PS       PY       RW      SB       SL       SN     SO  \
sector                                                                       
Agriculture     1550350  1444900  2361225  125575   136825  1233900   1500   
Arts             178100   572150    65475    7050     6575    34950      0   
Clothing         383600  4052600  1675500   28125   490150   255000  14900   
Construction     407225   241825   272625    1950   105650    11275  27400   
Education       2148350  2606925   174950       0   104200     5375   9425   
Entertainment     35675      975     9250       0     3400        0      0   
Food            1135225  8455525  6332425  105875  1638825  2898250  56000   
Health            78800   517200    31850       0   106050     4550   3000   
Housing         1235425    33300     5100   28075    72200     5525      0   
Manufacturing    161100    30525    53975       0    20800    61425   9000   
Personal Use    1535925     5200    13000    2000    22775     8500    500   
Retail           690900  7266575  3736225  166625  1158575  2179625  87400   
Services        1999025  4127875   676625   24225    83225    79625  15100   
Transportation   431375    54425    82700    3725     9500    35925   1650   
Wholesale         60950     2700    14675     650     2300     5750      0   

country_code        SR     SS       SV  ...     VC  VI       VN    VU  \
sector                                  ...                             
Agriculture     490875   4925  8280325  ...   6275   0  4714375     0   
Arts                 0   1900   349475  ...   1275   0    11400     0   
Clothing             0   1850  1064775  ...   3900   0   221625     0   
Construction         0   2200   142350  ...      0   0   112625     0   
Education            0   2875    81200  ...      0   0  1286325     0   
Entertainment        0      0    16200  ...   3725   0    12950     0   
Food              5400   8700  4918325  ...  14400   0  2589775     0   
Health               0  55925   195800  ...      0   0   113625     0   
Housing          18475   8925  2978950  ...      0   0  2860700     0   
Manufacturing        0    475   375250  ...   4000   0   248750     0   
Personal Use      8900      0   223425  ...      0   0    67650  9250   
Retail            7950  26425  3337000  ...   3650   0   920025     0   
Services          6275   6700  1122675  ...  12000   0   472275     0   
Transportation    2600      0   269175  ...      0   0    18950     0   
Wholesale            0      0     2800  ...      0   0    10650     0   

country_code         WS      XK      YE      ZA      ZM      ZW  
sector                                                           
Agriculture     1870975  616100   75700   28850   93450  828450  
Arts             458575  110300    4075    6850       0   22400  
Clothing          37500   26950  134700    5700    1250  814450  
Construction      13800  100200  100750   10150       0   22700  
Education         20550    3800    2450  302225  109925   40500  
Entertainment     32875   20225       0       0       0       0  
Food            1766150  113400  248275   72175   61225  560475  
Health                0    6275   32925    1250    9400    6700  
Housing           14625  306750  303075   13475       0       0  
Manufacturing      1275   29175    8425   11975   12750   66925  
Personal Use      26400    4700  399775       0   68800    1400  
Retail           826975   66400  209300   29575   61675  765000  
Services         341550  280425  123000   55550  700650  209875  
Transportation   230175   89725  141000   33500       0   11550  
Wholesale           400    4175     625    2750   28825   22300  

[15 rows x 29 columns]


# List all country codes that form the columns of the full pivot table
pivot_table_kiva.columns

Index(['AF', 'AL', 'AM', 'AZ', 'BF', 'BI', 'BJ', 'BO', 'BR', 'BT', 'BZ', 'CD',
       'CG', 'CI', 'CL', 'CM', 'CN', 'CO', 'CR', 'DO', 'EC', 'EG', 'GE', 'GH',
       'GT', 'GU', 'HN', 'HT', 'ID', 'IL', 'IN', 'IQ', 'JO', 'KE', 'KG', 'KH',
       'LA', 'LB', 'LR', 'LS', 'MD', 'MG', 'ML', 'MM', 'MN', 'MR', 'MW', 'MX',
       'MZ', 'NG', 'NI', 'NM', 'NP', 'PA', 'PE', 'PH', 'PK', 'PR', 'PS', 'PY',
       'RW', 'SB', 'SL', 'SN', 'SO', 'SR', 'SS', 'SV', 'TG', 'TH', 'TJ', 'TL',
       'TR', 'TZ', 'UA', 'UG', 'US', 'VC', 'VI', 'VN', 'VU', 'WS', 'XK', 'YE',
       'ZA', 'ZM', 'ZW'],
      dtype='object', name='country_code')


# This function takes a matrix of data and draws it as a heatmap.
# The data used for the heatmaps below is the funded_amount allocated to each sector in each country.

import numpy as np
import matplotlib.pyplot as plt


def plot_matrix_heatmap(matrix_data, cbar_size, fontsize_x, fontsize_y):
    """Draw a blue-shaded heatmap of a matrix and show it.

    Args:
        matrix_data: Table to plot; it must expose ``.columns`` and ``.index``
            (the pivot-table DataFrames in this file). Column labels are put
            on the x axis and index labels on the y axis.
        cbar_size: Shrink factor passed to the colour bar.
        fontsize_x: Font size of the x tick labels.
        fontsize_y: Font size of the y tick labels.
    """
    figure, axes = plt.subplots(figsize=(50, 30))

    # Render the matrix cells using the 'Blues' colour map
    image = axes.imshow(matrix_data, cmap='Blues')

    # Colour bar attached to the heatmap axes
    colour_bar = figure.colorbar(image, ax=axes, shrink=cbar_size)
    colour_bar.ax.tick_params(labelsize=30)
    colour_bar.set_label('funded_amount (M)',  size=32)

    # One tick per column / row, labelled with the column / index values
    column_labels = matrix_data.columns
    row_labels = matrix_data.index
    axes.set_xticks(np.arange(len(column_labels)))
    axes.set_xticklabels(column_labels, fontsize=fontsize_x)
    axes.set_yticks(np.arange(len(row_labels)))
    axes.set_yticklabels(row_labels, fontsize=fontsize_y)

    figure.tight_layout()

    # Display the figure
    plt.show()

# Heatmap for all countries in the dataset (smaller tick fonts and colour bar
# because every country column is drawn in one figure)
plot_matrix_heatmap(pivot_table_kiva, 0.5, 19, 25)


# Heatmap for the first third of the countries
plot_matrix_heatmap(pivot_table_kiva1, 0.7, 32, 34)


# Heatmap for the second third of the countries
plot_matrix_heatmap(pivot_table_kiva2, 0.7, 32, 34)


# Heatmap for the last third of the countries
plot_matrix_heatmap(pivot_table_kiva3, 0.7, 32, 34)


# Word cloud indicating the purposes for which the money is being funded

from wordcloud import WordCloud



# Join every 'use' text into one string, separated by spaces.
# NOTE(review): missing 'use' values were replaced with "unknown" earlier, so
# dropna() removes nothing here and the placeholder word "unknown" is part of
# the text fed to the word cloud - confirm this is intended.
text_data = " ".join(df_kiva_s['use'].dropna())

# Generate the word cloud
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_data)

# Display the generated word cloud using matplotlib (axes hidden)
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()


# import required libraries for Dashboard

import dash
from dash import dcc
#import dash_core_components as dcc 

from dash import html
#import dash_html_components as html 

from dash.dependencies import Input, Output
import pycountry_convert as pc


# Get the unique country names from the DataFrame (in order of first appearance).
# NOTE(review): `countries` is not referenced again in the visible code.
countries = df_kiva_s['country'].unique()

def country_to_continent(country_name):
    """Return the continent name for a country name, or None if it is unknown.

    Names listed in the override table below are answered directly. Every
    other name is resolved through pycountry_convert in three steps:
    country name -> alpha-2 code -> continent code -> continent name.

    Args:
        country_name: Country name as it appears in the 'country' column.

    Returns:
        The continent name, or None when the pycountry_convert lookup raises
        KeyError or ValueError (e.g. the name is not recognised).
    """
    # Spellings answered by hand instead of going through pycountry_convert
    manual_overrides = {
        'The Democratic Republic of the Congo': 'Africa',
        'Kosovo': 'Europe',
        'Timor-Leste': 'Asia',
        'Virgin Islands': 'North America',
        'Myanmar (Burma)': 'Asia',
        "Cote D'Ivoire": 'Africa',
    }

    try:
        if country_name in manual_overrides:
            return manual_overrides[country_name]

        alpha2_code = pc.country_name_to_country_alpha2(country_name)
        continent_code = pc.country_alpha2_to_continent_code(alpha2_code)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except (KeyError, ValueError):
        # Unrecognised country name: signal "no continent" to the caller
        return None

    
# Apply country_to_continent to every row's country and store the result in a
# new 'continent' column.
df_kiva_s.loc[:, 'continent'] = df_kiva_s['country'].apply(country_to_continent)


# Inspect the rows that were assigned to Oceania
nu=df_kiva_s[df_kiva_s['continent']== 'Oceania']
nu


# Distinct continent values; used for the continent dropdown options below
continents = df_kiva_s['continent'].unique()


continents

array(['Asia', 'Africa', 'North America', 'South America', 'Europe',
       'Oceania'], dtype=object)


# List the countries whose continent could not be resolved
# (country_to_continent returned None); an empty array means all were mapped.
df_kiva_s[df_kiva_s['continent'].isnull()]['country'].unique()

array([], dtype=object)


#df_kiva_s['term_in_months'].unique()


# Get the unique countries sorted alphabetically; used for the options and the
# default value of the country dropdown in the dashboard layout.
sorted_countries = sorted(df_kiva_s['country'].unique())


##########################################################
# Step1: Create the Dash application object; the layout and the callbacks
# below are registered on it.
kiva_app = dash.Dash(__name__)

def map_continent_to_scope(continent_name):
    """Translate a continent name into the ``scope`` value for the geo plot.

    Args:
        continent_name: Continent name as stored in the 'continent' column,
            or another selector value such as 'World' or 'all'.

    Returns:
        The lower-cased continent name for Africa, Asia, Europe, North America
        and South America; 'world' for everything else, including 'Oceania',
        'World' and unrecognised values.
    """
    continents_with_own_scope = {
        'Africa',
        'Asia',
        'Europe',
        'North America',
        'South America',
    }

    if continent_name in continents_with_own_scope:
        return continent_name.lower()

    # 'Oceania' has no dedicated entry here, so it is shown on the world map,
    # as is any value that is not recognised.
    return 'world'

##########################################################
# Step2: Layout of the kiva_app
# The page consists of two stacked sections:
#   1. continent dropdown + gender radio buttons driving the 'scatter-geo-plot' graph
#   2. country dropdown driving the 'heatmap' graph
# The component ids used here are the ones referenced by the callbacks.
kiva_app.layout = html.Div([
    # First interactive figure: funded amount in different continents and for different gender groups
    
    # ************ Dropdown Menu1 ***********
    # Options are a fixed 'World' entry (value 'all') followed by one entry per continent in the data
    html.Div([
        dcc.Dropdown(
            id='continent-dropdown',
            options=[{'label': 'World', 'value': 'all'}] + [{'label': continent, 'value': continent} for continent in continents],
            value='all',  # Default selected option to show all
            multi=False
    ),

    #************ Radio Buttons ***********
    # NOTE(review): there is no option for the 'unknown' gender category; those
    # loans are only included under 'All Genders'.
        dcc.RadioItems(
            id='gender-radio',
            options=[
                {'label': 'All Genders', 'value': 'all'},
                {'label': 'Male', 'value': 'male'},
                {'label': 'Female', 'value': 'female'},
                {'label': 'Mixed', 'value': 'mixed'},
            ],
            value='all',  # Default selected option to show all genders
            labelStyle={'display': 'block'}
        ),
    
    dcc.Graph(id='scatter-geo-plot')
    ]),
    
    
    # ************ Dropdown Menu2 ***********
    # Second interactive figure: the funded amount in each sector for the selected country
    html.Div([
        dcc.Dropdown(
            id='country-dropdown',
            options=[{'label': country, 'value': country} for country in sorted_countries],
            value=sorted_countries[1],  # Default selected country: the SECOND entry of the sorted list - NOTE(review): confirm index 1 (not 0) is intended
            multi=False
    
    ),
    dcc.Graph(id='heatmap')
    ])
    
      
    
])

##########################################################
# Step3: Callbacks
# ************ Callback1 ***********
# Callback to update Scatter Geo plot based on selected continent
@kiva_app.callback(
    Output('scatter-geo-plot', 'figure'),
    [Input('continent-dropdown', 'value'),
     Input('gender-radio', 'value')]
)
def update_scatter_geo_plot(selected_continent, selected_gender):
    """Build the scatter-geo figure for the chosen continent and gender.

    Args:
        selected_continent: Value of the continent dropdown; ``'all'``
            disables the continent filter, any other value is compared
            against the ``continent`` column of ``df_kiva_s``.
        selected_gender: Value of the gender radio group; ``'all'``
            disables the gender filter, any other value must equal the
            ``borrower_genders`` label of a row (case-insensitively).

    Returns:
        A Plotly Express ``scatter_geo`` figure with one marker per country,
        coloured and sized by the summed ``funded_amount``.
    """
    # Continent filter ('all' keeps every row).
    if selected_continent == 'all':
        filtered_df = df_kiva_s
    else:
        filtered_df = df_kiva_s[df_kiva_s['continent'] == selected_continent]

    # Gender filter. This must be an exact match: a substring test such as
    # str.contains('male') would also select every 'female' row, because
    # 'male' is contained in 'female'. Missing values compare unequal and
    # are therefore dropped, as before.
    if selected_gender != 'all':
        gender_labels = filtered_df['borrower_genders'].str.strip().str.lower()
        filtered_df = filtered_df[gender_labels == selected_gender]

    # Total funded amount per country.
    aggregated_df = (
        filtered_df.groupby('country')['funded_amount'].sum().reset_index()
    )

    fig = px.scatter_geo(
        aggregated_df,
        locations='country',
        locationmode='country names',
        color='funded_amount',
        size='funded_amount',
        hover_name='country',
        title=f'Scatter Geo: Funded Amount by Country ({selected_continent})',
        projection='natural earth',
        size_max=20,
        # Translate the dropdown value into a scope name for the map.
        scope=map_continent_to_scope(selected_continent),
    )
    fig.update_layout(height=700, width=1100)

    return fig



# ************ Callback2 ***********
# Callback to update the heatmap based on selected country
@kiva_app.callback(
    Output('heatmap', 'figure'),
    [Input('country-dropdown', 'value')]
)
def update_heatmap(selected_country):
    """Return the funded-amount-per-sector chart for one country.

    Despite the function name and the 'heatmap' component id, the figure is
    produced with ``px.histogram``: sectors on the x axis and
    ``funded_amount`` on the y axis (aggregated per sector by Plotly
    Express; sum is its documented default).

    Args:
        selected_country: Value of the country dropdown; rows of
            ``df_kiva_s`` whose ``country`` equals it are plotted.

    Returns:
        The Plotly Express histogram figure.
    """
    # Keep only the loans of the chosen country.
    in_country = df_kiva_s['country'] == selected_country
    country_loans = df_kiva_s[in_country]

    axis_labels = {
        'funded_amount': 'Total Funded Amount (USD)',
        'sector': 'Sector',
    }

    return px.histogram(
        country_loans,
        x='sector',
        y='funded_amount',
        title=f'Funding Amount by Sector in {selected_country}',
        labels=axis_labels,
        color_discrete_sequence=['maroon'],  # single colour for all bars
    )

##########################################################
# Step4: Run the kiva_app
#kiva_app.run(jupyter_mode="external", port=8892)


if __name__ == '__main__':
    # Start the Dash development server only when this file is executed
    # directly. `run` is used instead of `run_server`: the latter is a
    # deprecated alias that newer Dash releases no longer provide, and the
    # notebook variant of this call already relies on `kiva_app.run(...)`.
    kiva_app.run(debug=True)


# Sanity check: number of distinct labels in the 'borrower_genders' column.
df_kiva_s['borrower_genders'].nunique()

4


# Sanity check: how many rows carry each 'borrower_genders' label.
df_kiva_s['borrower_genders'].value_counts()

borrower_genders
female     488081
male       138523
mixed       40380
unknown      4221
Name: count, dtype: int64


# Total 'funded_amount' per country over the whole data set (no continent or
# gender filter), i.e. the same aggregation the scatter-geo callback performs.
aggregated_df = df_kiva_s.groupby('country')['funded_amount'].sum().reset_index()
# Bare expression so the notebook displays the resulting table.
aggregated_df

	Unnamed: 0	funded_amount	loan_amount	activity	sector	use	country_code	country	region	currency	term_in_months	lender_count	borrower_genders	repayment_interval
0	0	300.0	300.0	Fruits & Vegetables	Food	To buy seasonal, fresh fruits to sell.	PK	Pakistan	Lahore	PKR	12.0	12	female	irregular
1	1	575.0	575.0	Rickshaw	Transportation	to repair and maintain the auto rickshaw used ...	PK	Pakistan	Lahore	PKR	11.0	14	female, female	irregular
2	2	150.0	150.0	Transportation	Transportation	To repair their old cycle-van and buy another ...	IN	India	Maynaguri	INR	43.0	6	female	bullet
3	3	200.0	200.0	Embroidery	Arts	to purchase an embroidery machine and a variet...	PK	Pakistan	Lahore	PKR	11.0	8	female	irregular
4	4	400.0	400.0	Milk Sales	Food	to purchase one buffalo.	PK	Pakistan	Abdul Hakeem	PKR	14.0	16	female	monthly
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
671200	671200	0.0	25.0	Livestock	Agriculture	[True, u'para compara: cemento, arenya y ladri...	PY	Paraguay	Concepción	USD	13.0	0	female	monthly
671201	671201	25.0	25.0	Livestock	Agriculture	[True, u'to start a turducken farm.'] - this l...	KE	Kenya	NaN	KES	13.0	1	female	monthly
671202	671202	0.0	25.0	Games	Entertainment	NaN	KE	Kenya	NaN	KES	13.0	0	NaN	monthly
671203	671203	0.0	25.0	Livestock	Agriculture	[True, u'to start a turducken farm.'] - this l...	KE	Kenya	NaN	KES	13.0	0	female	monthly
671204	671204	0.0	25.0	Livestock	Agriculture	[True, u'to start a turducken farm.'] - this l...	KE	Kenya	NaN	KES	13.0	0	female	monthly

	Unnamed: 0	funded_amount	loan_amount	activity	sector	use	country_code	country	region	currency	term_in_months	lender_count	borrower_genders	repayment_interval
0	0	300.0	300.0	Fruits & Vegetables	Food	To buy seasonal, fresh fruits to sell.	PK	Pakistan	Lahore	PKR	12.0	12	female	irregular
1	1	575.0	575.0	Rickshaw	Transportation	to repair and maintain the auto rickshaw used ...	PK	Pakistan	Lahore	PKR	11.0	14	female	irregular
2	2	150.0	150.0	Transportation	Transportation	To repair their old cycle-van and buy another ...	IN	India	Maynaguri	INR	43.0	6	female	bullet
3	3	200.0	200.0	Embroidery	Arts	to purchase an embroidery machine and a variet...	PK	Pakistan	Lahore	PKR	11.0	8	female	irregular
4	4	400.0	400.0	Milk Sales	Food	to purchase one buffalo.	PK	Pakistan	Abdul Hakeem	PKR	14.0	16	female	monthly
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
671200	671200	0.0	25.0	Livestock	Agriculture	[True, u'para compara: cemento, arenya y ladri...	PY	Paraguay	Concepción	USD	13.0	0	female	monthly
671201	671201	25.0	25.0	Livestock	Agriculture	[True, u'to start a turducken farm.'] - this l...	KE	Kenya	unknown	KES	13.0	1	female	monthly
671202	671202	0.0	25.0	Games	Entertainment	unknown	KE	Kenya	unknown	KES	13.0	0	unknown	monthly
671203	671203	0.0	25.0	Livestock	Agriculture	[True, u'to start a turducken farm.'] - this l...	KE	Kenya	unknown	KES	13.0	0	female	monthly
671204	671204	0.0	25.0	Livestock	Agriculture	[True, u'to start a turducken farm.'] - this l...	KE	Kenya	unknown	KES	13.0	0	female	monthly

	Unnamed: 0	funded_amount	loan_amount	activity	sector	use	country_code	country	region	currency	term_in_months	lender_count	borrower_genders	repayment_interval	continent
3443	3443	650.0	650.0	Farming	Agriculture	to buy bush knives, a wheelbarrow, fertilizer ...	WS	Samoa	Gataivai Savaii	WST	14.0	23	female	irregular	Oceania
3481	3481	1075.0	1075.0	Farming	Agriculture	to buy a wheelbarrow, bush knives, chemicals, ...	WS	Samoa	Faala Palauli	WST	14.0	40	female	irregular	Oceania
3529	3529	325.0	325.0	Farming	Agriculture	to buy chemicals, weedsprayer, bush knives, sp...	WS	Samoa	Gataivai Savaii	WST	14.0	10	female	irregular	Oceania
3552	3552	450.0	450.0	Tailoring	Services	to buy material, thread, needles, and a sewing...	WS	Samoa	Aopo, Savaii	WST	14.0	16	female	irregular	Oceania
3570	3570	450.0	450.0	Farming	Agriculture	to buy a weedsprayer, a wheelbarrow, a bag of ...	WS	Samoa	TAPUELEELE	WST	14.0	15	female	irregular	Oceania
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
670501	670501	75.0	1100.0	Farming	Agriculture	to buy chemicals, fertilizer, a new wheelbarro...	WS	Samoa	Fasitoo Tai	WST	14.0	3	female	irregular	Oceania
670510	670510	0.0	600.0	Food Production/Sales	Food	to buy bags of flour, barrel of cooking oil, b...	WS	Samoa	Faleasiu	WST	14.0	0	female	irregular	Oceania
670523	670523	0.0	600.0	Textiles	Arts	to buy material, fabric paint, stencils, brush...	WS	Samoa	Siusega	WST	14.0	0	female	irregular	Oceania
670537	670537	0.0	1100.0	Cafe	Food	to buy sandwiches, pies, donuts, snacks, coffe...	WS	Samoa	Faleasiu	WST	14.0	0	female	irregular	Oceania
670640	670640	125.0	875.0	Textiles	Arts	to buy material, fabric paint, stencils, brush...	WS	Samoa	Satupaitea Vaega Savaii	WST	14.0	4	female	irregular	Oceania

	country	funded_amount
0	Afghanistan	14000.0
1	Albania	2490000.0
2	Armenia	11186675.0
3	Azerbaijan	2699575.0
4	Belize	114025.0
...	...	...
82	Vietnam	13661700.0
83	Virgin Islands	0.0
84	Yemen	1784075.0
85	Zambia	1147950.0
86	Zimbabwe	3372725.0

Somaye-DS¶

A Data Analytics Project¶

1. Teil - Datenanalyse¶

CRISP DM: Business Understanding¶

CRISP DM: Data Understanding¶

Initially there are no duplicates, but we have to check again after dealing with missing values and cleaning the data¶

Dealing with Missing values¶

dealing with missing values in 'country_code'¶

dealing with missing values in 'borrower_genders' column¶

dealing with missing values in 'use' column¶

dealing with missing values in 'region' column¶

Modeling and Visualisation¶

A heatmap presenting funded amount for each country in each sector¶

2. Teil - Dashboard¶

Business Questions¶

	Unnamed: 0	funded_amount	loan_amount	activity	sector	use	country_code	country	region	currency	term_in_months	lender_count	borrower_genders	repayment_interval
202537	202537	4150.0	4150.0	Wholesale	Wholesale	To purchase lighting products for sale to loca...	NaN	Namibia	EEnhana	NAD	6.0	162	female	bullet
202823	202823	4150.0	4150.0	Wholesale	Wholesale	To purchase lighting products for sale to loca...	NaN	Namibia	Rundu	NAD	6.0	159	male	bullet
344929	344929	3325.0	3325.0	Wholesale	Wholesale	To purchase lighting products for sale to loca...	NaN	Namibia	EEnhana	NAD	7.0	120	female	bullet
351177	351177	3325.0	3325.0	Wholesale	Wholesale	To purchase lighting products for sale to loca...	NaN	Namibia	Rundu	NAD	7.0	126	male	bullet
420953	420953	3325.0	3325.0	Wholesale	Wholesale	To purchase lighting products for sale to loca...	NaN	Namibia	EEnhana	NAD	7.0	118	female	bullet
421218	421218	4000.0	4000.0	Wholesale	Wholesale	purchase solar lighting products for sale to l...	NaN	Namibia	Rundu	NAD	7.0	150	male	bullet
487207	487207	5100.0	5100.0	Renewable Energy Products	Retail	to pay for stock of solar lights and cell phon...	NaN	Namibia	Katima Mulilo	NAD	7.0	183	male	bullet
487653	487653	5000.0	5000.0	Wholesale	Wholesale	to maintain a stock of solar lights and cell p...	NaN	Namibia	Oshakati	NAD	7.0	183	female	bullet