# Exploration of Pokémon data

In [1]:
import pandas as pd
import plotly.express as px

## Loading and simplifying

In [2]:
# Load the full dataset; the path is relative, so pokemon.csv must sit in the
# notebook's working directory.
df = pd.read_csv("pokemon.csv")

In [3]:
# Preview the first five rows to sanity-check the load.
df.head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,name,percentage_male,pokedex_number,sp_attack,sp_defense,speed,weight_kg,generation,is_legendary,types
0,"Overgrow,Chlorophyll",1,1,1,0,0,0,2,2,1,...,Bulbasaur,88.1,1,65,65,45,6.9,1,0,"grass,poison"
1,"Overgrow,Chlorophyll",1,1,1,0,0,0,2,2,1,...,Ivysaur,88.1,2,80,80,60,13.0,1,0,"grass,poison"
2,"Overgrow,Chlorophyll",1,1,1,0,0,0,2,2,1,...,Venusaur,88.1,3,122,120,80,100.0,1,0,"grass,poison"
3,"Blaze,Solar Power",0,1,1,1,0,1,0,1,1,...,Charmander,88.1,4,60,50,65,8.5,1,0,fire
4,"Blaze,Solar Power",0,1,1,1,0,1,0,1,1,...,Charmeleon,88.1,5,80,65,80,19.0,1,0,fire


In [4]:
# List every column with its non-null count and dtype, to spot missing data
# before choosing which columns to keep.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 40 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   abilities          801 non-null    object 
 1   against_bug        801 non-null    int64  
 2   against_dark       801 non-null    int64  
 3   against_dragon     801 non-null    int64  
 4   against_electric   801 non-null    int64  
 5   against_fairy      801 non-null    int64  
 6   against_fight      801 non-null    int64  
 7   against_fire       801 non-null    int64  
 8   against_flying     801 non-null    int64  
 9   against_ghost      801 non-null    int64  
 10  against_grass      801 non-null    int64  
 11  against_ground     801 non-null    int64  
 12  against_ice        801 non-null    int64  
 13  against_normal     801 non-null    int64  
 14  against_poison     801 non-null    int64  
 15  against_psychic    801 non-null    int64  
 16  against_rock       801 non

In [6]:
# Columns kept for the simplified dataset, grouped by theme.
basic_info_columns = [
    # identity
    "pokedex_number", "name", "classification", "is_legendary", "generation",
    # physical measurements
    "height_m", "weight_kg",
    # battle stats
    "attack", "defense", "speed",
    # capture rate and gender ratio
    "capture_rate", "percentage_male",
    # comma-separated list columns
    "types", "abilities",
]
df.loc[:, basic_info_columns].head()

Unnamed: 0,pokedex_number,name,classification,is_legendary,generation,height_m,weight_kg,attack,defense,speed,capture_rate,percentage_male,types,abilities
0,1,Bulbasaur,Seed Pokémon,0,1,0.7,6.9,49,49,45,45,88.1,"grass,poison","Overgrow,Chlorophyll"
1,2,Ivysaur,Seed Pokémon,0,1,1.0,13.0,62,63,60,45,88.1,"grass,poison","Overgrow,Chlorophyll"
2,3,Venusaur,Seed Pokémon,0,1,2.0,100.0,100,123,80,45,88.1,"grass,poison","Overgrow,Chlorophyll"
3,4,Charmander,Lizard Pokémon,0,1,0.6,8.5,52,43,65,45,88.1,fire,"Blaze,Solar Power"
4,5,Charmeleon,Flame Pokémon,0,1,1.1,19.0,64,58,80,45,88.1,fire,"Blaze,Solar Power"


In [7]:
# Write only the selected columns; index=False keeps the row index out of the file.
df.to_csv("pokemon_simple.csv", columns=basic_info_columns, index=False)

## Exploding and Normalization

In [8]:
# Reload the simplified file and turn the comma-separated "types" and
# "abilities" strings into Python lists so they can be exploded.
# The vectorised .str.split leaves a missing value as NaN instead of raising
# (the previous element-wise lambda called .split on every cell), and it does
# not rely on DataFrame.map, which only exists in pandas >= 2.1.
df_basic = pd.read_csv("pokemon_simple.csv").assign(
    types=lambda frame: frame["types"].str.split(","),
    abilities=lambda frame: frame["abilities"].str.split(","),
)
# Exploding both list columns in the same frame yields one row per
# (type, ability) pair, so every other column is repeated: many duplicates.
df_basic.explode("types").explode("abilities")

Unnamed: 0,pokedex_number,name,classification,is_legendary,generation,height_m,weight_kg,attack,defense,speed,capture_rate,percentage_male,types,abilities
0,1,Bulbasaur,Seed Pokémon,0,1,0.7,6.9,49,49,45,45,88.1,grass,Overgrow
0,1,Bulbasaur,Seed Pokémon,0,1,0.7,6.9,49,49,45,45,88.1,grass,Chlorophyll
0,1,Bulbasaur,Seed Pokémon,0,1,0.7,6.9,49,49,45,45,88.1,poison,Overgrow
0,1,Bulbasaur,Seed Pokémon,0,1,0.7,6.9,49,49,45,45,88.1,poison,Chlorophyll
1,2,Ivysaur,Seed Pokémon,0,1,1.0,13.0,62,63,60,45,88.1,grass,Overgrow
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
799,800,Necrozma,Prism Pokémon,1,7,2.4,230.0,107,101,79,3,,psychic,Prism Armor
799,800,Necrozma,Prism Pokémon,1,7,2.4,230.0,107,101,79,3,,legendary,Prism Armor
800,801,Magearna,Artificial Pokémon,1,7,1.0,80.5,95,115,65,3,,steel,Soul-Heart
800,801,Magearna,Artificial Pokémon,1,7,1.0,80.5,95,115,65,3,,fairy,Soul-Heart


In [9]:
# Normalise df_basic into three tables linked by pokedex_number:
#   df_info      - the scalar columns (list columns removed)
#   df_types     - one row per (pokedex_number, type)
#   df_abilities - one row per (pokedex_number, ability)
df_info = df_basic.drop(columns=["types", "abilities"])
df_types = df_basic.loc[:, ["pokedex_number", "types"]].explode("types")
df_abilities = df_basic.loc[:, ["pokedex_number", "abilities"]].explode("abilities")
df_info

Unnamed: 0,pokedex_number,name,classification,is_legendary,generation,height_m,weight_kg,attack,defense,speed,capture_rate,percentage_male
0,1,Bulbasaur,Seed Pokémon,0,1,0.7,6.9,49,49,45,45,88.1
1,2,Ivysaur,Seed Pokémon,0,1,1.0,13.0,62,63,60,45,88.1
2,3,Venusaur,Seed Pokémon,0,1,2.0,100.0,100,123,80,45,88.1
3,4,Charmander,Lizard Pokémon,0,1,0.6,8.5,52,43,65,45,88.1
4,5,Charmeleon,Flame Pokémon,0,1,1.1,19.0,64,58,80,45,88.1
...,...,...,...,...,...,...,...,...,...,...,...,...
796,797,Celesteela,Launch Pokémon,1,7,9.2,999.9,101,103,61,25,
797,798,Kartana,Drawn Sword Pokémon,1,7,0.3,0.1,181,131,109,255,
798,799,Guzzlord,Junkivore Pokémon,1,7,5.5,888.0,101,53,43,15,
799,800,Necrozma,Prism Pokémon,1,7,2.4,230.0,107,101,79,3,


In [10]:
# One row per (pokedex_number, ability); index labels repeat because explode
# keeps the label of the row each list came from.
df_abilities

Unnamed: 0,pokedex_number,abilities
0,1,Overgrow
0,1,Chlorophyll
1,2,Overgrow
1,2,Chlorophyll
2,3,Overgrow
...,...,...
796,797,Beast Boost
797,798,Beast Boost
798,799,Beast Boost
799,800,Prism Armor


In [11]:
# One row per (pokedex_number, type); index labels repeat because explode
# keeps the label of the row each list came from.
df_types

Unnamed: 0,pokedex_number,types
0,1,grass
0,1,poison
1,2,grass
1,2,poison
2,3,grass
...,...,...
799,800,psychic
799,800,legendary
800,801,steel
800,801,fairy


In [12]:
# Keep only the rows of the types table whose type is "fairy".
df_types.loc[df_types["types"].eq("fairy")]

Unnamed: 0,pokedex_number,types
34,35,fairy
35,36,fairy
38,39,fairy
39,40,fairy
121,122,fairy
172,173,fairy
173,174,fairy
174,175,fairy
175,176,fairy
182,183,fairy


In [13]:
# Combine the tables with merge, the pandas counterpart of a database JOIN:
# the fairy-type rows are matched to the info table on the shared
# pokedex_number key (inner join by default).
pd.merge(
    df_info,
    df_types.loc[df_types["types"].eq("fairy")],
    on="pokedex_number",
)

Unnamed: 0,pokedex_number,name,classification,is_legendary,generation,height_m,weight_kg,attack,defense,speed,capture_rate,percentage_male,types
0,35,Clefairy,Fairy Pokémon,0,1,0.6,7.5,45,48,35,150,24.6,fairy
1,36,Clefable,Fairy Pokémon,0,1,1.3,40.0,70,73,60,25,24.6,fairy
2,39,Jigglypuff,Balloon Pokémon,0,1,0.5,5.5,45,20,20,170,24.6,fairy
3,40,Wigglytuff,Balloon Pokémon,0,1,1.0,12.0,70,45,45,50,24.6,fairy
4,122,Mr. Mime,Barrier Pokémon,0,1,1.3,54.5,45,65,90,45,50.0,fairy
5,173,Cleffa,Star Shape Pokémon,0,2,0.3,3.0,25,28,15,150,24.6,fairy
6,174,Igglybuff,Balloon Pokémon,0,2,0.3,1.0,30,15,15,170,24.6,fairy
7,175,Togepi,Spike Ball Pokémon,0,2,0.3,1.5,20,65,20,190,88.1,fairy
8,176,Togetic,Happiness Pokémon,0,2,0.6,3.2,40,85,40,75,88.1,fairy
9,183,Marill,Aquamouse Pokémon,0,2,0.4,8.5,20,50,40,190,50.0,fairy


## Scatter plots

In [14]:
# Attack against defense, coloured by classification; hovering shows the name.
px.scatter(
    df_basic,
    x="attack",
    y="defense",
    color="classification",
    hover_name="name",
    title="Pokemon Attack-Defense",
).show()

In [15]:
# Speed (x) against attack (y), coloured by classification; hovering shows the name.
px.scatter(
    df_basic, x="speed", y="attack",
    color="classification", hover_name="name",
    title="Pokemon Attack-Speed",
).show()

In [16]:
# Height against weight; the hover box also lists attack, speed and defense.
px.scatter(
    df_basic,
    x="height_m",
    y="weight_kg",
    color="classification",
    hover_name="name",
    hover_data=dict.fromkeys(("attack", "speed", "defense"), True),
    title="Pokemon height-weight",
).show()

## Histograms

In [17]:
# Distribution of weight_kg over the rows of df_info.
px.histogram(df_info, x="weight_kg", title="Weight Histogram", height=600)

In [18]:
# Weight distribution split by type. df_types holds one row per
# (pokedex_number, type), so after the merge a Pokémon with two types is
# counted once under each of them.
px.histogram(
    df_info.merge(df_types, on="pokedex_number"),
    x="weight_kg",
    color="types",
    height=600,
    title="Weight Histogram with types color",
)

In [19]:
# Distribution of height_m over the rows of df_info.
px.histogram(
    df_info,
    x="height_m",
    title="Height Histogram",
)

## 2D histogram (heatmap)


In [20]:
# Joint distribution of height and weight, using plotly's default binning.
px.density_heatmap(df_info, x="height_m", y="weight_kg", title="Height weight heatmap")

In [21]:
# Same heatmap with explicit bin counts, and the axes limited to
# height_m in [0, 3] and weight_kg in [0, 300].
px.density_heatmap(
    df_info, x="height_m", y="weight_kg",
    title="Height weight heatmap",
    nbinsx=200, nbinsy=100,
    range_x=(0, 3), range_y=(0, 300),
)

# Dumping into dataset