# import piplite
# await piplite.install(['numpy'])
# await piplite.install(['pandas'])
# await piplite.install(['seaborn'])

# pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
#NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Matplotlib is a plotting library for Python; pyplot gives us a MATLAB-like plotting interface. It is used below to title, label and display the figures.
import matplotlib.pyplot as plt
#Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics
import seaborn as sns

# NOTE(review): nothing in the visible notebook imports `js`, and the install
# log below shows this PyPI package pulling in fanstatic and WebOb. Confirm the
# install is actually needed before keeping it.
!pip install js

Collecting js
  Downloading js-1.0.tar.gz (2.5 kB)
  Preparing metadata (setup.py) ... done
Collecting fanstatic (from js)
  Downloading fanstatic-1.4-py3-none-any.whl (53 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 53.9/53.9 kB 700.9 kB/s eta 0:00:00
Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from js) (67.7.2)
Collecting WebOb>=1.2 (from fanstatic->js)
  Downloading WebOb-1.8.7-py2.py3-none-any.whl (114 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 115.0/115.0 kB 2.8 MB/s eta 0:00:00
Building wheels for collected packages: js
  Building wheel for js (setup.py) ... done
  Created wheel for js: filename=js-1.0-py3-none-any.whl size=2883 sha256=cd2b649dc2218e254fe15af84846fd6b31083ecda0c7f7205e563e78e5da3f47
  Stored in directory: /root/.cache/pip/wheels/77/dc/68/d580eaea0ef6137289a09dabe661f5df14951f5c29c08d443d
Successfully built js
Installing collected packages: WebOb, fanstatic, js
Successfully installed WebOb-1.8.7 fanstatic-1.4 js-1.0

# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive

# Set the matplotlib 'figure.figsize' rc parameter through seaborn.
sns.set(rc={"figure.figsize": (14, 6)})

# Path of the dataset CSV on the mounted Drive.
URL = "/content/drive/MyDrive/IBM Data Science Capstone/dataset_part_2.csv"

# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(URL)
df.head()

# Categorical plot of PayloadMass against FlightNumber, coloured by the Class column.
sns.catplot(
    data=df,
    x="FlightNumber",
    y="PayloadMass",
    hue="Class",
    aspect=5,
)

# Axis labels use a larger font than the default.
plt.xlabel("Flight Number", fontsize=20)
plt.ylabel("Pay load Mass (kg)", fontsize=20)
plt.show()

# FlightNumber against LaunchSite as a categorical plot, with hue taken from Class.
sns.catplot(
    data=df,
    x="FlightNumber",
    y="LaunchSite",
    hue="Class",
    aspect=1,
)

# Title the figure, then render it.
plt.title("Flight Number vs Launch Site")
plt.show()

# Scatter plot: FlightNumber on the x axis, LaunchSite on the y axis,
# points coloured by the Class value.
sns.scatterplot(
    data=df,
    x="FlightNumber",
    y="LaunchSite",
    hue="Class",
)

# Title the figure, then render it.
plt.title("Flight Number vs Launch Site")
plt.show()

# Scatter plot: PayloadMass on the x axis, LaunchSite on the y axis,
# points coloured by the Class value.

# Switch seaborn to the "whitegrid" style before drawing.
sns.set_style("whitegrid")

sns.scatterplot(
    data=df,
    x="PayloadMass",
    y="LaunchSite",
    hue="Class",
)

# Title the figure, then render it.
plt.title("Payload Mass vs Launch Site")
plt.show()

# Print the three plotted columns as a plain-text table without the index.
print(df.loc[:, ["PayloadMass", "LaunchSite", "Class"]].to_string(index=False))

 PayloadMass   LaunchSite  Class
 6104.959412 CCAFS SLC 40      0
  525.000000 CCAFS SLC 40      0
  677.000000 CCAFS SLC 40      0
  500.000000  VAFB SLC 4E      0
 3170.000000 CCAFS SLC 40      0
 3325.000000 CCAFS SLC 40      0
 2296.000000 CCAFS SLC 40      1
 1316.000000 CCAFS SLC 40      1
 4535.000000 CCAFS SLC 40      0
 4428.000000 CCAFS SLC 40      0
 2216.000000 CCAFS SLC 40      0
 2395.000000 CCAFS SLC 40      0
  570.000000 CCAFS SLC 40      1
 1898.000000 CCAFS SLC 40      0
 4707.000000 CCAFS SLC 40      0
 2477.000000 CCAFS SLC 40      0
 2034.000000 CCAFS SLC 40      1
  553.000000  VAFB SLC 4E      0
 5271.000000 CCAFS SLC 40      0
 3136.000000 CCAFS SLC 40      1
 4696.000000 CCAFS SLC 40      1
 3100.000000 CCAFS SLC 40      1
 2257.000000 CCAFS SLC 40      1
 4600.000000 CCAFS SLC 40      1
 5500.000000 CCAFS SLC 40      0
 9600.000000  VAFB SLC 4E      1
 2490.000000   KSC LC 39A      1
 5600.000000   KSC LC 39A      0
 5300.000000   KSC LC 39A      1
 6104.959412   KSC LC 39A      1
 6070.000000   KSC LC 39A      0
 2708.000000   KSC LC 39A      1
 3669.000000   KSC LC 39A      1
 9600.000000  VAFB SLC 4E      1
 6761.000000   KSC LC 39A      0
 2910.000000   KSC LC 39A      1
  475.000000  VAFB SLC 4E      1
 4990.000000   KSC LC 39A      1
 9600.000000  VAFB SLC 4E      1
 5200.000000   KSC LC 39A      1
 3700.000000   KSC LC 39A      1
 2205.000000 CCAFS SLC 40      1
 9600.000000  VAFB SLC 4E      1
 6104.959412 CCAFS SLC 40      1
 4230.000000 CCAFS SLC 40      1
 6092.000000 CCAFS SLC 40      0
 9600.000000  VAFB SLC 4E      0
 2760.000000 CCAFS SLC 40      0
  350.000000 CCAFS SLC 40      1
 3750.000000   KSC LC 39A      1
 5383.850000 CCAFS SLC 40      0
 2410.000000 CCAFS SLC 40      0
 7076.000000 CCAFS SLC 40      1
 9600.000000  VAFB SLC 4E      1
 5800.000000 CCAFS SLC 40      1
 7060.000000 CCAFS SLC 40      1
 2800.000000  VAFB SLC 4E      1
 3000.000000   KSC LC 39A      1
 4000.000000  VAFB SLC 4E      1
 2573.000000 CCAFS SLC 40      0
 4400.000000 CCAFS SLC 40      0
 9600.000000  VAFB SLC 4E      1
12259.000000   KSC LC 39A      1
 2482.000000 CCAFS SLC 40      1
13620.000000 CCAFS SLC 40      1
 1425.000000  VAFB SLC 4E      1
 2227.700000 CCAFS SLC 40      1
 6500.000000 CCAFS SLC 40      0
15600.000000 CCAFS SLC 40      1
 5000.000000 CCAFS SLC 40      1
 6800.000000 CCAFS SLC 40      1
15400.000000 CCAFS SLC 40      1
 6104.959412   KSC LC 39A      0
15600.000000 CCAFS SLC 40      1
15400.000000 CCAFS SLC 40      0
 1977.000000 CCAFS SLC 40      1
15600.000000   KSC LC 39A      0
15400.000000   KSC LC 39A      1
 9525.000000   KSC LC 39A      1
15400.000000 CCAFS SLC 40      1
15400.000000 CCAFS SLC 40      1
 3880.000000 CCAFS SLC 40      1
 6104.959412 CCAFS SLC 40      1
15400.000000 CCAFS SLC 40      1
 1600.000000 CCAFS SLC 40      1
15400.000000   KSC LC 39A      1
15400.000000   KSC LC 39A      1
15400.000000   KSC LC 39A      1
15400.000000 CCAFS SLC 40      1
 3681.000000 CCAFS SLC 40      1

# Success rate per orbit type: group by Orbit and take the mean of the Class
# column, then order the rows from highest to lowest mean.
orbit_success_rate = (
    df.groupby('Orbit')['Class']
    .mean()
    .reset_index()
    .sort_values(by='Class', ascending=False)
)

# Horizontal bar plot with one bar per orbit type.
plt.figure(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn and announced for
# removal in v0.14.0. Mapping the y variable to `hue` and hiding the legend
# keeps the same per-bar colouring without the FutureWarning.
sns.barplot(
    x='Class',
    y='Orbit',
    hue='Orbit',
    data=orbit_success_rate,
    palette='viridis',
    legend=False,
)

# Set the title and labels
plt.title('Success Rate of Each Orbit Type')
plt.xlabel('Success Rate')
plt.ylabel('Orbit')

# Show the plot
plt.show()

<ipython-input-11-d7b32a3c439f>:10: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Class', y='Orbit', data=orbit_success_rate, palette='viridis')

# Display the per-orbit table of mean Class values.
orbit_success_rate

# Scatter plot: FlightNumber on the x axis, Orbit on the y axis,
# points coloured by the Class value using the "coolwarm" palette.
plt.figure(figsize=(12, 8))
sns.scatterplot(
    x="FlightNumber",
    y="Orbit",
    hue="Class",
    palette="coolwarm",
    data=df,
)

# Title and axis labels.
plt.title("Flight Number vs Orbit (Colored by Class)")
plt.xlabel("Flight Number")
plt.ylabel("Orbit")

# Anchor the legend just outside the right edge of the axes, vertically centred.
plt.legend(title="Class", loc="center left", bbox_to_anchor=(1, 0.5))
plt.grid(True)
plt.tight_layout()
plt.show()

# Print the three plotted columns as a plain-text table without the index.
print(df.loc[:, ["FlightNumber", "Orbit", "Class"]].to_string(index=False))

 FlightNumber Orbit  Class
            1   LEO      0
            2   LEO      0
            3   ISS      0
            4    PO      0
            5   GTO      0
            6   GTO      0
            7   ISS      1
            8   LEO      1
            9   GTO      0
           10   GTO      0
           11   ISS      0
           12   ISS      0
           13 ES-L1      1
           14   ISS      0
           15   GTO      0
           16   ISS      0
           17   LEO      1
           18    PO      0
           19   GTO      0
           20   ISS      1
           21   GTO      1
           22   GTO      1
           23   ISS      1
           24   GTO      1
           25   GTO      0
           26    PO      1
           27   ISS      1
           28   GTO      0
           29   GTO      1
           30   LEO      1
           31   GTO      0
           32   ISS      1
           33   GTO      1
           34    PO      1
           35   GTO      0
           36   ISS      1
           37   SSO      1
           38   LEO      1
           39    PO      1
           40   GTO      1
           41   GTO      1
           42   ISS      1
           43    PO      1
           44   LEO      1
           45   GTO      1
           46   GTO      0
           47    PO      0
           48   ISS      0
           49   HEO      1
           50   GTO      1
           51   GTO      0
           52   ISS      0
           53   GTO      1
           54    PO      1
           55   GTO      1
           56   GTO      1
           57   SSO      1
           58   GTO      1
           59   SSO      1
           60   ISS      0
           61   MEO      0
           62    PO      1
           63   ISS      1
           64   ISS      1
           65  VLEO      1
           66   SSO      1
           67   ISS      1
           68   GTO      0
           69  VLEO      1
           70   ISS      1
           71   GTO      1
           72  VLEO      1
           73    SO      0
           74  VLEO      1
           75  VLEO      0
           76   ISS      1
           77  VLEO      0
           78  VLEO      1
           79   ISS      1
           80  VLEO      1
           81  VLEO      1
           82   MEO      1
           83   GEO      1
           84  VLEO      1
           85   SSO      1
           86  VLEO      1
           87  VLEO      1
           88  VLEO      1
           89  VLEO      1
           90   MEO      1

# Print the payload, orbit and class columns as a plain-text table without the index.
print(df.loc[:, ["PayloadMass", "Orbit", "Class"]].to_string(index=False))

 PayloadMass Orbit  Class
 6104.959412   LEO      0
  525.000000   LEO      0
  677.000000   ISS      0
  500.000000    PO      0
 3170.000000   GTO      0
 3325.000000   GTO      0
 2296.000000   ISS      1
 1316.000000   LEO      1
 4535.000000   GTO      0
 4428.000000   GTO      0
 2216.000000   ISS      0
 2395.000000   ISS      0
  570.000000 ES-L1      1
 1898.000000   ISS      0
 4707.000000   GTO      0
 2477.000000   ISS      0
 2034.000000   LEO      1
  553.000000    PO      0
 5271.000000   GTO      0
 3136.000000   ISS      1
 4696.000000   GTO      1
 3100.000000   GTO      1
 2257.000000   ISS      1
 4600.000000   GTO      1
 5500.000000   GTO      0
 9600.000000    PO      1
 2490.000000   ISS      1
 5600.000000   GTO      0
 5300.000000   GTO      1
 6104.959412   LEO      1
 6070.000000   GTO      0
 2708.000000   ISS      1
 3669.000000   GTO      1
 9600.000000    PO      1
 6761.000000   GTO      0
 2910.000000   ISS      1
  475.000000   SSO      1
 4990.000000   LEO      1
 9600.000000    PO      1
 5200.000000   GTO      1
 3700.000000   GTO      1
 2205.000000   ISS      1
 9600.000000    PO      1
 6104.959412   LEO      1
 4230.000000   GTO      1
 6092.000000   GTO      0
 9600.000000    PO      0
 2760.000000   ISS      0
  350.000000   HEO      1
 3750.000000   GTO      1
 5383.850000   GTO      0
 2410.000000   ISS      0
 7076.000000   GTO      1
 9600.000000    PO      1
 5800.000000   GTO      1
 7060.000000   GTO      1
 2800.000000   SSO      1
 3000.000000   GTO      1
 4000.000000   SSO      1
 2573.000000   ISS      0
 4400.000000   MEO      0
 9600.000000    PO      1
12259.000000   ISS      1
 2482.000000   ISS      1
13620.000000  VLEO      1
 1425.000000   SSO      1
 2227.700000   ISS      1
 6500.000000   GTO      0
15600.000000  VLEO      1
 5000.000000   ISS      1
 6800.000000   GTO      1
15400.000000  VLEO      1
 6104.959412    SO      0
15600.000000  VLEO      1
15400.000000  VLEO      0
 1977.000000   ISS      1
15600.000000  VLEO      0
15400.000000  VLEO      1
 9525.000000   ISS      1
15400.000000  VLEO      1
15400.000000  VLEO      1
 3880.000000   MEO      1
 6104.959412   GEO      1
15400.000000  VLEO      1
 1600.000000   SSO      1
15400.000000  VLEO      1
15400.000000  VLEO      1
15400.000000  VLEO      1
15400.000000  VLEO      1
 3681.000000   MEO      1

# Scatter plot: PayloadMass on the x axis, Orbit on the y axis,
# points coloured by the Class value using the "coolwarm" palette.
plt.figure(figsize=(12, 8))
sns.scatterplot(
    x="PayloadMass",
    y="Orbit",
    hue="Class",
    palette="coolwarm",
    data=df,
)

# Title and axis labels.
plt.title("Payload vs Orbit (Colored by Class)")
plt.xlabel("Payload Mass (kg)")
plt.ylabel("Orbit")

# Anchor the legend just outside the right edge of the axes, vertically centred.
plt.legend(title="Class", loc="center left", bbox_to_anchor=(1, 0.5))
plt.grid(True)
plt.tight_layout()
plt.show()

# Year strings produced by Extract_year(). Kept at module level because the
# statement below assigns this list to df['Date'].
year = []


def Extract_year():
    """Fill the module-level ``year`` list with the year part of each date.

    Each entry of ``df["Date"]`` is split on "-" and its first field is kept,
    so entries are expected to be strings that start with the year.

    The list is rebuilt in place on every call instead of being appended to,
    so calling the function more than once cannot leave ``year`` longer than
    the ``Date`` column (which would make the column assignment below fail).

    Returns:
        The module-level ``year`` list.
    """
    year[:] = [date.split("-")[0] for date in df["Date"]]
    return year


# Replace the full dates with just their year and preview the result.
df['Date'] = Extract_year()
df.head()

# Plot a line chart with x axis to be the extracted year and y axis to be the success rate

# Normalise 'Date' to an integer year. The values are converted to str before
# parsing so this cell is safe to re-run: once 'Date' already holds integer
# years, pd.to_datetime would otherwise read them as epoch offsets and every
# row would end up in 1970.
df['Date'] = pd.to_datetime(df['Date'].astype(str)).dt.year

# Percentage of rows with Class == 1 for each year.
success_rate = df.groupby('Date')['Class'].apply(lambda x: (x == 1).mean() * 100)

# Plot line chart
plt.figure(figsize=(10, 6))
success_rate.plot(marker='o', color='b', linestyle='-')

# Set the title and labels
plt.title('Success Rate Over Years')
plt.xlabel('Year')
plt.ylabel('Success Rate (%)')

# One x tick per year present in the data.
plt.xticks(success_rate.index)

# Show the plot
plt.grid(True)
plt.tight_layout()
plt.show()

# Display the per-year percentages.
success_rate

Date
2010     0.000000
2012     0.000000
2013     0.000000
2014    33.333333
2015    33.333333
2016    62.500000
2017    83.333333
2018    61.111111
2019    90.000000
2020    84.210526
Name: Class, dtype: float64

# Select the columns that make up the feature table and preview the first rows.
features = df[
    [
        'FlightNumber', 'PayloadMass', 'Orbit', 'LaunchSite',
        'Flights', 'GridFins', 'Reused', 'Legs',
        'LandingPad', 'Block', 'ReusedCount', 'Serial',
    ]
]
features.head()

# One-hot encode the categorical columns with pandas.get_dummies.
columns_to_encode = ['Orbit', 'LaunchSite', 'LandingPad', 'Serial']
features_one_hot = pd.get_dummies(data=features, columns=columns_to_encode)

# Preview the encoded table.
print(features_one_hot.head())

   FlightNumber  PayloadMass  Flights  GridFins  Reused   Legs  Block  \
0             1  6104.959412        1     False   False  False    1.0   
1             2   525.000000        1     False   False  False    1.0   
2             3   677.000000        1     False   False  False    1.0   
3             4   500.000000        1     False   False  False    1.0   
4             5  3170.000000        1     False   False  False    1.0   

   ReusedCount  Orbit_ES-L1  Orbit_GEO  ...  Serial_B1048  Serial_B1049  \
0            0            0          0  ...             0             0   
1            0            0          0  ...             0             0   
2            0            0          0  ...             0             0   
3            0            0          0  ...             0             0   
4            0            0          0  ...             0             0   

   Serial_B1050  Serial_B1051  Serial_B1054  Serial_B1056  Serial_B1058  \
0             0             0             0             0             0   
1             0             0             0             0             0   
2             0             0             0             0             0   
3             0             0             0             0             0   
4             0             0             0             0             0   

   Serial_B1059  Serial_B1060  Serial_B1062  
0             0             0             0  
1             0             0             0  
2             0             0             0  
3             0             0             0  
4             0             0             0  

[5 rows x 80 columns]

# Convert every column of the encoded table to float64 via astype.
features_one_hot = features_one_hot.astype(np.float64)

# Preview the converted table.
print(features_one_hot.head())

   FlightNumber  PayloadMass  Flights  GridFins  Reused  Legs  Block  \
0           1.0  6104.959412      1.0       0.0     0.0   0.0    1.0   
1           2.0   525.000000      1.0       0.0     0.0   0.0    1.0   
2           3.0   677.000000      1.0       0.0     0.0   0.0    1.0   
3           4.0   500.000000      1.0       0.0     0.0   0.0    1.0   
4           5.0  3170.000000      1.0       0.0     0.0   0.0    1.0   

   ReusedCount  Orbit_ES-L1  Orbit_GEO  ...  Serial_B1048  Serial_B1049  \
0          0.0          0.0        0.0  ...           0.0           0.0   
1          0.0          0.0        0.0  ...           0.0           0.0   
2          0.0          0.0        0.0  ...           0.0           0.0   
3          0.0          0.0        0.0  ...           0.0           0.0   
4          0.0          0.0        0.0  ...           0.0           0.0   

   Serial_B1050  Serial_B1051  Serial_B1054  Serial_B1056  Serial_B1058  \
0           0.0           0.0           0.0           0.0           0.0   
1           0.0           0.0           0.0           0.0           0.0   
2           0.0           0.0           0.0           0.0           0.0   
3           0.0           0.0           0.0           0.0           0.0   
4           0.0           0.0           0.0           0.0           0.0   

   Serial_B1059  Serial_B1060  Serial_B1062  
0           0.0           0.0           0.0  
1           0.0           0.0           0.0  
2           0.0           0.0           0.0  
3           0.0           0.0           0.0  
4           0.0           0.0           0.0  

[5 rows x 80 columns]

# Write the encoded feature table to CSV without the index column.
features_one_hot.to_csv("dataset_part_3.csv", index=False)

SpaceX Falcon 9 First Stage Landing Analysis¶

Exploring and Preparing Data¶

What makes a Falcon 9 landing successful?¶

My Objectives¶

Import Libraries and Define Helper Functions¶

Exploratory Data Analysis¶

TASK 1: Visualize the relationship between Flight Number and Launch Site¶

TASK 2: Visualize the relationship between Payload and Launch Site¶

TASK 3: Visualize the relationship between success rate of each orbit type¶

TASK 4: Visualize the relationship between FlightNumber and Orbit type¶

TASK 5: Visualize the relationship between Payload and Orbit type¶

TASK 6: Visualize the launch success yearly trend¶

Feature Engineering¶

TASK 7: Create dummy variables for categorical columns¶

TASK 8: Cast all numeric columns to `float64`¶

Authors¶

Change Log¶

IBM Corporation 2022. All rights reserved.

	FlightNumber	Date	BoosterVersion	PayloadMass	Orbit	LaunchSite	Outcome	Flights	GridFins	Reused	Legs	LandingPad	Block	Serial	Longitude	Latitude
0	1	2010-06-04	Falcon 9	6104.959412	LEO	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B0003	-80.577366	28.561857
1	2	2012-05-22	Falcon 9	525.000000	LEO	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B0005	-80.577366	28.561857
2	3	2013-03-01	Falcon 9	677.000000	ISS	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B0007	-80.577366	28.561857
3	4	2013-09-29	Falcon 9	500.000000	PO	VAFB SLC 4E	False Ocean	1	False	False	False	NaN	1.0	B1003	-120.610829	34.632093
4	5	2013-12-03	Falcon 9	3170.000000	GTO	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B1004	-80.577366	28.561857

	Orbit	Class
0	ES-L1	1.000000
1	GEO	1.000000
3	HEO	1.000000
9	SSO	1.000000
10	VLEO	0.857143
5	LEO	0.714286
6	MEO	0.666667
7	PO	0.666667
4	ISS	0.619048
2	GTO	0.518519
8	SO	0.000000

	FlightNumber	Date	BoosterVersion	PayloadMass	Orbit	LaunchSite	Outcome	Flights	GridFins	Reused	Legs	LandingPad	Block	Serial	Longitude	Latitude
0	1	2010	Falcon 9	6104.959412	LEO	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B0003	-80.577366	28.561857
1	2	2012	Falcon 9	525.000000	LEO	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B0005	-80.577366	28.561857
2	3	2013	Falcon 9	677.000000	ISS	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B0007	-80.577366	28.561857
3	4	2013	Falcon 9	500.000000	PO	VAFB SLC 4E	False Ocean	1	False	False	False	NaN	1.0	B1003	-120.610829	34.632093
4	5	2013	Falcon 9	3170.000000	GTO	CCAFS SLC 40	None None	1	False	False	False	NaN	1.0	B1004	-80.577366	28.561857

SpaceX Falcon 9 First Stage Landing Analysis¶

Exploring and Preparing Data¶

What makes a Falcon 9 landing successful?¶

My Objectives¶

Import Libraries and Define Helper Functions¶

Exploratory Data Analysis¶

TASK 1: Visualize the relationship between Flight Number and Launch Site¶

TASK 2: Visualize the relationship between Payload and Launch Site¶

TASK 3: Visualize the relationship between success rate of each orbit type¶

TASK 4: Visualize the relationship between FlightNumber and Orbit type¶

TASK 5: Visualize the relationship between Payload and Orbit type¶

TASK 6: Visualize the launch success yearly trend¶

Feature Engineering¶

TASK 7: Create dummy variables for categorical columns¶

TASK 8: Cast all numeric columns to float64¶

Authors¶

Change Log¶

IBM Corporation 2022. All rights reserved.

TASK 8: Cast all numeric columns to `float64`¶