## Define variables

In [1]:
x=45

In [2]:
y=34

In [3]:
x+y

Out[3]:
79
In [4]:
mynames="dhafer"

In [5]:
print(mynames)

dhafer

In [6]:
print('My name is',mynames,"and I'm",x+y," years old")

My name is dhafer and I'm 79  years old


Comparisons

In [7]:
5==3

Out[7]:
False
In [8]:
4==4

Out[8]:
True
In [9]:
8!=4

Out[9]:
True
In [10]:
8>3

Out[10]:
True

## While loop

In [11]:
i = 0
# Repeat the message while the counter is below 10 (ten iterations in total).
while i < 10:
    print("you have a message")
    # Advance the counter; without this the loop would never terminate.
    i = i + 1

you have a message
you have a message
you have a message
you have a message
you have a message
you have a message
you have a message
you have a message
you have a message
you have a message


## For loop

In [12]:
range(10)

Out[12]:
range(0, 10)
In [13]:
# Print the cube of every integer produced by range(10), i.e. 0 through 9.
for i in range(10):
    print(i**3)

0
1
8
27
64
125
216
343
512
729


## Define functions

In [14]:
def hello():
    """Print the fixed greeting ``Hello ESSAI``.

    Takes no arguments and returns ``None``; the greeting is written to
    standard output.
    """
    print("Hello ESSAI")

In [15]:
hello()

Hello ESSAI

In [16]:
def add_numbers(a, b):
    """Print the arithmetic mean of ``a`` and ``b``.

    Note that, despite the function's name, the value printed is
    ``(a + b) / 2`` (the average), not the plain sum ``a + b``.

    Parameters
    ----------
    a, b : numbers
        The two values to average.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    print((a + b) / 2)

In [17]:
add_numbers(4,9)

6.5


Check if a number is odd or even

In [18]:
def even_check(a):
    """Print whether ``a`` is even or odd.

    Prints ``It's even`` when ``a % 2 == 0`` and ``It's odd`` otherwise.
    The message is written to standard output; nothing is returned.
    """
    if a % 2 == 0:
        print("It's even")
    else:
        print("It's odd")

In [19]:
even_check(9)

It's odd


## List

In [20]:
list1=["A","B","C","D","F"]

In [21]:
list2=[2,4,10]

In [22]:
list1[0]

Out[22]:
'A'
In [23]:
list2[1]

Out[23]:
4
In [24]:
list1[0:3]

Out[24]:
['A', 'B', 'C']
In [25]:
list1+list2

Out[25]:
['A', 'B', 'C', 'D', 'F', 2, 4, 10]

## Importing csv file

In [26]:
import pandas as pd

In [27]:
import glob

In [28]:
print(glob.glob("*.csv"))

['gdp-world.csv', 'Salary_data.csv', 'student.csv', 'decathlon.csv', 'Data delegation Economy  Polit-Table 1.csv', 'startups.csv', 'idr_gouv.csv']

In [29]:
print(glob.glob("*.html"))

['first_code.html', 'regression_model.html']

In [30]:
decat=pd.read_csv('decathlon.csv')

In [31]:
decat

Out[31]:
Unnamed: 0 100m Long.jump Shot.put High.jump 400m 110m.hurdle Discus Pole.vault Javeline 1500m Rank Points Competition
0 SEBRLE 11.04 7.58 14.83 2.07 49.81 14.69 43.75 5.02 63.19 291.70 1 8217 Decastar
1 CLAY 10.76 7.40 14.26 1.86 49.37 14.05 50.72 4.92 60.15 301.50 2 8122 Decastar
2 KARPOV 11.02 7.30 14.77 2.04 48.37 14.09 48.95 4.92 50.31 300.20 3 8099 Decastar
3 BERNARD 11.02 7.23 14.25 1.92 48.93 14.99 40.87 5.32 62.77 280.10 4 8067 Decastar
4 YURKOV 11.34 7.09 15.19 2.10 50.42 15.31 46.26 4.72 63.44 276.40 5 8036 Decastar
5 WARNERS 11.11 7.60 14.31 1.98 48.68 14.23 41.10 4.92 51.77 278.10 6 8030 Decastar
6 ZSIVOCZKY 11.13 7.30 13.48 2.01 48.62 14.17 45.67 4.42 55.37 268.00 7 8004 Decastar
7 McMULLEN 10.83 7.31 13.76 2.13 49.91 14.38 44.41 4.42 56.37 285.10 8 7995 Decastar
8 MARTINEAU 11.64 6.81 14.57 1.95 50.14 14.93 47.60 4.92 52.33 262.10 9 7802 Decastar
9 HERNU 11.37 7.56 14.41 1.86 51.10 15.06 44.99 4.82 57.19 285.10 10 7733 Decastar
10 BARRAS 11.33 6.97 14.09 1.95 49.48 14.48 42.10 4.72 55.40 282.00 11 7708 Decastar
11 NOOL 11.33 7.27 12.68 1.98 49.20 15.29 37.92 4.62 57.44 266.60 12 7651 Decastar
12 BOURGUIGNON 11.36 6.80 13.46 1.86 51.16 15.67 40.49 5.02 54.68 291.70 13 7313 Decastar
13 Sebrle 10.85 7.84 16.36 2.12 48.36 14.05 48.72 5.00 70.52 280.01 1 8893 OlympicG
14 Clay 10.44 7.96 15.23 2.06 49.19 14.13 50.11 4.90 69.71 282.00 2 8820 OlympicG
15 Karpov 10.50 7.81 15.93 2.09 46.81 13.97 51.65 4.60 55.54 278.11 3 8725 OlympicG
16 Macey 10.89 7.47 15.73 2.15 48.97 14.56 48.34 4.40 58.46 265.42 4 8414 OlympicG
17 Warners 10.62 7.74 14.48 1.97 47.97 14.01 43.73 4.90 55.39 278.05 5 8343 OlympicG
18 Zsivoczky 10.91 7.14 15.31 2.12 49.40 14.95 45.62 4.70 63.45 269.54 6 8287 OlympicG
19 Hernu 10.97 7.19 14.65 2.03 48.73 14.25 44.72 4.80 57.76 264.35 7 8237 OlympicG
20 Nool 10.80 7.53 14.26 1.88 48.81 14.80 42.05 5.40 61.33 276.33 8 8235 OlympicG
21 Bernard 10.69 7.48 14.80 2.12 49.13 14.17 44.75 4.40 55.27 276.31 9 8225 OlympicG
22 Schwarzl 10.98 7.49 14.01 1.94 49.76 14.25 42.43 5.10 56.32 273.56 10 8102 OlympicG
23 Pogorelov 10.95 7.31 15.10 2.06 50.79 14.21 44.60 5.00 53.45 287.63 11 8084 OlympicG
24 Schoenbeck 10.90 7.30 14.77 1.88 50.30 14.34 44.41 5.00 60.89 278.82 12 8077 OlympicG
25 Barras 11.14 6.99 14.91 1.94 49.41 14.37 44.83 4.60 64.55 267.09 13 8067 OlympicG
26 Smith 10.85 6.81 15.24 1.91 49.27 14.01 49.02 4.20 61.52 272.74 14 8023 OlympicG
27 Averyanov 10.55 7.34 14.44 1.94 49.72 14.39 39.88 4.80 54.51 271.02 15 8021 OlympicG
28 Ojaniemi 10.68 7.50 14.97 1.94 49.12 15.01 40.35 4.60 59.26 275.71 16 8006 OlympicG
29 Smirnov 10.89 7.07 13.88 1.94 49.11 14.77 42.47 4.70 60.88 263.31 17 7993 OlympicG
30 Qi 11.06 7.34 13.55 1.97 49.65 14.78 45.13 4.50 60.79 272.63 18 7934 OlympicG
31 Drews 10.87 7.38 13.07 1.88 48.51 14.01 40.11 5.00 51.53 274.21 19 7926 OlympicG
32 Parkhomenko 11.14 6.61 15.69 2.03 51.04 14.88 41.90 4.80 65.82 277.94 20 7918 OlympicG
33 Terek 10.92 6.94 15.15 1.94 49.56 15.12 45.62 5.30 50.62 290.36 21 7893 OlympicG
34 Gomez 11.08 7.26 14.57 1.85 48.61 14.41 40.95 4.40 60.71 269.70 22 7865 OlympicG
35 Turi 11.08 6.91 13.62 2.03 51.67 14.26 39.83 4.80 59.34 290.01 23 7708 OlympicG
36 Lorenzo 11.10 7.03 13.22 1.85 49.34 15.38 40.22 4.50 58.36 263.08 24 7592 OlympicG
37 Karlivans 11.33 7.26 13.30 1.97 50.54 14.98 43.34 4.50 52.92 278.67 25 7583 OlympicG
38 Korkizoglou 10.86 7.07 14.81 1.94 51.16 14.96 46.07 4.70 53.05 317.00 26 7573 OlympicG
39 Uldal 11.23 6.99 13.53 1.85 50.95 15.09 43.01 4.50 60.00 281.70 27 7495 OlympicG
40 Casarsa 11.36 6.68 14.92 1.94 53.20 15.39 48.66 4.40 58.62 296.12 28 7404 OlympicG
In [32]:
decat['100m']

Out[32]:
0     11.04
1     10.76
2     11.02
3     11.02
4     11.34
5     11.11
6     11.13
7     10.83
8     11.64
9     11.37
10    11.33
11    11.33
12    11.36
13    10.85
14    10.44
15    10.50
16    10.89
17    10.62
18    10.91
19    10.97
20    10.80
21    10.69
22    10.98
23    10.95
24    10.90
25    11.14
26    10.85
27    10.55
28    10.68
29    10.89
30    11.06
31    10.87
32    11.14
33    10.92
34    11.08
35    11.08
36    11.10
37    11.33
38    10.86
39    11.23
40    11.36
Name: 100m, dtype: float64
In [33]:
decat.describe()

Out[33]:
100m Long.jump Shot.put High.jump 400m 110m.hurdle Discus Pole.vault Javeline 1500m Rank Points
count 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000 41.000000
mean 10.998049 7.260000 14.477073 1.976829 49.616341 14.605854 44.325610 4.762439 58.316585 279.024878 12.121951 8005.365854
std 0.263023 0.316402 0.824428 0.088951 1.153451 0.471789 3.377845 0.278000 4.826820 11.673247 7.918949 342.385145
min 10.440000 6.610000 12.680000 1.850000 46.810000 13.970000 37.920000 4.200000 50.310000 262.100000 1.000000 7313.000000
25% 10.850000 7.030000 13.880000 1.920000 48.930000 14.210000 41.900000 4.500000 55.270000 271.020000 6.000000 7802.000000
50% 10.980000 7.300000 14.570000 1.950000 49.400000 14.480000 44.410000 4.800000 58.360000 278.050000 11.000000 8021.000000
75% 11.140000 7.480000 14.970000 2.040000 50.300000 14.980000 46.070000 4.920000 60.890000 285.100000 18.000000 8122.000000
max 11.640000 7.960000 16.360000 2.150000 53.200000 15.670000 51.650000 5.400000 70.520000 317.000000 28.000000 8893.000000
In [34]:
decat.Competition

Out[34]:
0     Decastar
1     Decastar
2     Decastar
3     Decastar
4     Decastar
5     Decastar
6     Decastar
7     Decastar
8     Decastar
9     Decastar
10    Decastar
11    Decastar
12    Decastar
13    OlympicG
14    OlympicG
15    OlympicG
16    OlympicG
17    OlympicG
18    OlympicG
19    OlympicG
20    OlympicG
21    OlympicG
22    OlympicG
23    OlympicG
24    OlympicG
25    OlympicG
26    OlympicG
27    OlympicG
28    OlympicG
29    OlympicG
30    OlympicG
31    OlympicG
32    OlympicG
33    OlympicG
34    OlympicG
35    OlympicG
36    OlympicG
37    OlympicG
38    OlympicG
39    OlympicG
40    OlympicG
Name: Competition, dtype: object
In [35]:
decat.Javeline

Out[35]:
0     63.19
1     60.15
2     50.31
3     62.77
4     63.44
5     51.77
6     55.37
7     56.37
8     52.33
9     57.19
10    55.40
11    57.44
12    54.68
13    70.52
14    69.71
15    55.54
16    58.46
17    55.39
18    63.45
19    57.76
20    61.33
21    55.27
22    56.32
23    53.45
24    60.89
25    64.55
26    61.52
27    54.51
28    59.26
29    60.88
30    60.79
31    51.53
32    65.82
33    50.62
34    60.71
35    59.34
36    58.36
37    52.92
38    53.05
39    60.00
40    58.62
Name: Javeline, dtype: float64

## Importing xlsx files

Let us first write a function that searches a given folder (and its subfolders) for files whose names match a pattern, such as `*.xlsx`.

In [36]:
import os, fnmatch
def find(pattern, path):
    """Return the paths of all files under ``path`` whose name matches ``pattern``.

    The directory tree rooted at ``path`` is traversed with ``os.walk`` and
    each file name is tested against ``pattern`` with ``fnmatch.fnmatch``.

    Parameters
    ----------
    pattern : str
        Shell-style wildcard pattern applied to file names (e.g. ``"*.xlsx"``).
    path : str
        Directory at which the search starts.

    Returns
    -------
    list of str
        One ``os.path.join(root, name)`` entry per matching file, in the
        order ``os.walk`` yields them; empty if nothing matches.
    """
    result = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            # Only the bare file name is matched, not the directory part.
            if fnmatch.fnmatch(name, pattern):
                result.append(os.path.join(root, name))
    return result

In [37]:
cwd = os.getcwd()

In [38]:
cwd

Out[38]:
'/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python'
In [39]:
# Search from the working directory stored in `cwd` rather than repeating a
# machine-specific absolute path.
find("*.xlsx", cwd)

Out[39]:
['/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python/WA_Fn-UseC_-HR-Employee-Attrition.xlsx',
'/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python/migration.xlsx',
'/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python/~\$WA_Fn-UseC_-HR-Employee-Attrition.xlsx',
'/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python/data-wrangling-master/data/chp9/hiv_aids_2014.xlsx',
'/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python/data-wrangling-master/data/unicef/hiv_aids_2014.xlsx',
'/Users/dhafermalouche/Documents/Teaching_2018_2019/Teaching_python/data-wrangling-master/data/chp4/SOWC 2014 Stat Tables_Table 9.xlsx']
In [40]:
# Build the workbook path from `cwd` so the cell does not depend on one
# machine's absolute directory layout.
fxls = pd.ExcelFile(os.path.join(cwd, "migration.xlsx"))

In [41]:
fxls

Out[41]:
<pandas.io.excel.ExcelFile at 0x114233898>

How can we find the names of the sheets in the newly imported xlsx file?

In [42]:
nsheets=fxls.sheet_names

In [43]:
nsheets

Out[43]:
['Données', 'Cartes', 'Graphique']

# Parse one sheet of the workbook into a DataFrame.
# NOTE(review): parse(2) selects the sheet at position 2, which is 'Graphique'
# according to the sheet_names output above; the variable name suggests the
# 'Données' sheet (position 0) may have been intended — confirm.
fxls_donnees= fxls.parse(2)

fxls_donnees