A functional Python environment for data science
Python
pour la data science en mettant l’accent sur la modularité du langage et l’utilisation des notebooks…
Lino Galiana
2025-03-19
Data science with Python
Course website Python for Data Science
, an introduction to Python
for the second year of the engineering curriculum at ENSAE
(Master 1).
All content of this group is freely available here or on Github
and can be tested in the form of Jupyter
notebooks.
Example with the introduction to Pandas
On the agenda:
Overall, this course offers comprehensive content that can satisfy both beginners in data science and those looking for more advanced material:
Pandas
), geographic data (Geopandas
), data retrieval (web scraping, APIs)…Matplotlib
, Seaborn
), cartography, reactive visualizations (Plotly
, Folium
)Scikit
), econometricsNLTK
and SpaCy
, modeling…ElasticSearch
, continuous integration…
All content on this site relies on open data, whether it is French data (mainly from the central platform data.gouv
or the Insee website) or American data. The program is presented linearly at the top of this page (👆️) or in a disordered manner below (👇️).
A good complement to the website content is the course given with Romain Avouac in the final year at ENSAE, which focuses more on the production of data science projects: https://ensae-reproductibilite.github.io/
Pour découvrir Python
de manière désordonnée. La version ordonnée est dans la partie supérieure de cette page (👆️).
/**
 * Build the badge links (GitHub, SSP Cloud VSCode, SSP Cloud Jupyter and,
 * unless `onyxiaOnly` is set, Google Colab) pointing at the notebook that
 * corresponds to a chapter source file.
 *
 * @param {object} [options]
 * @param {string} [options.sourceFile="content/01_toto.Rmd"] Path of the
 *   chapter source. A trailing `.Rmd`/`.qmd` extension becomes `.ipynb` and
 *   the first "content" in the path becomes "notebooks" (or "corrections").
 *   Must contain at least `<section>/<chapter>`.
 * @param {string|string[]} [options.type=['md', 'html']] Output flavour. When
 *   an array is given its first element is used. "md" yields Markdown links,
 *   anything else yields HTML anchors; only "html" gets the `<p>` wrapper.
 * @param {?number} [options.split=null] Appends `<br>` after one badge:
 *   4 → Jupyter, 5 → VSCode, 7 → Colab.
 * @param {boolean} [options.onyxiaOnly=false] Return only the two SSP Cloud
 *   badges (Jupyter then VSCode), concatenated without separator or wrapper.
 * @param {string} [options.sspCloudService="python"] Suffix of the SSP Cloud
 *   service name (`jupyter-<service>` / `vscode-<service>`).
 * @param {boolean} [options.GPU=false] Append `-gpu` to the service name.
 * @param {boolean} [options.correction=false] Target the "corrections"
 *   directory and pass an extra "correction" argument to the init script.
 * @returns {string} The badges as a single string.
 * @throws {TypeError} If `sourceFile` is not a string or has no directory part.
 */
function reminderBadges({
  sourceFile = "content/01_toto.Rmd",
  type = ['md', 'html'],
  split = null,
  onyxiaOnly = false,
  sspCloudService = "python",
  GPU = false,
  correction = false
} = {}) {
  // An array means "no explicit choice": fall back to its first entry.
  const format = Array.isArray(type) ? type[0] : type;
  const isMarkdown = format === "md";

  // Only a real trailing extension is rewritten; the dot is escaped and the
  // pattern anchored so that e.g. "01_Rmd_intro.qmd" is not mangled.
  const notebook = sourceFile
    .replace(/\.(Rmd|qmd)$/, ".ipynb")
    .replace(/content/, correction ? "corrections" : "notebooks");

  const segments = notebook.split("/");
  if (segments.length < 2) {
    throw new TypeError(
      `sourceFile must look like "<section>/<chapter>", got "${sourceFile}"`
    );
  }
  const [section, chapter] = segments.slice(-2);
  const chapterNoExtension = chapter.replace(".ipynb", "");

  const repoSlug = "linogaliana/python-datascientist-notebooks";
  const notebookRelPath = `/${notebook}`;

  const githubLink = `<a href="https://github.com/${repoSlug}/blob/main${notebookRelPath}" class="github"><i class="fab fa-github"></i></a>`;

  // Arguments forwarded to the SSP Cloud init script, space-separated (%20).
  const initArgs = [section, chapterNoExtension];
  if (correction) {
    initArgs.push("correction");
  }
  const encodedInitArgs = initArgs.join('%20');
  const gpuSuffix = GPU ? "-gpu" : "";

  // %C2%AB / %C2%BB are the URL-encoded « » delimiters used around values.
  const launcherUrl = (ide) =>
    `https://datalab.sspcloud.fr/launcher/ide/${ide}-${sspCloudService}${gpuSuffix}?autoLaunch=true&onyxia.friendlyName=%C2%AB${chapterNoExtension}%C2%BB&init.personalInit=%C2%ABhttps%3A%2F%2Fraw.githubusercontent.com%2Flinogaliana%2Fpython-datascientist%2Fmaster%2Fsspcloud%2Finit-${ide}.sh%C2%BB&init.personalInitArgs=%C2%AB${encodedInitArgs}%C2%BB`;

  // NOTE(review): the Markdown form has an empty link text, so the link
  // renders invisibly — confirm whether a badge image was meant to be there.
  const badge = (href, imageSrc, alt) =>
    isMarkdown
      ? `[](${href})`
      : `<a href="${href}" target="_blank" rel="noopener"><img src="${imageSrc}" alt="${alt}"></a>`;

  let sspcloudJupyterLink = badge(
    launcherUrl("jupyter"),
    "https://img.shields.io/badge/SSP%20Cloud-Lancer_avec_Jupyter-orange?logo=Jupyter&logoColor=orange",
    "Onyxia"
  );
  if (split === 4) {
    sspcloudJupyterLink += '<br>';
  }

  let sspcloudVscodeLink = badge(
    launcherUrl("vscode"),
    "https://img.shields.io/badge/SSP%20Cloud-Lancer_avec_VSCode-blue?logo=visualstudiocode&logoColor=blue",
    "Onyxia"
  );
  if (split === 5) {
    sspcloudVscodeLink += '<br>';
  }

  if (onyxiaOnly) {
    return `${sspcloudJupyterLink}${sspcloudVscodeLink}`;
  }

  let colabLink = badge(
    `https://colab.research.google.com/github/${repoSlug}/blob/main${notebookRelPath}`,
    "https://colab.research.google.com/assets/colab-badge.svg",
    "Open In Colab"
  );
  if (split === 7) {
    colabLink += '<br>';
  }

  const joined = [
    githubLink,
    sspcloudVscodeLink,
    sspcloudJupyterLink,
    colabLink
  ].join("\n");

  return format === "html" ? `<p class="badges">${joined}</p>` : joined;
}
/**
 * Convenience entry point around `reminderBadges`: same options, but the
 * source path is passed as `fpath` and the defaults are `type = "html"` and
 * `split = 5`.
 *
 * @param {object} [options]
 * @param {string} [options.fpath] Forwarded as `sourceFile`.
 * @param {boolean} [options.onyxiaOnly=false] Forwarded unchanged.
 * @param {?number} [options.split=5] Forwarded unchanged.
 * @param {string|string[]} [options.type="html"] Forwarded unchanged.
 * @param {string} [options.sspCloudService="python"] Forwarded unchanged.
 * @param {boolean} [options.GPU=false] Forwarded unchanged.
 * @param {boolean} [options.correction=false] Forwarded unchanged.
 * @returns {*} Whatever `reminderBadges` returns for these options.
 */
function printBadges({
  fpath,
  onyxiaOnly = false,
  split = 5,
  type = "html",
  sspCloudService = "python",
  GPU = false,
  correction = false
} = {}) {
  // Only `fpath` is renamed; every other option keeps its name.
  return reminderBadges({
    sourceFile: fpath,
    type,
    split,
    onyxiaOnly,
    sspCloudService,
    GPU,
    correction
  });
}
@book{galiana2023,
author = {Galiana, Lino},
title = {Python Pour La Data Science},
date = {2023},
url = {https://pythonds.linogaliana.fr/},
doi = {10.5281/zenodo.8229676},
langid = {en}
}