Python pour la data science

Lino Galiana

doi:10.5281/zenodo.8229676

La partie Pandas a évolué récemment. Vous pouvez retrouver les contenus liés à Pandas dans les chapitres suivants :

Informations additionnelles

Environnement Python

Ce site a été construit automatiquement par le biais d’une action GitHub utilisant le logiciel de publication reproductible Quarto (version 1.8.26).

L’environnement utilisé pour obtenir les résultats est reproductible par le biais d’uv. Le fichier pyproject.toml utilisé pour construire cet environnement est disponible sur le dépôt linogaliana/python-datascientist.

pyproject.toml

[project]
name = "python-datascientist"
version = "0.1.0"
description = "Source code for Lino Galiana's Python for data science course"
readme = "README.md"
requires-python = ">=3.12,<3.13"
dependencies = [
    "altair==5.4.1",
    "black==24.8.0",
    "cartiflette",
    "contextily==1.6.2",
    "duckdb>=0.10.1",
    "folium>=0.19.6",
    "geoplot==0.5.1",
    "graphviz==0.20.3",
    "great-tables==0.12.0",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "jupyter-cache==1.0.0",
    "kaleido==0.2.1",
    "langchain-community==0.3.9",
    "loguru==0.7.3",
    "markdown>=3.8",
    "nbclient==0.10.0",
    "nbformat==5.10.4",
    "nltk>=3.9.1",
    "pip>=25.1.1",
    "plotly>=6.1.2",
    "plotnine>=0.15",
    "polars==1.8.2",
    "pyarrow==17.0.0",
    "pynsee==0.1.8",
    "python-dotenv==1.0.1",
    "pywaffle==1.1.1",
    "requests>=2.32.3",
    "scikit-image==0.24.0",
    "scipy==1.13.0",
    "spacy==3.8.4",
    "webdriver-manager==4.0.2",
    "wordcloud==1.9.3",
    "xlrd==2.0.1",
    "yellowbrick==1.5",
]

[tool.uv.sources]
cartiflette = { git = "https://github.com/inseefrlab/cartiflette" }

[dependency-groups]
dev = [
    "nb-clean>=4.0.1",
]

Pour utiliser exactement le même environnement (version de Python et packages), se reporter à la documentation d’uv.

Historique du fichier

// Summary sentence: number of commits in table_commit, plus the formatted
// creation and last-modification dates computed in the cells below.
md`Ce fichier a été modifié __${table_commit.length}__ fois depuis sa création le ${creation_string} (dernière modification le ${last_modification_string})`

// Wraps the Inputs.table view of the commit history in a <div>.
html`<div>${git_history_table}</div>`

// Wraps the Plot timeline of the commit history in a <div>.
html`<div>${git_history_plot}</div>`

SHA	Date	Author	Description
91431fa2	2025-06-09 17:08:00	Lino Galiana	Improve homepage hero banner (#612)
e0d615e3	2024-05-03 11:15:29	Lino Galiana	Restructure la partie Pandas (#497)
c9f9f8a7	2024-04-24 15:09:35	Lino Galiana	Dark mode and CSS improvements (#494)
d75641d7	2024-04-22 18:59:01	Lino Galiana	Editorialisation des chapitres de manipulation de données (#491)
c03aa619	2024-01-16 17:33:18	Lino Galiana	Exercice sur les chemins relatifs (#483)
056c6068	2023-12-20 20:08:25	linogaliana	Change pandas image
005d89b8	2023-12-20 17:23:04	Lino Galiana	Finalise l’affichage des statistiques Git (#478)
3fba6124	2023-12-17 18:16:42	Lino Galiana	Remove some badges from python (#476)
16842200	2023-12-02 12:06:40	Antoine Palazzolo	Première partie de relecture de fin du cours (#467)
1f23de28	2023-12-01 17:25:36	Lino Galiana	Stockage des images sur S3 (#466)
a06a2689	2023-11-23 18:23:28	Antoine Palazzolo	2ème relectures chapitres ML (#457)
09654c71	2023-11-14 15:16:44	Antoine Palazzolo	Suggestions Git & Visualisation (#449)
cef6a0dd	2023-10-18 13:18:46	Lino Galiana	Allègement des actions github (#437)
97676f56	2023-10-14 17:56:44	Lino Galiana	Du style pour le site (#434)
7221e7b2	2023-10-10 14:00:44	Thomas Faria	Relecture Thomas TD Pandas (#431)
a7711832	2023-10-09 11:27:45	Antoine Palazzolo	Relecture TD2 par Antoine (#418)
ac80862b	2023-10-07 21:05:25	Lino Galiana	Relecture antuki (#427)
7e03cea7	2023-10-04 14:07:17	Lino Galiana	Clean pandas tutorial and exercises (#417)
e8d0062d	2023-09-26 15:54:49	Kim A	Relecture KA 25/09/2023 (#412)
154f09e4	2023-09-26 14:59:11	Antoine Palazzolo	Des typos corrigées par Antoine (#411)
9a4e2267	2023-08-28 17:11:52	Lino Galiana	Action to check URL still exist (#399)
80823022	2023-08-25 17:48:36	Lino Galiana	Mise à jour des scripts de construction des notebooks (#395)
3bdf3b06	2023-08-25 11:23:02	Lino Galiana	Simplification de la structure 🤓 (#393)
c312bdce	2023-08-11 18:06:25	Lino Galiana	A few controls for Quarto website (#389)
5d4874a8	2023-08-11 15:09:33	Lino Galiana	Pimp les introductions des trois premières parties (#387)
dde3e934	2023-07-21 22:22:05	Lino Galiana	Fix bug on chapter order (#385)
3560f1f8	2023-07-21 17:04:56	Lino Galiana	Build on smaller sized image (#384)
f146354c	2023-07-21 18:15:10	Lino Galiana	Update index.qmd
f6dde337	2023-07-18 22:32:00	Lino Galiana	Change badges (#376)
143e706c	2023-07-18 19:37:28	Lino Galiana	Améliore la navigation (#375)
130ed717	2023-07-18 19:37:11	Lino Galiana	Restructure les titres (#374)
ef28fefd	2023-07-07 08:14:42	Lino Galiana	Listing pour la première partie (#369)
64baaf81	2023-07-03 17:05:53	Lino Galiana	Script for branch deploy (#367)
f21a24d3	2023-07-02 10:58:15	Lino Galiana	Pipeline Quarto & Pages 🚀 (#365)
867325e4	2023-06-11 13:56:43	Lino Galiana	Add numeric_only argument (#359)
99188177	2022-12-30 15:10:59	Lino Galiana	Retour sur le chapitre DallE / StableDiffusion (#344)
94e7c0a2	2022-12-29 09:42:35	Lino Galiana	pip install pynsee (#342)
a8dd720f	2022-12-26 21:35:52	Lino Galiana	Improve aesthetics on Github (#338)
e2b53ac9	2022-09-28 17:09:31	Lino Galiana	Retouche les chapitres pandas (#287)
eb8f9220	2022-09-22 17:40:43	Lino Galiana	Corrige bug TP pandas (#276)
fd439f03	2022-09-19 09:37:50	avouacr	fix ssp cloud links
3056d410	2022-09-02 12:19:55	avouacr	fix all SSP Cloud launcher links
8042a167	2022-08-24 16:23:36	Lino Galiana	Box pour les notebooks :sparkles: (#256)
494a85ae	2022-08-05 14:49:56	Lino Galiana	Images featured ✨ (#252)
d201e3cd	2022-08-03 15:50:34	Lino Galiana	Pimp la homepage ✨ (#249)
2360ff7b	2022-08-02 16:29:57	Lino Galiana	Test wowchemy update (#247)
d3a54066	2022-06-27 17:44:30	Lino Galiana	Utilisation test du système de référence de quarto (#240)
1239e3e9	2022-06-21 14:05:15	Lino Galiana	Enonces (#239)
48606ddd	2022-05-31 19:05:11	Lino Galiana	Amélioration rendu dataframe pandas (#229)
12965bac	2022-05-25 15:53:27	Lino Galiana	:launch: Bascule vers quarto (#226)
9c71d6e7	2022-03-08 10:34:26	Lino Galiana	Plus d’éléments sur S3 (#218)
5cac236e	2021-12-16 19:46:43	Lino Galiana	un petit mot sur mercator (#201)
6777f038	2021-10-29 09:38:09	Lino Galiana	Notebooks corrections (#171)
2a8809fb	2021-10-27 12:05:34	Lino Galiana	Simplification des hooks pour gagner en flexibilité et clarté (#166)
5ad057f6	2021-10-10 15:13:16	Lino Galiana	Relectures pandas & geopandas (#159)
48706626	2021-10-05 08:29:33	Romain Avouac	fix and simplify pyinsee install (#157)
06779326	2021-10-03 15:32:51	Lino Galiana	Ajoute un code pour download pynsee (#156)
2fa78c9f	2021-09-27 11:24:19	Lino Galiana	Relecture de la partie numpy/pandas (#152)
85ba1194	2021-09-16 11:27:56	Lino Galiana	Relectures des TP KA avant 1er cours (#142)
2f4d3905	2021-09-02 15:12:29	Lino Galiana	Utilise un shortcode github (#131)
2e4d5862	2021-09-02 12:03:39	Lino Galiana	Simplify badges generation (#130)
4a317e39	2021-08-31 12:38:17	Lino Galiana	pynsee pour importer des données Insee 🚀 (#127)
2f7b52d9	2021-07-20 17:37:03	Lino Galiana	Improve notebooks automatic creation (#120)
6729a724	2021-06-22 18:07:05	Lino Galiana	Mise à jour badge onyxia (#115)
4cdb759c	2021-05-12 10:37:23	Lino Galiana	:sparkles: :star2: Nouveau thème hugo :snake: :fire: (#105)
175d377f	2021-05-04 18:29:26	Raphaele Adjerad	Quelques manipulations supplémentaires pandas (#106)
7f9f97bc	2021-04-30 21:44:04	Lino Galiana	🐳 + 🐍 New workflow (docker 🐳) and new dataset for modelization (2020 🇺🇸 elections) (#99)
0a0d0348	2021-03-26 20:16:22	Lino Galiana	Ajout d’une section sur S3 (#97)
6d010fa2	2020-09-29 18:45:34	Lino Galiana	Simplifie l’arborescence du site, partie 1 (#57)
66f9f87a	2020-09-24 19:23:04	Lino Galiana	Introduction des figures générées par python dans le site (#52)
76e206c0	2020-09-09 18:02:08	Lino Galiana	Finalisation du chapitre pandas (#24)
5c1e76d9	2020-09-09 11:25:38	Lino Galiana	Ajout des éléments webscraping, regex, API (#21)
d48e68fa	2020-09-08 18:35:07	Lino Galiana	Continuer la partie pandas (#13)
85365cad	2020-09-05 14:50:10	linogaliana	ajout badges onyxia
611be4dd	2020-09-05 14:27:47	linogaliana	modifs marginales
05593982	2020-09-05 14:22:55	linogaliana	modifs marginales
9c12c2cb	2020-09-04 17:39:09	Lino Galiana	Introduction à pandas (#11)

// Earliest commit date: each row's "Date" text is parsed into a Date object
// and d3.min keeps the smallest one.
creation = d3.min(table_commit, (commit) => new Date(commit.Date))

// Most recent commit date: each row's "Date" text is parsed into a Date object
// and d3.max keeps the largest one.
last_modification = d3.max(table_commit, (commit) => new Date(commit.Date))

// Creation date rendered for French readers: numeric day, full month name,
// numeric year.
creation_string = creation.toLocaleDateString("fr", {
  day: "numeric",
  month: "long",
  year: "numeric"
})

// Last-modification date rendered for French readers: numeric day, full month
// name, numeric year.
last_modification_string = last_modification.toLocaleDateString("fr", {
  day: "numeric",
  month: "long",
  year: "numeric"
})

// Interactive table of the commit history built from table_commit.
git_history_table = Inputs.table(table_commit, {
  format: {
    // Each SHA is rendered as a markdown link to that commit in github_repo.
    SHA: (sha) => md`[${sha}](${github_repo}/commit/${sha})`,
    // "#<number>" references in the commit message become pull-request links.
    Description: (message) => md`${replacePullRequestPattern(message, github_repo)}`
  }
})

// Timeline of the commit history: one dot per commit placed on a horizontal
// baseline at y = 0, with the commit nearest to the pointer outlined in red.
git_history_plot = Plot.plot({
  marks: [
    // Baseline the dots sit on.
    Plot.ruleY([0], {stroke: "royalblue"}),
    // Hover highlight. The rows of table_commit are keyed by the table header
    // text, so the field is "Date" (capital D); reading "date" yields
    // undefined and an Invalid Date for every commit.
    Plot.dot(
      table_commit,
      Plot.pointerX({x: (d) => new Date(d.Date), y: 0, stroke: "red"})
    ),
    // One filled dot per commit.
    Plot.dot(table_commit, {x: (d) => new Date(d.Date), y: 0, fill: "royalblue"})
  ]
})

/**
 * Turn every "#<digits>" reference in a string into a markdown link to the
 * corresponding pull request.
 *
 * @param {string} inputString - Text that may contain references such as "#123".
 * @param {string} githubRepo - Repository base URL, without a trailing slash.
 * @returns {string} The text with each reference rewritten as
 *   "[#123](<githubRepo>/pull/123)"; text without references is returned as is.
 */
function replacePullRequestPattern(inputString, githubRepo) {
    // $1 is the captured number; it is reused for the link label and the URL.
    const markdownLink = `[#$1](${githubRepo}/pull/$1)`;
    return inputString.replace(/#(\d+)/g, markdownLink);
}

// Base URL of the GitHub repository; the history table appends
// "/commit/<sha>" and "/pull/<number>" to it to build its links.
github_repo = "https://github.com/linogaliana/python-datascientist"

table_commit = {

// Get the HTML table by its class name
var table = document.querySelector('.commit-table');

// Check if the table exists
if (table) {
    // Initialize an array to store the table data
    var dataArray = [];

    // Extract headers from the first row
    var headers = [];
    for (var i = 0; i < table.rows[0].cells.length; i++) {
        headers.push(table.rows[0].cells[i].textContent.trim());
    }

    // Iterate through the rows, starting from the second row
    for (var i = 1; i < table.rows.length; i++) {
        var row = table.rows[i];
        var rowData = {};

        // Iterate through the cells in the row
        for (var j = 0; j < row.cells.length; j++) {
            // Use headers as keys and cell content as values
            rowData[headers[j]] = row.cells[j].textContent.trim();
        }

        // Push the rowData object to the dataArray
        dataArray.push(rowData);
    }
  }

  return dataArray

}

// Hide the raw HTML table with class 'commit-table' once the page has it.
// NOTE(review): presumably because git_history_table displays the same rows
// interactively; confirm before changing.
{
  const rawCommitTable = document.querySelector('.commit-table');

  // Nothing to hide when the page carries no such table.
  if (rawCommitTable) {
    rawCommitTable.style.display = 'none';
  }
}

Plot = require('@observablehq/plot@0.6.12/dist/plot.umd.min.js')

Retour au sommet

Citation

BibTeX

@book{galiana2023,
  author = {Galiana, Lino},
  title = {Python pour la data science},
  date = {2023},
  url = {https://pythonds.linogaliana.fr/},
  doi = {10.5281/zenodo.8229676},
  langid = {fr}
}

Veuillez citer ce travail comme suit :

Galiana, Lino. 2023. Python pour la data science. https://doi.org/10.5281/zenodo.8229676.