Python pour la data science

Lino Galiana

doi:10.5281/zenodo.8229676

La partie Pandas a évolué récemment. Vous pouvez retrouver les contenus liés à Pandas dans les chapitres suivants :

Informations additionnelles

Environnement Python

Ce site a été construit automatiquement par le biais d’une action GitHub utilisant le logiciel de publication reproductible Quarto (version 1.8.26).

L’environnement utilisé pour obtenir les résultats est reproductible par le biais d’uv. Le fichier pyproject.toml utilisé pour construire cet environnement est disponible sur le dépôt linogaliana/python-datascientist.

pyproject.toml

[project]
name = "python-datascientist"
version = "0.1.0"
description = "Source code for Lino Galiana's Python for data science course"
readme = "README.md"
requires-python = ">=3.12,<3.13"
dependencies = [
    "altair==5.4.1",
    "black==24.8.0",
    "cartiflette",
    "contextily==1.6.2",
    "duckdb>=0.10.1",
    "folium>=0.19.6",
    "geoplot==0.5.1",
    "graphviz==0.20.3",
    "great-tables==0.12.0",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "jupyter-cache==1.0.0",
    "kaleido==0.2.1",
    "langchain-community==0.3.9",
    "loguru==0.7.3",
    "markdown>=3.8",
    "nbclient==0.10.0",
    "nbformat==5.10.4",
    "nltk>=3.9.1",
    "pip>=25.1.1",
    "plotly>=6.1.2",
    "plotnine>=0.15",
    "polars==1.8.2",
    "pyarrow==17.0.0",
    "pynsee==0.1.8",
    "python-dotenv==1.0.1",
    "pywaffle==1.1.1",
    "requests>=2.32.3",
    "scikit-image==0.24.0",
    "scipy==1.13.0",
    "spacy==3.8.4",
    "webdriver-manager==4.0.2",
    "wordcloud==1.9.3",
    "xlrd==2.0.1",
    "yellowbrick==1.5",
]

[tool.uv.sources]
cartiflette = { git = "https://github.com/inseefrlab/cartiflette" }

[dependency-groups]
dev = [
    "nb-clean>=4.0.1",
]

Pour utiliser exactement le même environnement (version de Python et packages), se reporter à la documentation d’uv.

Historique du fichier

// Summary sentence (displayed in French): the number of rows in table_commit,
// followed by the formatted creation and last-modification dates.
md`Ce fichier a été modifié __${table_commit.length}__ fois depuis sa création le ${creation_string} (dernière modification le ${last_modification_string})`

// Display the commit table built in the git_history_table cell, wrapped in a <div>.
html`<div>${git_history_table}</div>`

// Display the commit timeline built in the git_history_plot cell, wrapped in a <div>.
html`<div>${git_history_plot}</div>`

SHA	Date	Author	Description
91431fa2	2025-06-09 17:08:00	Lino Galiana	Improve homepage hero banner (#612)
e0d615e3	2024-05-03 11:15:29	Lino Galiana	Restructure la partie Pandas (#497)
4d678bfa	2024-04-23 12:09:46	linogaliana	Typo
d75641d7	2024-04-22 18:59:01	Lino Galiana	Editorialisation des chapitres de manipulation de données (#491)
7298c6c1	2024-04-19 21:34:00	Lino Galiana	Exercice great_tables (#489)
c03aa619	2024-01-16 17:33:18	Lino Galiana	Exercice sur les chemins relatifs (#483)
005d89b8	2023-12-20 17:23:04	Lino Galiana	Finalise l’affichage des statistiques Git (#478)
3fba6124	2023-12-17 18:16:42	Lino Galiana	Remove some badges from python (#476)
16842200	2023-12-02 12:06:40	Antoine Palazzolo	Première partie de relecture de fin du cours (#467)
1f23de28	2023-12-01 17:25:36	Lino Galiana	Stockage des images sur S3 (#466)
a06a2689	2023-11-23 18:23:28	Antoine Palazzolo	2ème relectures chapitres ML (#457)
889a71ba	2023-11-10 11:40:51	Antoine Palazzolo	Modification TP 3 (#443)
8071bbb1	2023-10-23 17:43:37	tomseimandi	Make minor changes to 02b, 03, 04a (#440)
df01f019	2023-10-10 15:55:04	Lino Galiana	Menus automatisés (#432)
7221e7b2	2023-10-10 14:00:44	Thomas Faria	Relecture Thomas TD Pandas (#431)
98bb8868	2023-10-09 11:50:03	Lino Galiana	typo
a7711832	2023-10-09 11:27:45	Antoine Palazzolo	Relecture TD2 par Antoine (#418)
ac80862b	2023-10-07 21:05:25	Lino Galiana	Relecture antuki (#427)
5ab34aa4	2023-10-04 14:54:20	Kim A	Relecture Kim pandas & git (#416)
7e03cea7	2023-10-04 14:07:17	Lino Galiana	Clean pandas tutorial and exercises (#417)
154f09e4	2023-09-26 14:59:11	Antoine Palazzolo	Des typos corrigées par Antoine (#411)
9a4e2267	2023-08-28 17:11:52	Lino Galiana	Action to check URL still exist (#399)
9977c5dc	2023-08-28 10:43:36	Lino Galiana	Fix bug path pandas (#397)
a8f90c2f	2023-08-28 09:26:12	Lino Galiana	Update featured paths (#396)
3bdf3b06	2023-08-25 11:23:02	Lino Galiana	Simplification de la structure 🤓 (#393)
5d4874a8	2023-08-11 15:09:33	Lino Galiana	Pimp les introductions des trois premières parties (#387)
130ed717	2023-07-18 19:37:11	Lino Galiana	Restructure les titres (#374)
ef28fefd	2023-07-07 08:14:42	Lino Galiana	Listing pour la première partie (#369)
f21a24d3	2023-07-02 10:58:15	Lino Galiana	Pipeline Quarto & Pages 🚀 (#365)
3c880d59	2022-12-27 17:34:59	Lino Galiana	Chapitre regex + Change les boites dans plusieurs chapitres (#339)
89d0798a	2022-11-02 10:19:58	Lino Galiana	Ajoute icone aux autres TP (#317)
a3eadd4d	2022-11-01 18:51:14	Romain Avouac	Modèle de notebooks de correction exécutables (#304)
af763cc9	2022-10-12 10:17:56	Lino Galiana	Reprise exercice geopandas (#294)
1ef97df0	2022-10-11 12:14:03	Lino Galiana	Relecture chapitre geopandas (#289)
e2b53ac9	2022-09-28 17:09:31	Lino Galiana	Retouche les chapitres pandas (#287)
eb8f9220	2022-09-22 17:40:43	Lino Galiana	Corrige bug TP pandas (#276)
f10815b5	2022-08-25 16:00:03	Lino Galiana	Notebooks should now look more beautiful (#260)
494a85ae	2022-08-05 14:49:56	Lino Galiana	Images featured ✨ (#252)
d201e3cd	2022-08-03 15:50:34	Lino Galiana	Pimp la homepage ✨ (#249)
12965bac	2022-05-25 15:53:27	Lino Galiana	:launch: Bascule vers quarto (#226)
9c71d6e7	2022-03-08 10:34:26	Lino Galiana	Plus d’éléments sur S3 (#218)
6777f038	2021-10-29 09:38:09	Lino Galiana	Notebooks corrections (#171)
2a8809fb	2021-10-27 12:05:34	Lino Galiana	Simplification des hooks pour gagner en flexibilité et clarté (#166)
b138cf3e	2021-10-21 18:05:59	Lino Galiana	Mise à jour TP webscraping et API (#164)
48706626	2021-10-05 08:29:33	Romain Avouac	fix and simplify pyinsee install (#157)
06779326	2021-10-03 15:32:51	Lino Galiana	Ajoute un code pour download pynsee (#156)
2fa78c9f	2021-09-27 11:24:19	Lino Galiana	Relecture de la partie numpy/pandas (#152)
85ba1194	2021-09-16 11:27:56	Lino Galiana	Relectures des TP KA avant 1er cours (#142)
2e4d5862	2021-09-02 12:03:39	Lino Galiana	Simplify badges generation (#130)
bf5ebc5f	2021-09-01 14:41:17	Lino Galiana	Fix problem import pynsee (#128)
4a317e39	2021-08-31 12:38:17	Lino Galiana	pynsee pour importer des données Insee 🚀 (#127)
4cdb759c	2021-05-12 10:37:23	Lino Galiana	:sparkles: :star2: Nouveau thème hugo :snake: :fire: (#105)
7f9f97bc	2021-04-30 21:44:04	Lino Galiana	🐳 + 🐍 New workflow (docker 🐳) and new dataset for modelization (2020 🇺🇸 elections) (#99)
6d010fa2	2020-09-29 18:45:34	Lino Galiana	Simplifie l’arborescence du site, partie 1 (#57)
66f9f87a	2020-09-24 19:23:04	Lino Galiana	Introduction des figures générées par python dans le site (#52)
4677769b	2020-09-15 18:19:24	Lino Galiana	Nettoyage des coquilles pour premiers TP (#37)
5c1e76d9	2020-09-09 11:25:38	Lino Galiana	Ajout des éléments webscraping, regex, API (#21)
d48e68fa	2020-09-08 18:35:07	Lino Galiana	Continuer la partie pandas (#13)

// Earliest commit date: each row's "Date" text is parsed with `new Date`
// and d3.min keeps the smallest resulting value.
creation = d3.min(table_commit, (commit) => new Date(commit.Date))

// Most recent commit date: each row's "Date" text is parsed with `new Date`
// and d3.max keeps the largest resulting value.
last_modification = d3.max(table_commit, (commit) => new Date(commit.Date))

// Creation date formatted for the "fr" locale with a numeric day,
// a spelled-out month and a numeric year.
creation_string = creation.toLocaleString(
  "fr",
  {day: "numeric", month: "long", year: "numeric"}
)

// Last-modification date formatted for the "fr" locale with a numeric day,
// a spelled-out month and a numeric year.
last_modification_string = last_modification.toLocaleString(
  "fr",
  {day: "numeric", month: "long", year: "numeric"}
)

// Table of the commits held in table_commit.
// Two columns get a custom Markdown formatter; the others (including "Date")
// have no formatter here and are rendered by Inputs.table as-is.
git_history_table = Inputs.table(table_commit, {
  format: {
    // Each SHA links to its commit page under github_repo.
    SHA: (sha) => md`[${sha}](${github_repo}/commit/${sha})`,
    // "#123"-style references in the description become pull request links.
    Description: (text) => md`${replacePullRequestPattern(text, github_repo)}`
  }
})

// Timeline of the commits in table_commit: one dot per commit, placed along x
// at its parsed "Date" value and on the y = 0 baseline.
git_history_plot = Plot.plot({
  marks: [
    // Horizontal baseline at y = 0.
    Plot.ruleY([0], {stroke: "royalblue"}),
    // Red outline on the commit selected by the pointer along x.
    // The accessor must read the "Date" column (capital D), like the dots below:
    // a lowercase `d.date` is undefined on every row and yields Invalid Date,
    // so the highlight could never be positioned on a commit.
    Plot.dot(
      table_commit,
      Plot.pointerX({x: (d) => new Date(d.Date), y: 0, stroke: "red"})
    ),
    // One filled dot per commit.
    Plot.dot(table_commit, {x: (d) => new Date(d.Date), y: 0, fill: "royalblue"})
  ]
})

/**
 * Turn every "#<digits>" reference found in a text into a Markdown link
 * pointing at the corresponding pull request.
 *
 * @param {string} inputString - Text that may contain references such as "#497".
 * @param {string} githubRepo - Repository base URL; "/pull/<number>" is appended to it.
 * @returns {string} The text with each reference rewritten as
 *   "[#<number>](<githubRepo>/pull/<number>)"; unchanged when nothing matches.
 */
function replacePullRequestPattern(inputString, githubRepo) {
    // `$1` is the captured run of digits, used for both the label and the URL.
    const pullRequestLink = `[#$1](${githubRepo}/pull/$1)`;

    return inputString.replace(/#(\d+)/g, pullRequestLink);
}

// Base URL of the GitHub repository; used to build the commit and pull request links.
github_repo = "https://github.com/linogaliana/python-datascientist"

table_commit = {

// Get the HTML table by its class name
var table = document.querySelector('.commit-table');

// Check if the table exists
if (table) {
    // Initialize an array to store the table data
    var dataArray = [];

    // Extract headers from the first row
    var headers = [];
    for (var i = 0; i < table.rows[0].cells.length; i++) {
        headers.push(table.rows[0].cells[i].textContent.trim());
    }

    // Iterate through the rows, starting from the second row
    for (var i = 1; i < table.rows.length; i++) {
        var row = table.rows[i];
        var rowData = {};

        // Iterate through the cells in the row
        for (var j = 0; j < row.cells.length; j++) {
            // Use headers as keys and cell content as values
            rowData[headers[j]] = row.cells[j].textContent.trim();
        }

        // Push the rowData object to the dataArray
        dataArray.push(rowData);
    }
  }

  return dataArray

}

// Hide the first element matching '.commit-table', if there is one.
{
  const commitTable = document.querySelector('.commit-table');

  // Nothing to hide when no such element is on the page.
  if (commitTable) {
    commitTable.style.display = 'none';
  }
}

Plot = require('@observablehq/plot@0.6.12/dist/plot.umd.min.js')

Retour au sommet

Citation

BibTeX

@book{galiana2023,
  author = {Galiana, Lino},
  title = {Python pour la data science},
  date = {2023},
  url = {https://pythonds.linogaliana.fr/},
  doi = {10.5281/zenodo.8229676},
  langid = {fr}
}

Veuillez citer ce travail comme suit :

Galiana, Lino. 2023. Python pour la data science. https://doi.org/10.5281/zenodo.8229676.