Python pour la data science

Lino Galiana

doi:10.5281/zenodo.8229676

La partie GeoPandas a évolué récemment. Vous pouvez retrouver les contenus liés à GeoPandas dans le chapitre suivant (éléments magistraux et exercices) :

Introduction aux données spatiales avec Geopandas

Informations additionnelles

Environnement Python

Ce site a été construit automatiquement par le biais d’une action GitHub utilisant le logiciel de publication reproductible Quarto (version 1.8.26).

L’environnement utilisé pour obtenir les résultats est reproductible par le biais d’uv. Le fichier pyproject.toml utilisé pour construire cet environnement est disponible sur le dépôt linogaliana/python-datascientist.

pyproject.toml

[project]
name = "python-datascientist"
version = "0.1.0"
description = "Source code for Lino Galiana's Python for data science course"
readme = "README.md"
requires-python = ">=3.13,<3.14"
dependencies = [
    "altair>=6.0.0",
    "black==24.8.0",
    "cartiflette",
    "contextily==1.6.2",
    "duckdb>=0.10.1",
    "folium>=0.19.6",
    "gdal!=3.11.1",
    "geoplot==0.5.1",
    "graphviz==0.20.3",
    "great-tables>=0.12.0",
    "gt-extras>=0.0.8",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "jupyter-cache==1.0.0",
    "kaleido==0.2.1",
    "langchain-community>=0.3.27",
    "loguru==0.7.3",
    "markdown>=3.8",
    "nbclient==0.10.0",
    "nbformat==5.10.4",
    "nltk>=3.9.1",
    "pip>=25.1.1",
    "plotly>=6.1.2",
    "plotnine>=0.15",
    "polars==1.8.2",
    "pyarrow>=17.0.0",
    "pynsee==0.1.8",
    "python-dotenv==1.0.1",
    "python-frontmatter>=1.1.0",
    "pywaffle==1.1.1",
    "requests>=2.32.3",
    "scikit-image==0.24.0",
    "scipy>=1.13.0",
    "selenium<4.39.0",
    "spacy>=3.8.4",
    "webdriver-manager==4.0.2",
    "wordcloud==1.9.3",
]

[tool.uv.sources]
cartiflette = { git = "https://github.com/inseefrlab/cartiflette" }
gdal = [
  { index = "gdal-wheels", marker = "sys_platform == 'linux'" },
  { index = "geospatial_wheels", marker = "sys_platform == 'win32'" },
]

[[tool.uv.index]]
name = "geospatial_wheels"
url = "https://nathanjmcdougall.github.io/geospatial-wheels-index/"
explicit = true

[[tool.uv.index]]
name = "gdal-wheels"
url = "https://gitlab.com/api/v4/projects/61637378/packages/pypi/simple"
explicit = true

[dependency-groups]
dev = [
    "nb-clean>=4.0.1",
]

Pour utiliser exactement le même environnement (version de Python et packages), se reporter à la documentation d’uv.

Historique du fichier

// Summary sentence: number of rows in table_commit, plus the formatted
// creation and last-modification dates computed in the cells below.
md`Ce fichier a été modifié __${table_commit.length}__ fois depuis sa création le ${creation_string} (dernière modification le ${last_modification_string})`

// Wraps the commit table (git_history_table) in a <div>.
html`<div>${git_history_table}</div>`

// Wraps the commit timeline (git_history_plot) in a <div>.
html`<div>${git_history_plot}</div>`

SHA	Date	Author	Description
91431fa2	2025-06-09 17:08:00	Lino Galiana	Improve homepage hero banner (#612)
91bfa525	2024-05-27 15:01:32	Lino Galiana	Restructuration partie geopandas (#500)
06d003a1	2024-04-23 10:09:22	Lino Galiana	Continue la restructuration des sous-parties (#492)
8c316d0a	2024-04-05 19:00:59	Lino Galiana	Fix cartiflette deprecated snippets (#487)
ce33d5dc	2024-01-16 15:47:22	Lino Galiana	Adapte les exemples de code de cartiflette (#482)
005d89b8	2023-12-20 17:23:04	Lino Galiana	Finalise l’affichage des statistiques Git (#478)
3fba6124	2023-12-17 18:16:42	Lino Galiana	Remove some badges from python (#476)
1f23de28	2023-12-01 17:25:36	Lino Galiana	Stockage des images sur S3 (#466)
69cf52bd	2023-11-21 16:12:37	Antoine Palazzolo	[On-going] Suggestions chapitres modélisation (#452)
09654c71	2023-11-14 15:16:44	Antoine Palazzolo	Suggestions Git & Visualisation (#449)
8728352d	2023-10-24 11:50:08	Lino Galiana	Correction coquilles geopandas (#444)
102ce9fd	2023-10-22 11:39:37	Thomas Faria	Relecture Thomas, première partie (#438)
a7711832	2023-10-09 11:27:45	Antoine Palazzolo	Relecture TD2 par Antoine (#418)
f8831e77	2023-10-09 10:53:34	Lino Galiana	Relecture antuki geopandas (#429)
154f09e4	2023-09-26 14:59:11	Antoine Palazzolo	Des typos corrigées par Antoine (#411)
9a4e2267	2023-08-28 17:11:52	Lino Galiana	Action to check URL still exist (#399)
a8f90c2f	2023-08-28 09:26:12	Lino Galiana	Update featured paths (#396)
a9198dee	2023-08-25 18:33:00	Lino Galiana	Geopandas tutorial
3bdf3b06	2023-08-25 11:23:02	Lino Galiana	Simplification de la structure 🤓 (#393)
130ed717	2023-07-18 19:37:11	Lino Galiana	Restructure les titres (#374)
f0c583c0	2023-07-07 14:12:22	Lino Galiana	Images viz (#371)
f21a24d3	2023-07-02 10:58:15	Lino Galiana	Pipeline Quarto & Pages 🚀 (#365)
8d81b5f2	2023-02-18 18:21:59	Lino Galiana	Change source get_vectorfile (#355)
d2eb6c2f	2023-02-18 17:15:35	Lino Galiana	Update index.qmd
3912a7ea	2023-02-07 17:18:25	Lino Galiana	Back to IGN provider (#350)
0312041b	2022-12-11 13:43:49	Lino Galiana	reprend box de geopandas (#332)
6662800b	2022-10-28 11:14:27	Lino Galiana	Change IGN dataset provider (#308)
f394b233	2022-10-13 14:32:05	Lino Galiana	Dernieres modifs geopandas (#298)
8df6bbc6	2022-10-12 11:50:57	Lino Galiana	Corrige les tags du tuto geopandas (#295)
af763cc9	2022-10-12 10:17:56	Lino Galiana	Reprise exercice geopandas (#294)
1ef97df0	2022-10-11 12:14:03	Lino Galiana	Relecture chapitre geopandas (#289)
f10815b5	2022-08-25 16:00:03	Lino Galiana	Notebooks should now look more beautiful (#260)
494a85ae	2022-08-05 14:49:56	Lino Galiana	Images featured ✨ (#252)
d201e3cd	2022-08-03 15:50:34	Lino Galiana	Pimp la homepage ✨ (#249)
12965bac	2022-05-25 15:53:27	Lino Galiana	:launch: Bascule vers quarto (#226)
9c71d6e7	2022-03-08 10:34:26	Lino Galiana	Plus d’éléments sur S3 (#218)
5cac236e	2021-12-16 19:46:43	Lino Galiana	un petit mot sur mercator (#201)
77120b89	2021-11-01 20:28:28	Lino Galiana	Ajoute une section sur le geocodage (#173)
6777f038	2021-10-29 09:38:09	Lino Galiana	Notebooks corrections (#171)
2a8809fb	2021-10-27 12:05:34	Lino Galiana	Simplification des hooks pour gagner en flexibilité et clarté (#166)
735e6775	2021-10-19 09:46:12	Lino Galiana	Règle problème des cartes qui s’affichent pas (#165)
5ad057f6	2021-10-10 15:13:16	Lino Galiana	Relectures pandas & geopandas (#159)
2e4d5862	2021-09-02 12:03:39	Lino Galiana	Simplify badges generation (#130)
4cdb759c	2021-05-12 10:37:23	Lino Galiana	:sparkles: :star2: Nouveau thème hugo :snake: :fire: (#105)
7f9f97bc	2021-04-30 21:44:04	Lino Galiana	🐳 + 🐍 New workflow (docker 🐳) and new dataset for modelization (2020 🇺🇸 elections) (#99)
6d010fa2	2020-09-29 18:45:34	Lino Galiana	Simplifie l’arborescence du site, partie 1 (#57)
66f9f87a	2020-09-24 19:23:04	Lino Galiana	Introduction des figures générées par python dans le site (#52)
badc4929	2020-09-22 18:36:33	Lino Galiana	Finalize geopandas section (#48)
ffb05cf5	2020-09-10 17:18:15	Lino Galiana	Partie sur les données spatiales (#20) :warning: pas fini

// Earliest commit date found in the "Date" column of table_commit.
creation = d3.min(table_commit, (commit) => new Date(commit.Date))

// Most recent commit date found in the "Date" column of table_commit.
last_modification = d3.max(table_commit, (commit) => new Date(commit.Date))

// Creation date rendered with the "fr" locale: numeric day, long month name, numeric year.
creation_string = creation.toLocaleString(
  "fr",
  { day: "numeric", month: "long", year: "numeric" }
)

// Last-modification date rendered with the "fr" locale: numeric day, long month name, numeric year.
last_modification_string = last_modification.toLocaleString(
  "fr",
  { day: "numeric", month: "long", year: "numeric" }
)

// Table view of the commit history.
// - SHA cells become markdown links to `${github_repo}/commit/<sha>`.
// - Description cells have their "#<number>" references turned into
//   pull-request links by replacePullRequestPattern.
// - Date has no custom formatter, so the cell value is shown as stored.
git_history_table = Inputs.table(table_commit, {
  format: {
    SHA: (sha) => md`[${sha}](${github_repo}/commit/${sha})`,
    Description: (text) => md`${replacePullRequestPattern(text, github_repo)}`
  }
})

// Timeline of the commits in table_commit, drawn along a horizontal rule at y = 0.
// Each commit is a blue dot positioned by its "Date" column; a red-stroked dot
// is drawn through Plot.pointerX to highlight a commit under the pointer.
git_history_plot = Plot.plot({
  marks: [
    Plot.ruleY([0], {stroke: "royalblue"}),
    // The accessor must read the capitalised "Date" key, which is the header
    // text of the commit table. A lower-case `d.date` is undefined and turns
    // every x value into an Invalid Date, so the highlight never appears.
    Plot.dot(
          table_commit,
          Plot.pointerX({x: (d) => new Date(d.Date), y: 0, stroke: "red"})),
    Plot.dot(table_commit, {x: (d) => new Date(d.Date), y: 0, fill: "royalblue"})
  ]
})

/**
 * Turn every "#<digits>" reference in a string into a markdown link to the
 * matching pull request of a repository.
 *
 * @param {string} inputString - Text that may contain "#123"-style references.
 * @param {string} githubRepo - Base URL of the repository, without trailing slash.
 * @returns {string} The text with each reference rewritten as
 *   "[#123](<githubRepo>/pull/123)"; text without references is returned unchanged.
 */
function replacePullRequestPattern(inputString, githubRepo) {
    // "$1" is the captured number; it appears in both the link label and the URL.
    const linkTemplate = `[#$1](${githubRepo}/pull/$1)`;
    return inputString.replace(/#(\d+)/g, linkTemplate);
}

// Base URL of the GitHub repository; used to build the commit and pull-request links.
github_repo = "https://github.com/linogaliana/python-datascientist"

table_commit = {

// Get the HTML table by its class name
var table = document.querySelector('.commit-table');

// Check if the table exists
if (table) {
    // Initialize an array to store the table data
    var dataArray = [];

    // Extract headers from the first row
    var headers = [];
    for (var i = 0; i < table.rows[0].cells.length; i++) {
        headers.push(table.rows[0].cells[i].textContent.trim());
    }

    // Iterate through the rows, starting from the second row
    for (var i = 1; i < table.rows.length; i++) {
        var row = table.rows[i];
        var rowData = {};

        // Iterate through the cells in the row
        for (var j = 0; j < row.cells.length; j++) {
            // Use headers as keys and cell content as values
            rowData[headers[j]] = row.cells[j].textContent.trim();
        }

        // Push the rowData object to the dataArray
        dataArray.push(rowData);
    }
  }

  return dataArray

}

// Hide the raw HTML table with class 'commit-table', if it is present on the page.
{
  const commitTable = document.querySelector('.commit-table');

  // querySelector returns null when nothing matches; only touch a real element.
  if (commitTable !== null) {
      commitTable.style.display = 'none';
  }
}

Plot = require('@observablehq/plot@0.6.12/dist/plot.umd.min.js')

Retour au sommet

Citation

BibTeX

@book{galiana2025,
  author = {Galiana, Lino},
  title = {Python pour la data science},
  date = {2025},
  url = {https://pythonds.linogaliana.fr/},
  doi = {10.5281/zenodo.8229676},
  langid = {fr}
}

Veuillez citer ce travail comme suit :

Galiana, Lino. 2025. Python pour la data science. https://doi.org/10.5281/zenodo.8229676.