Python pour la data science

Lino Galiana

doi:10.5281/zenodo.8229676

La partie GeoPandas a évolué récemment. Vous pouvez retrouver les contenus liés à GeoPandas dans le chapitre suivant (éléments magistraux et exercices):

Introduction aux données spatiales avec Geopandas

Informations additionnelles

Environnement Python

Ce site a été construit automatiquement par le biais d’une action GitHub utilisant le logiciel de publication reproductible Quarto (version 1.8.26).

L’environnement utilisé pour obtenir les résultats est reproductible par le biais d’uv. Le fichier pyproject.toml utilisé pour construire cet environnement est disponible sur le dépôt linogaliana/python-datascientist.

pyproject.toml

[project]
name = "python-datascientist"
version = "0.1.0"
description = "Source code for Lino Galiana's Python for data science course"
readme = "README.md"
requires-python = ">=3.13,<3.14"
dependencies = [
    "altair>=6.0.0",
    "black==24.8.0",
    "cartiflette",
    "contextily==1.6.2",
    "duckdb>=0.10.1",
    "folium>=0.19.6",
    "gdal!=3.11.1",
    "geoplot==0.5.1",
    "graphviz==0.20.3",
    "great-tables>=0.12.0",
    "gt-extras>=0.0.8",
    "ipykernel>=6.29.5",
    "jupyter>=1.1.1",
    "jupyter-cache==1.0.0",
    "kaleido==0.2.1",
    "langchain-community>=0.3.27",
    "loguru==0.7.3",
    "markdown>=3.8",
    "nbclient==0.10.0",
    "nbformat==5.10.4",
    "nltk>=3.9.1",
    "pip>=25.1.1",
    "plotly>=6.1.2",
    "plotnine>=0.15",
    "polars==1.8.2",
    "pyarrow>=17.0.0",
    "pynsee==0.1.8",
    "python-dotenv==1.0.1",
    "python-frontmatter>=1.1.0",
    "pywaffle==1.1.1",
    "requests>=2.32.3",
    "scikit-image==0.24.0",
    "scipy>=1.13.0",
    "selenium<4.39.0",
    "spacy>=3.8.4",
    "webdriver-manager==4.0.2",
    "wordcloud==1.9.3",
]

[tool.uv.sources]
cartiflette = { git = "https://github.com/inseefrlab/cartiflette" }
gdal = [
  { index = "gdal-wheels", marker = "sys_platform == 'linux'" },
  { index = "geospatial_wheels", marker = "sys_platform == 'win32'" },
]

[[tool.uv.index]]
name = "geospatial_wheels"
url = "https://nathanjmcdougall.github.io/geospatial-wheels-index/"
explicit = true

[[tool.uv.index]]
name = "gdal-wheels"
url = "https://gitlab.com/api/v4/projects/61637378/packages/pypi/simple"
explicit = true

[dependency-groups]
dev = [
    "nb-clean>=4.0.1",
]

Pour utiliser exactement le même environnement (version de Python et packages), se reporter à la documentation d’uv.

Historique du fichier

// Summary sentence: number of rows in table_commit, followed by the formatted
// creation date and the formatted last-modification date.
md`Ce fichier a été modifié __${table_commit.length}__ fois depuis sa création le ${creation_string} (dernière modification le ${last_modification_string})`

// Wraps the value of the git_history_table cell in a <div>.
html`<div>${git_history_table}</div>`

// Wraps the value of the git_history_plot cell in a <div>.
html`<div>${git_history_plot}</div>`

SHA	Date	Author	Description
91431fa2	2025-06-09 17:08:00	Lino Galiana	Improve homepage hero banner (#612)
91bfa525	2024-05-27 15:01:32	Lino Galiana	Restructuration partie geopandas (#500)
c80ffa8f	2024-04-06 16:12:43	linogaliana	Problem with geopandas
3f7fff1e	2024-04-06 13:55:28	Lino Galiana	Update 03_geopandas_TP.qmd
e57f9cc2	2024-04-06 13:12:25	Lino Galiana	Retire commentaire inutile
8c316d0a	2024-04-05 19:00:59	Lino Galiana	Fix cartiflette deprecated snippets (#487)
ce33d5dc	2024-01-16 15:47:22	Lino Galiana	Adapte les exemples de code de cartiflette (#482)
005d89b8	2023-12-20 17:23:04	Lino Galiana	Finalise l’affichage des statistiques Git (#478)
3fba6124	2023-12-17 18:16:42	Lino Galiana	Remove some badges from python (#476)
1f23de28	2023-12-01 17:25:36	Lino Galiana	Stockage des images sur S3 (#466)
09654c71	2023-11-14 15:16:44	Antoine Palazzolo	Suggestions Git & Visualisation (#449)
889a71ba	2023-11-10 11:40:51	Antoine Palazzolo	Modification TP 3 (#443)
8728352d	2023-10-24 11:50:08	Lino Galiana	Correction coquilles geopandas (#444)
8071bbb1	2023-10-23 17:43:37	tomseimandi	Make minor changes to 02b, 03, 04a (#440)
102ce9fd	2023-10-22 11:39:37	Thomas Faria	Relecture Thomas, première partie (#438)
e918be69	2023-10-09 12:39:31	linogaliana	change featured
f8831e77	2023-10-09 10:53:34	Lino Galiana	Relecture antuki geopandas (#429)
20432f77	2023-09-27 15:31:22	Lino Galiana	Restructure le TP GeoPandas (#414)
338c07ef	2023-09-26 13:44:45	linogaliana	eval false
154f09e4	2023-09-26 14:59:11	Antoine Palazzolo	Des typos corrigées par Antoine (#411)
a8f90c2f	2023-08-28 09:26:12	Lino Galiana	Update featured paths (#396)
3bdf3b06	2023-08-25 11:23:02	Lino Galiana	Simplification de la structure 🤓 (#393)
130ed717	2023-07-18 19:37:11	Lino Galiana	Restructure les titres (#374)
ef28fefd	2023-07-07 08:14:42	Lino Galiana	Listing pour la première partie (#369)
f21a24d3	2023-07-02 10:58:15	Lino Galiana	Pipeline Quarto & Pages 🚀 (#365)
bdf396d0	2023-06-11 17:22:29	Lino Galiana	update geopandas part for pandas v2 (#360)
3912a7ea	2023-02-07 17:18:25	Lino Galiana	Back to IGN provider (#350)
3b8715e8	2022-12-11 18:52:23	Lino Galiana	TP geopandas (#333)
2e215a9d	2022-10-28 14:35:09	Lino Galiana	Solve problem geopandas TP (#309)
f394b233	2022-10-13 14:32:05	Lino Galiana	Dernieres modifs geopandas (#298)
af763cc9	2022-10-12 10:17:56	Lino Galiana	Reprise exercice geopandas (#294)
f10815b5	2022-08-25 16:00:03	Lino Galiana	Notebooks should now look more beautiful (#260)
494a85ae	2022-08-05 14:49:56	Lino Galiana	Images featured ✨ (#252)
d201e3cd	2022-08-03 15:50:34	Lino Galiana	Pimp la homepage ✨ (#249)
12965bac	2022-05-25 15:53:27	Lino Galiana	:launch: Bascule vers quarto (#226)
9c71d6e7	2022-03-08 10:34:26	Lino Galiana	Plus d’éléments sur S3 (#218)
6777f038	2021-10-29 09:38:09	Lino Galiana	Notebooks corrections (#171)
2a8809fb	2021-10-27 12:05:34	Lino Galiana	Simplification des hooks pour gagner en flexibilité et clarté (#166)
735e6775	2021-10-19 09:46:12	Lino Galiana	Règle problème des cartes qui s’affichent pas (#165)
5ad057f6	2021-10-10 15:13:16	Lino Galiana	Relectures pandas & geopandas (#159)
2e4d5862	2021-09-02 12:03:39	Lino Galiana	Simplify badges generation (#130)
80877d20	2021-06-28 11:34:24	Lino Galiana	Ajout d’un exercice de NLP à partir openfood database (#98)
4cdb759c	2021-05-12 10:37:23	Lino Galiana	:sparkles: :star2: Nouveau thème hugo :snake: :fire: (#105)
7f9f97bc	2021-04-30 21:44:04	Lino Galiana	🐳 + 🐍 New workflow (docker 🐳) and new dataset for modelization (2020 🇺🇸 elections) (#99)
6d010fa2	2020-09-29 18:45:34	Lino Galiana	Simplifie l’arborescence du site, partie 1 (#57)
66f9f87a	2020-09-24 19:23:04	Lino Galiana	Introduction des figures générées par python dans le site (#52)
badc4929	2020-09-22 18:36:33	Lino Galiana	Finalize geopandas section (#48)
15b7dad6	2020-09-18 11:03:41	Lino Galiana	Finalisation TP geopandas (#33)
ffb05cf5	2020-09-10 17:18:15	Lino Galiana	Partie sur les données spatiales (#20) :warning: pas fini

// Earliest commit date found in table_commit (each row's `Date` text is parsed with `new Date`).
creation = d3.min(table_commit, (commit) => new Date(commit.Date))

// Latest commit date found in table_commit (each row's `Date` text is parsed with `new Date`).
last_modification = d3.max(table_commit, (commit) => new Date(commit.Date))

// Creation date formatted for the "fr" locale: numeric day, full month name, numeric year.
creation_string = creation.toLocaleString("fr", {
  day: "numeric",
  month: "long",
  year: "numeric",
})

// Last-modification date formatted for the "fr" locale: numeric day, full month name, numeric year.
last_modification_string = last_modification.toLocaleString("fr", {
  day: "numeric",
  month: "long",
  year: "numeric",
})

// Table of the commits held in table_commit, with custom rendering for two columns.
git_history_table = Inputs.table(table_commit, {
  format: {
    // Each SHA is rendered as a Markdown link to `<github_repo>/commit/<sha>`.
    SHA: (sha) => md`[${sha}](${github_repo}/commit/${sha})`,
    // `#<number>` references in the description become pull-request links.
    Description: (description) =>
      md`${replacePullRequestPattern(description, github_repo)}`
    // No formatter is set for the Date column: it keeps Inputs.table's default rendering.
  }
})

// One-dimensional timeline of the commits: every commit is a blue dot placed at
// its date on a horizontal baseline, and a red ring follows the pointer.
git_history_plot = Plot.plot({
  marks: [
    // Baseline at y = 0 on which the dots sit.
    Plot.ruleY([0], {stroke: "royalblue"}),
    // Pointer-driven highlight. The accessor must read the `Date` key (capital
    // D), which is the column header used as key in table_commit rows; reading
    // `d.date` yields undefined, hence an Invalid Date for every commit.
    Plot.dot(
      table_commit,
      Plot.pointerX({x: (d) => new Date(d.Date), y: 0, stroke: "red"})
    ),
    // All commits, always visible.
    Plot.dot(table_commit, {x: (d) => new Date(d.Date), y: 0, fill: "royalblue"})
  ]
})

/**
 * Rewrite every `#<digits>` reference of a string as a Markdown link.
 *
 * @param {string} inputString - Text that may contain references such as `#123`.
 * @param {string} githubRepo - Repository URL; `/pull/<digits>` is appended to it.
 * @returns {string} The text where each `#N` became `[#N](<githubRepo>/pull/N)`.
 */
function replacePullRequestPattern(inputString, githubRepo) {
    // `$1` stands for the captured digits, used in both the label and the URL.
    const markdownLink = `[#$1](${githubRepo}/pull/$1)`;
    return inputString.replace(/#(\d+)/g, markdownLink);
}

// Base URL of the GitHub repository; the history table builds its commit and
// pull-request links from it.
github_repo = "https://github.com/linogaliana/python-datascientist"

table_commit = {

// Get the HTML table by its class name
var table = document.querySelector('.commit-table');

// Check if the table exists
if (table) {
    // Initialize an array to store the table data
    var dataArray = [];

    // Extract headers from the first row
    var headers = [];
    for (var i = 0; i < table.rows[0].cells.length; i++) {
        headers.push(table.rows[0].cells[i].textContent.trim());
    }

    // Iterate through the rows, starting from the second row
    for (var i = 1; i < table.rows.length; i++) {
        var row = table.rows[i];
        var rowData = {};

        // Iterate through the cells in the row
        for (var j = 0; j < row.cells.length; j++) {
            // Use headers as keys and cell content as values
            rowData[headers[j]] = row.cells[j].textContent.trim();
        }

        // Push the rowData object to the dataArray
        dataArray.push(rowData);
    }
  }

  return dataArray

}

// Hide the element with class 'commit-table' once the page is rendered.
{
  const commitTable = document.querySelector('.commit-table');

  // querySelector yields null when nothing matches; there is nothing to hide then.
  if (commitTable !== null) {
    commitTable.style.display = 'none';
  }
}

Plot = require('@observablehq/plot@0.6.12/dist/plot.umd.min.js')

Retour au sommet

Citation

BibTeX

@book{galiana2025,
  author = {Galiana, Lino},
  title = {Python pour la data science},
  date = {2025},
  url = {https://pythonds.linogaliana.fr/},
  doi = {10.5281/zenodo.8229676},
  langid = {fr}
}

Veuillez citer ce travail comme suit :

Galiana, Lino. 2025. Python pour la data science. https://doi.org/10.5281/zenodo.8229676.