diff --git a/.~lock.notes.odt# b/.~lock.notes.odt# deleted file mode 100644 index 9f5f00a..0000000 --- a/.~lock.notes.odt# +++ /dev/null @@ -1 +0,0 @@ -,eleonore1,eleonore,24.06.2024 18:03,file:///home/eleonore1/.config/libreoffice/4; \ No newline at end of file diff --git a/development_explanation.odt b/development_explanation.odt deleted file mode 100644 index 73fbf01..0000000 Binary files a/development_explanation.odt and /dev/null differ diff --git a/texte_explicatif.txt b/doc/developement_explanations_fr.md similarity index 80% rename from texte_explicatif.txt rename to doc/developement_explanations_fr.md index afb8b3b..9ce3eaf 100644 --- a/texte_explicatif.txt +++ b/doc/developement_explanations_fr.md @@ -7,9 +7,9 @@ Comment minimiser la consommation en ressources mémoire et flux de données d'u # Objectif - L’objectif est d'obtenir la dernière version (ou une version précise) d’un dépôt git, en utilisant le moins de ressources possible. Par ressources, on entend le flux de données qui part du remote pour arriver au dossier local, ainsi que la place mémoire occupée par le dépôt sur le serveur local. + L’objectif est d'obtenir la dernière version (ou une version précise) d’un dépôt git, en utilisant le moins de ressources possible. Par ressources, nous entendons le flux de données qui part du remote pour arriver au dossier local, ainsi que la place mémoire occupée par le dépôt sur le serveur local. - Le dépôt Git créé n'enverra aucune donnée au remote. Il n'aura aucune utilité de l'historique. Il pourra éventuellement conserver certains fichiers locaux en plus de ses clonages Git. En cas de conflit, le remote aura toujours raison. Il incluera les éventuels submodules. Il peut vouloir télécharger le dernier commit de HEAD( par défaut) ou bien un commit d'une certaine référence, c'est-à-dire branche ou tag. + Le dépôt Git créé n'enverra aucune donnée au remote. Il aura accès aux tags mais pas à l'historique. 
Il pourra éventuellement conserver certains fichiers locaux non-tracés en plus de ses clonages Git. Il incluera les éventuels submodules. Il peut vouloir télécharger le dernier commit de main (par défaut) ou bien un commit d'une certaine référence, c'est-à-dire branche ou tag. # Procédé @@ -32,13 +32,22 @@ git clone --depth=1 --recurse-submodules --remote-submodules ## Pour mettre à jour : +git fetch --tags --depth=1 --prune --prune-tags origin $ref +git reset --hard --recurse-submodules FETCH_HEAD git submodule update --init --recursive --force --depth=1 --remote -git fetch --tags --depth=1 --prune --prune-tags origin -git reset --hard origin/main git reflog expire --expire=now --all git gc --aggressive --prune=now [git clean -qfdx] + git fetch --tags --depth=1 --prune --prune-tags origin + + tags permet de fetch les tags, elle doit être précisée y compris si un tag est fetched par référence + depth=1 permet de considérer uniquement le dernier commit + prune permet de supprimer du dossier remote en local les références qui ne sont plus accessibles + prune-tags permet non seulement de supprimer du dossier remote en local les références qui ne sont plus accessibles, mais aussi de supprimer les tags locaux qui n'existent pas sur le remote + + git reset --hard --recurse-submodules origin/main + git submodule update --init --recursive --force --depth=1 --remote init met à jour le fichier .gitmodules @@ -46,19 +55,11 @@ git gc --aggressive --prune=now force permet d'ignorer les changements locaux aux submodules et d'automatiquement check out la nouvelle version depth=1 permet de considérer uniquement le dernier commit du submodule remote permet de mettre à jour depuis le remote submodule originel - - git fetch --tags --depth=1 --prune --prune-tags origin - - tags permet de fetch les tags, elle doit être précisée y compris si un tag est fetched par référence - depth=1 permet de considérer uniquement le dernier commit - prune permet de supprimer du dossier remote en local 
les références qui ne sont plus accessibles - prune-tags permet non seulement de supprimer du dossier remote en local les références qui ne sont plus accessibles, mais aussi de supprimer les les tags locaux qui n'existent pas sur le remote - - git reset --hard origin/main + ATTENTION : l'ordre compte. Exécuter cette instruction en premier la rendrait inefficace en raison de l'option --recurse-submodules du git reset. Celle-ci est néanmoins à conserver pour gérer le cas de délétion du submodule. git reflog expire --expire=now --all - cette ligne permet de marquer tous les reflogs isolés comme expirés immédiatement au lieu de 90 jours plus tard. Cela permet un plus grand nettoyage par git gc. git rev-list permet de vérifier quels objets sont reliés et ne seront pas marqués comme expirés. + cette commande permet de marquer tous les reflogs isolés comme expirés immédiatement au lieu de 90 jours plus tard. Cela permet un plus grand nettoyage par git gc. git rev-list permet de vérifier quels objets sont reliés et ne seront pas marqués comme expirés. git gc --aggressive --prune=now @@ -76,13 +77,13 @@ Un clone superficiel consiste à ne pas cloner tout l'historique du dépôt. Un clone partiel consiste à ne pas cloner tous les fichiers et/ou dossiers du dépôt, selon un filtre. Les filtres peuvent concerner les Binary Large Objects (blobs) ou bien les trees. Si le filtre concerne l'ancienneté, un clone partiel peut alors aussi être un clone superficiel. Les clones partiels peuvent être créés grâce à la commande git clone --filter. Lors de check-out ou switch, des objets initialement ignorés par le clone --filter peuvent être importés. -Dans notre cas, nous ne souhaitons garder que le tout dernier commit, qui sera dans tous les cas laissé passer par git clone --filter et ce n'est donc pas pertinent. +Dans notre cas, nous ne souhaitons garder qu’un commit précis, qui sera dans tous les cas laissé passer par git clone --filter et ce n'est donc pas pertinent. 
Les clones partiels peuvent aussi être créés par le sparse-checking. Certains fichiers et/ou dossiers n'apparaissent alors pas du tout dans le dossier local et ne sont pas concernés par les opérations git porcelain (de surface). Néanmoins, les objets associés à ces fichiers et dossiers sont toujours stockés dans le .git -Un clone superficiel peut être grâce à l'option depth= qui indique le nombre de commits à conserver. Cette option est disponible pour la commande clone mais aussi fetch. +Un clone superficiel peut être créé grâce à l'option depth= qui indique le nombre de commits à conserver. Cette option est disponible pour la commande clone mais aussi fetch. ## Le large file storage -LFS est une extension Git qui permet de manipuler les fichiers choisis (par nom, expression ou taille) à l'aide d'un cache local. En effet, les fichiers sont remplacés par des références dans le dépôt GIt et un dossier local hors du dépôt est créé pour stocker les fichiers. Ils y sont téléchargés de manière lazy, c'est-à-dire uniquement lorsqu'ils sont checked out. Toutes les anciennes versions sont stockées sur un serveur en ligne. +LFS est une extension Git qui permet de manipuler les fichiers choisis (par nom, expression ou taille) à l'aide d'un cache local. En pratique, les fichiers sont remplacés par des références dans le dépôt Git et un dossier local hors du dépôt est créé pour stocker les fichiers. Ils y sont téléchargés de manière lazy, c'est-à-dire uniquement lorsqu'ils sont checked out. Toutes les anciennes versions sont stockées sur un serveur en ligne. C'est un mécanisme très intéressant, que nous ne retenons pas pour la même raison que le clone --filter : nous ne souhaitons garder que la toute dernière version des fichiers, qui serait dans tous les cas téléchargée par LFS. 
## Suppression de l'historique @@ -93,14 +94,14 @@ La librairie Java repo-cleaner fonctionne, néanmoins la documentation Git estim Nous souhaitons supprimer tout l'historique sans filtrer, donc la commande git fetch --depth=1 suivie d'un git checkout, reset ou merge nous convient. ## checkout ? merge ? reset ? -Une fois que l'on a fetched les modifications dans notre dossier local remote/, quelle est la meilleure façon de les appliquer à notre index et working directory ? +Une fois que nous avons fetched les modifications dans notre dossier local remote/, quelle est la meilleure façon de les appliquer à notre index et working directory ? Nous allons comparer 4 possibilités : git merge -X, git merge -s, git reset --hard, git checkout -f -B. Les résultats finaux sont identiques à l'exception de git merge -X. -Dans le cas de git merge, on ne souhaite pas résoudre de conflits manuellement. Le remote doit toujours prévaloir sur les différences locales. +Dans le cas de git merge, nous ne souhaitons pas résoudre de conflits manuellement. Le remote doit toujours prévaloir sur les différences locales. ### git merge -X theirs Cette commande applique une stratégie ort qui en cas de conflit, donne la prévalence à theirs. -Néanmoins, puisque l'on travaille en --depth=1, les deux branches n'ont pas d'ancêtre commun, et on doit d'ailleurs fournir l'option --allow-unrelated-histories. L'absence d'ancêtre commun empêche Git de reconnaître les similitudes à l'intérieur d'un même fichier. N'importe quelle modification d'un fichier tracé sur ours, même sur une nouvelle ligne, causera ainsi un conflit et sera écrasée. Cette commande permet tout de même de sauvegarder les fichiers nouvellement créés et committés sur ours. +Néanmoins, puisque nous travaillons en --depth=1, les deux branches n'ont pas d'ancêtre commun, et nous devons d'ailleurs fournir l'option --allow-unrelated-histories. 
L'absence d'ancêtre commun empêche Git de reconnaître les similitudes à l'intérieur d'un même fichier. N'importe quelle modification d'un fichier tracé sur ours, même sur une nouvelle ligne, causera ainsi un conflit et sera écrasée. Cette commande permet tout de même de sauvegarder les fichiers nouvellement créés et committés sur ours. Les fichiers non-committés nouvellement créés sont conservés à moins que git clean soit run. Avantage : fichier commités créés sur ours conservés Inconvénient : en cas de délétion d'un fichier sur theirs qui existait déjà sur ours : il ne sera pas supprimé sur ours. @@ -108,9 +109,9 @@ Inconvénient : en cas de délétion d'un fichier sur theirs qui existait déjà ### git merge -s ours [attention les notions de theirs et ours sont inversées ici car git merge -s theirs n'existe pas] Cette commande applique une stratégie ours qui donne la prévalence à ours, qu'il y ait conflit ou pas. Elle va ignorer tous les changements et créations de fichiers committés sur theirs. Elle va également ignorer les modifications non committées. Les créations de fichier non-committées sont conservées à moins que git clean soit run. C'est le même résultat qu'avec la commande git reset --hard. -Comme l'option git merge -s theirs n'existe pas, on doit faire une petite manipulation : +Comme l'option git merge -s theirs n'existe pas, nous devons faire une petite manipulation : #on veut merge origin/main sur main, en donnant la prévalence à origin/main -#création d'une nouvelle branche temporaire temp que l'on check out, sourcée sur origin/main +#création d'une nouvelle branche temporaire temp qui est checked out, sourcée sur origin/main git switch -c temp origin/main #merge de main sur temp, en donnant la prévalence à temp qui est identique à origin/main git merge -s ours --allow-unrelated-histories main @@ -123,9 +124,8 @@ git branch -D temp Avantage : Inconvénient : création d'une branche temporaire. 
-### git checkout –force -B main origin/main - -Cette commande est équivalente à git merge -s ours et à git reset --hard, à la différence près que l'on finit en detached HEAD state, ce qui n'est pas un problème dans notre cas puisque l'on ne souhaite pas push de modification depuis notre dépôt. +### git checkout --force -B main origin/main +Cette commande est équivalente à git merge -s ours et à git reset --hard, à la différence près que nous finissons en detached HEAD state, ce qui n'est pas un problème dans notre cas puisque nous ne souhaitons pas push de modification depuis notre dépôt. Avantage : Inconvénient : detached HEAD state. @@ -138,9 +138,10 @@ Les tests nous montrent que les options les plus économes en mémoire sont git ### Gestion des submodules Les submodules sont clonés au début via les options de git clone --recurse-submodules --remote-submodules. -Puis ils sont mis à jour par git submodule update --init --recursive --force --depth=1 --remote. +Puis ils sont mis à jour par git submodule update --init --recursive --force --depth=1 --remote. +Git reset --hard doit avoir l’option --recurse-submodules pour pouvoir supprimer des submodules du working directory. Les mêmes règles s'appliquent aux submodules qu'au reste du dépôt. Il est possible de préciser dans le fichier .gitmodules des règles d'import des submodules, comme un certain tag ou une certaine branche par exemple. -En retirant --remote-submodules du git clone et --remote du git submodule update, alors les submodules seront identiques au dépôt que l'on clone et plus au dépôt originel du submodule. +En retirant --remote-submodules du git clone et --remote du git submodule update, alors les submodules seront identiques au dépôt que nous clonons et plus au dépôt originel du submodule.
##Tests @@ -274,4 +275,3 @@ TEST 29: after addition then removal of a 1M file memory usage: 8 - diff --git a/doc/development_explanations.md b/doc/development_explanations.md new file mode 100644 index 0000000..5ffeeec --- /dev/null +++ b/doc/development_explanations.md @@ -0,0 +1,270 @@ +How to minimize the memory and data flow consumption of Git cloning? + +# Background + + Jean-Cloud is a small association providing hosting services on second-hand hardware. It is currently launching the Shlagernetes project, a software that enables services to be distributed and managed across several second-hand servers. Git is used in certain cases to install a service on a server or update it. + + +# Objective + + The objective is to obtain the latest version (or a specific version) of a git repository, using as few resources as possible. By resources, we mean the data flow from the remote to the local folder, as well as the memory space occupied by the repository on the local server. + + The created Git repository will not send any data to the remote. It has access to tags but not history. It can keep some local untracked files in addition to its Git clones. It includes submodules if present. It can either download the last main commit (default) or a commit from a certain reference, i.e. branch or tag. + +# Procedure + + Tests on various commands were carried out on a dummy repository. The test file is transportable and can be downloaded here. Note that to run locally, you need to authorize the protocol for local files: git config --global protocol.file.allow always. This is not the default configuration, as it may represent a security vulnerability. + + The tests consist in analyzing the memory space taken up by the local repository using the bash command "du", as well as analyzing the text produced by Git during cloning. 
+ +# Final results +The final chosen combination is : +## To clone : +git clone --depth=1 --recurse-submodules --remote-submodules + + depth=1 allows you to clone only the last commit along with the necessary objects. By default, it is single-branch. + recurse-submodules ensures that the contents of submodules are cloned + remote-submodules ensures submodule content is cloned from the original remote submodule + shallow-submodules ensures that only the latest submodule commit is imported (for this to work locally, prefix the submodule path with file://) + + +## To update : +git fetch --tags --depth=1 --prune --prune-tags origin $ref +git reset --hard --recurse-submodules FETCH_HEAD +git submodule update --init --recursive --force --depth=1 --remote +git reflog expire --expire=now --all +git gc --aggressive --prune=now +[git clean -qfdx] + + git fetch --tags --depth=1 --prune --prune-tags origin + + tags is used to fetch tags, and must be specified even if a tag is fetched by reference + depth=1 allows only the last commit to be considered + prune deletes references that are no longer accessible from the local remote folder + prune-tags not only deletes references in the local remote repository that are no longer accessible, but also deletes local tags that do not exist on the remote. + + git reset --hard --recurse-submodules FETCH_HEAD + + git submodule update --init --recursive --force --depth=1 --remote + + init initializes submodules that are not yet registered, copying their settings from .gitmodules into .git/config + recursive applies the command to submodules of submodules etc. + force ignores local changes to submodules and automatically checks out the new version + depth=1 allows you to consider only the last submodule commit + remote updates from the original remote submodule + CAREFUL: order matters here. Running this instruction before git reset would make it ineffective, because of the --recurse-submodules option of git reset. That option is nevertheless kept on git reset to handle the case where a submodule is deleted.
+ + git reflog expire --expire=now --all + + this command marks all isolated reflogs as expired immediately instead of 90 days later. This makes for a bigger git gc clean up. git rev-list allows you to check which objects are linked and will not be marked as expired. + + git gc --aggressive --prune=now + + this command removes unreachable objects and reorganizes the repository to optimize it. + aggressive invokes repack and takes longer. repack undoes and redoes packs, which are compression units. + + [git clean -qfdx] if this command is omitted, files created without committing are retained. + +This combination does not save any changes made to our repository, apart from the creation of non-committed files if git clean is omitted. + +# Details +Here is a summary of the different solutions we have explored to reduce the footprint of our Git repository. +## Partial vs. shallow cloning +Shallow cloning means not cloning the entire repository history. + +A partial clone means not cloning all the files and/or folders in the repository, according to a filter. Filters may concern Binary Large Objects (blobs) or trees. If the filter concerns age, then a partial clone can also be a shallow clone. +Partial clones can be created using the git clone --filter command. +During check-out or switch operations, objects initially ignored by the --filter clone can be imported. In our case, we only want to keep one precise commit, which will in any case be let through by git clone --filter, so that option is not relevant here. +Partial clones can also be created by sparse-checkout. Some files and/or folders then do not appear at all in the local folder and are not affected by git porcelain (surface) operations. Nevertheless, the objects associated with these files and folders are still stored in the .git repository. + +A shallow clone can be created using the depth= option, which specifies the number of commits to be kept. This option is available for both the clone and fetch commands.
+ +## Large file storage +LFS is a Git extension that lets you manipulate selected files (by name, expression or size) using a local cache. In practice, files are replaced by references in the Git repository and a local folder outside the repository is created to store the files. They are downloaded lazily, i.e. only when checked out. All older versions are stored on an online server. +This is a very interesting mechanism, which we will not use for the same reason as the --filter clone: we only want to keep one specific version of the files, which would in any case be downloaded by LFS. + +## Delete history +The git filter-branch command is not recommended by the Git documentation. It has several security and performance flaws. It can be used to rewrite branch history using filters. + +The Java repo-cleaner library works, but the Git documentation considers the Python filter-repo library to be faster and more secure. We do not wish to install either Python or Java, hence we will not dig any deeper into these two possibilities here. + +We want to delete the entire history without filtering, so the git command fetch --depth=1 followed by a git checkout, reset or merge works for us. + +## checkout ? merge ? reset ? +Once we have fetched the changes to our local remote/ folder, what is the best way to apply them to our index and working directory? +Let us compare 4 possibilities: git merge -X, git merge -s, git reset --hard, git checkout -f -B. The final results are identical, except for git merge -X. + +In the case of git merge, we do not wish to resolve conflicts manually. Remote must always take precedence over local differences. + +### git merge -X theirs +This command applies an ort strategy which, in the event of a conflict, gives precedence to theirs. +However, since we are working in --depth=1, the two branches have no common ancestor, and the --allow-unrelated-histories option must be supplied. 
The absence of a common ancestor prevents Git from recognizing similarities within the same file. Any modification to a tracked file on ours, even on a new line, will thus cause a conflict and be overwritten. This command does, however, save newly created and committed files on ours. +Newly created uncommitted files are kept unless git clean is run. +Advantage: committed files created on ours are saved. +Disadvantage: in the event of deletion of a file on theirs that already existed on ours: it will not be deleted on ours. + +### git merge -s ours +[caution: the notions of theirs and ours are reversed here, as git merge -s theirs does not exist]. +This command applies an ours strategy that gives prevalence to ours, whether there is a conflict or not. It will ignore all changes and file creations committed to theirs. It will also ignore uncommitted modifications. Uncommitted file creations are retained unless git clean is run. This is the same result as with the git reset --hard command. +As the git merge -s theirs option does not exist, we need to do a little manipulation: +#we want to merge origin/main on main, giving prevalence to origin/main +#create a new temporary branch temp that we check out, sourced on origin/main +git switch -c temp origin/main +#merge main on temp, giving prevalence to temp which is identical to origin/main +git merge -s ours --allow-unrelated-histories main +#return to main +git checkout main +#merge temp on main +git merge --allow-unrelated-histories temp +#delete temp +git branch -D temp +Advantage: +Disadvantage: creation of a temporary branch. + +### git checkout --force -B main origin/main +This command is equivalent to git merge -s ours and git reset --hard, with the difference that you end up in detached HEAD state, which does not cause any problem in our case since we do not want to push any changes from our repository. +Advantage : +Disadvantage: detached HEAD state.
+ +### git reset --hard +git reset --hard is equivalent to git merge -s ours and git checkout --force -B. +Advantage: +Disadvantage: + +Tests show that the most memory-efficient options are git checkout --force -B, git merge -s ours and git reset --hard, which all do the same thing. However, git reset --hard does not involve the creation of a temporary branch and does not end in detached HEAD state, hence it is the one we choose. + +### Submodule management +Submodules are initially cloned using git clone --recurse-submodules --remote-submodules. +They are updated using git submodule update --init --recursive --force --depth=1 --remote. +Git reset --hard must be supplied with the --recurse-submodules option in order to delete submodules from the working directory. +The same rules apply to submodules as to the rest of the repository. In the .gitmodules file, it is possible to specify rules for importing submodules, such as a certain tag or branch. By removing --remote-submodules from git clone and --remote from git submodule update, submodules will be identical to the repository being cloned and no longer to the original submodule repository. + +## Tests +### Script description +The script consists of thirty tests, numbered 0 to 29 (listed in the results below), based on three functions: generate_random_file, get_storage_used and get_bandwidth. + +generate_random_file uses the bash command dd and /dev/random. +get_storage_used uses the bash command du. +get_bandwidth retrieves the output of Git commands and extracts the traffic displayed. This does not take submodule traffic into account. + +The first six tests (0 to 5) concern cloning. +The following tests involve updating the repository using different commands, with three cases for each command: after adding a file, after deleting a file, after adding then deleting a file. + +### README extract + +NAME performance_tests.sh +SYNOPSIS performance_tests.sh [-a] [-h] [-n number] OPTIONS + -a executes all the tests.
+ -n number executes test number -h prints the help. + +### Results + ======================================= Tests on the initial populating of the repository +============================================================= TEST0 +TEST 0: classic cloning. +memory usage: 22668 +bandwidth usage (submodule excluded): 8.49 MiB +============================================================= TEST1 +TEST 1: --single-branch cloning. +memory usage: 22168 +bandwidth usage (submodule excluded): 8.00 MiB +============================================================= TEST2 +TEST 2: --depth=1 --no-single-branch +memory usage: 17552 +bandwidth usage (submodule excluded): 3.49 MiB +============================================================= TEST3 +TEST 3: --depth=1 with single-branch (default)) +memory usage: 17052 +bandwidth usage (submodule excluded): 3.00 MiB +============================================================= TEST4 +TEST 4: --depth=1 with single-branch (default) and reflog and gc +HEAD is now at 23700cf adding submodule_for_performance_testing module +memory usage: 17056 +bandwidth usage (submodule excluded): 3.00 MiB +============================================================= TEST5 +TEST 5 : sparse-checking only sample0 with depth=1 +memory usage: 10060 +bandwidth usage (submodule excluded): unknown +============================================ Tests on the updating of the repository +================================================= classic fetching+checking out +============================================================= TEST6 +TEST 6: after addition of a 1M file +memory usage: +2108 +============================================================= TEST7 +TEST 7: after removal of a 1M file +memory usage: -972 +============================================================= TEST8 +TEST 8: after addition then removal of a 1M file +memory usage: 1088 +============================================== fetching+checking out with --depth=1 
+============================================================= TEST9 +TEST 9: after addition of a 1M file +memory usage: +2112 +============================================================= TEST10 +TEST 10: after removal of a 1M file +memory usage: -968 +============================================================= TEST11 +TEST 11: after addition then removal of a 1M file +memory usage: 48 +========================================= --depth=1 fetching+checking out reflog and gc +============================================================= TEST12 +TEST 12: after addition of a 1M file +memory usage: +2052 +============================================================= TEST13 +TEST 13: after removal of a 1M file +memory usage: -1020 +============================================================= TEST14 +TEST 14: after addition then removal of a 1M file +memory usage: 4 +================================================ --depth=1 fetching+ reset --hard +============================================================= TEST15 +TEST 15: after addition of a 1M file +memory usage: +2116 +============================================================= TEST16 +TEST 16: after removal of a 1M file +memory usage: -964 +============================================================= TEST17 +TEST 17: after addition then removal of a 1M file +memory usage: 52 +======================================= --depth=1 fetching+ reset --hard and reflog and gc +============================================================= TEST18 +TEST 18: after addition of a 1M file +memory usage: 2056 +============================================================= TEST19 +TEST 19: after removal of a 1M file +memory usage: -1016 +============================================================= TEST20 +TEST 20: after addition then removal of a 1M file +memory usage: 8 +============================ --depth=1 fetching+checking out after modification applied in submodule 
+============================================================= TEST21 +TEST 21: after addition of a 1M file +memory usage: 2112 +============================================================= TEST22 +TEST 22: after removal of a 1M file +memory usage: -976 +============================================================= TEST23 +TEST 23: after addition then removal of a 1M file +memory usage: 48 +==================================== --depth=1 fetching+merging -X theirs with reflog and gc +============================================================= TEST24 +TEST 24: after addition of a 1M file +memory usage: +2056 +============================================================= TEST25 +TEST 25: after removal of a 1M file +memory usage: 8 +============================================================= TEST26 +TEST 26: after addition then removal of a 1M file +memory usage: 8 +===================================== --depth=1 fetching+merging -s ours with reflog and gc +============================================================= TEST27 +TEST 27: after addition of a 1M file +memory usage: +2056 +============================================================= TEST28 +TEST 28: after removal of a 1M file +memory usage: -1016 +============================================================= TEST29 +TEST 29: after addition then removal of a 1M file +memory usage: 8 + + diff --git a/explications_developpement.odt b/explications_developpement.odt deleted file mode 100644 index 47360c6..0000000 Binary files a/explications_developpement.odt and /dev/null differ diff --git a/git_update.sh b/git_update.sh deleted file mode 100755 index 2056059..0000000 --- a/git_update.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -Help(){ - echo " -NAME - git_update.sh -SYNOPSIS - git_update.sh [-h] [-r ref] [-d dest] [-H] [-a] repository -OPTIONS - -h prints the help. - -r specifies the reference to the commit to be synchronized. It can be a tag or a branch. 
By default, it is the last commit of branch main. - -d specifies the destination of the clone or update. Directory must be empty if a new clone is to be made. - If the repository to be cloned is local, and its path is passed as a relative path, the path should start from the destination. - To avoid mistakes, absolute paths are advised. - -H allows the $HOME directory to be used by git_update.sh. By default, git_update.sh cannot access $HOME to prevent default behavior. - If you need the global .gitconfig located in your $HOME to be used, you should supply the -H option. - -a specifies that the aggressive option of the git garbage collection must be used. Only advised when changes happen in many different objects. Will slow down the execution. -DESCRIPTION - This script will replace the destination with the wanted commit of a git repository. The history is not preserved but tags are. Untracked files remain. - The git commands have been chosen so as to minimize the memory and bandwidth usages." -} - -#variables -summary="$0 [options] " -ref=main -dst='.' -use_home=false -be_aggressive="false" - -while getopts ":hr:d:H" option; do - case $option in - h) # display Help - Help - exit;; - r) # desired branch or tag - ref="$OPTARG";; - d) # destination of clone - dst="$OPTARG";; - H) # use real home dir - use_home="true";; - a) #use -a in git gc call - be_aggressive="true";; - \?) # invalid option - echo "Error: Invalid option here" - exit;; - esac -done -shift $((OPTIND-1)) - -repo="$1" -if [ -z "$repo" ] ; then - exit "$0: Empty repo given\n$summary" -fi - -if [ ! $use_home ] ; then - set -a - export HOME=/dev/null - set +a -fi - -mkdir -p "$dst" -cd "$dst" - -if [ -d .git ] ; then - echo "updating..." 
- git fetch --tags --depth=1 --prune --prune-tags --force origin $ref - git reset --hard --recurse-submodules FETCH_HEAD - git submodule update --init --recursive --force --depth=1 --remote - #garbage collection of anything unreachable at the moment - git reflog expire --expire=now --all - if "$be_aggressive" ; then - git gc --prune=now --aggressive - elsels - git gc --prune=now - fi -else -echo "cloning..." - clone_dst='.' - git clone -b "$ref" --recurse-submodules --shallow-submodules --depth 1 "$repo" "$clone_dst" -fi - diff --git a/notes.odt b/notes.odt deleted file mode 100644 index d2f8ff3..0000000 Binary files a/notes.odt and /dev/null differ diff --git a/performance_tests.sh b/performance_tests.sh deleted file mode 100755 index c3d093d..0000000 --- a/performance_tests.sh +++ /dev/null @@ -1,1041 +0,0 @@ -#!/bin/bash -. driglibash-base - -REPO_NAME=git_update_testing -REPO_PATH=./remote -WITH_SUBMODULE="true" -SUB_NAME="submodule_for_performance_testing" -FILES_TO_KEEP='sample0' -REMOTE="./remote/performance_testing" - -if [ "$WITH_SUBMODULE" = "true" ]; then - bash creation_repo.sh -s &> /dev/null -else - bash creation_repo.sh &> /dev/null -fi - -Help() -{ - echo " -NAME - performance_tests.sh -SYNOPSIS - performance_tests.sh [-a] [-h] [-n number] -OPTIONS - -a excutes all the tests. - -n number executes test number - -h prints the help. -DESCRIPTION - This script is in writing. It allows you to measure memory and bandwidth usage. The first four test different cloning methods. Te following apply changes to the local remote before testing fetching and merging commands. 
- TEST0: classic cloning - TEST1: --single-branch cloning - TEST2: --depth=1 --no-single-branch cloning - TEST3: --depth=1 cloning - TEST4: --depth=1 with reflog and gc cloning - TEST5: sparse-checking 1M sample0 cloning - _________________ - TEST6: classic fetching+checking out after addition of a 1M file - TEST7: classic fetching+checking out after removal of a 1M file - TEST8: classic fetching+checking out after addition then removal of a 1M file - - TEST9: --depth=1 fetching+checking out after addition of a 1M file - TEST10: --depth=1 fetching+checking out after removal of a 1M file - TEST11: --depth=1 fetching+checking out after addition then removal of 1M a file - - TEST12: --depth=1 fetching+checking out with reflog annd gc after addition of a 1M file - TEST13: --depth=1 fetching+checking out with reflog annd gc after removal of a 1M file - TEST14: --depth=1 fetching+checking out with reflog annd gc after addition then removal of a 1M file - - TEST15: --depth=1 fetching+ --reset=hard after addition of a 1M file - TEST16: --depth=1 fetching+ --reset=hard after removal of a 1M file - TEST17: --depth=1 fetching+ --reset=hard after addition then removal of a 1M file - - TEST18: --depth=1 fetching+ --reset=hard and reflog and gc after addition of a 1M file - TEST19: --depth=1 fetching+ --reset=hard and reflog and gc after removal of a 1M file - TEST20: --depth=1 fetching+ --reset=hard and reflog and gc after addition then removal of a 1M file - - TEST21: --depth=1 fetching+checking out after addition of a 1M file in submodule - TEST22: --depth=1 fetching+checking out after removal of a 1M file in submodule - TEST23: --depth=1 fetching+checking out after addition then removal of a 1M file in submodule - - TEST24: --depth=1 fetching+merging -X theirs with reflog and gc after addition of a 1M file - TEST25: --depth=1 fetching+merging -X theirs with reflog and gc after removal of a 1M file - TEST26: --depth=1 fetching+merging -X theirs with reflog and gc after 
addition then removal of a 1M file - - TEST27: --depth=1 fetching+merging -s ours with reflog and gc after addition of a 1M file - TEST28: --depth=1 fetching+merging -s ours with reflog and gc after removal of a 1M file - TEST29: --depth=1 fetching+merging -s ours with reflog and gc after addition then removal of a 1M file" -} - -#USEFUL FUNCTIONS FOR THE TESTS -create_random_file(){ - run dd if=/dev/urandom of=$1 bs=$2 count=1 &> /dev/null -} - -get_storage_used(){ - mem=$(du $1 | tail -n1 | tr -cd [:digit:]) -} - -get_bandwidth(){ - bw="unknown" - bw=$(grep -e "Receiving objects:" $1 | grep -o "Receiving objects: [[:alnum:]%/(),. ]*" | tail -n1) - bw=${bw#*,} -} - -#TESTS ON THE INITIAL POPULATING OF THE REPO -section "Tests on the initial populating of the repository" -test0(){ - section TEST0 - echo "TEST 0: classic cloning." - run git clone --recurse-submodules --remote-submodules --progress --no-local $1 &> cloning_text - run cd $REPO_NAME - run git submodule update --init --recursive --force --remote --progress &> /dev/null - run cd .. - get_storage_used "./$REPO_NAME" - get_bandwidth cloning_text - echo "memory usage: $mem" - echo "bandwidth usage (submodule excluded): $bw" - run rm cloning_text - run rm -rf $REPO_NAME -} - -test1(){ - section TEST1 - echo "TEST 1: --single-branch cloning." - run git clone --recurse-submodules --remote-submodules --progress --single-branch --no-local $1 &> cloning_text - run cd $REPO_NAME - run git submodule update --init --recursive --force --remote --progress &> /dev/null - run cd .. 
- get_storage_used ./$REPO_NAME - get_bandwidth cloning_text - echo "memory usage: $mem" - echo "bandwidth usage (submodule excluded): $bw" - run rm cloning_text - run rm -rf $REPO_NAME -} - -test2(){ - section TEST2 - echo "TEST 2: --depth=1 --no-single-branch" - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local --no-single-branch $1 &> cloning_text - run cd $REPO_NAME - run git submodule update --init --recursive --force --remote --progress --depth=1 --no-single-branch &> /dev/null - run cd .. - get_storage_used ./$REPO_NAME - get_bandwidth cloning_text - echo "memory usage: $mem" - echo "bandwidth usage (submodule excluded): $bw" - run rm cloning_text - run rm -rf $REPO_NAME -} - -test3(){ - section TEST3 - echo "TEST 3: --depth=1 with single-branch (default))" - run git clone --recurse-submodules --remote-submodules --progress --single-branch --no-local --depth=1 $1 &> cloning_text - run cd $REPO_NAME - run git submodule update --init --recursive --force --remote --progress --depth=1 &> /dev/null - run cd .. - get_storage_used ./$REPO_NAME - get_bandwidth cloning_text - echo "memory usage: $mem" - echo "bandwidth usage (submodule excluded): $bw" - run rm cloning_text - run rm -rf $REPO_NAME -} - -test4(){ - section TEST4 - echo "TEST 4: --depth=1 with single-branch (default) and reflog and gc" - run git clone --recurse-submodules --remote-submodules --progress --single-branch --no-local --depth=1 $1 &> cloning_text - run cd $REPO_NAME - run git submodule update --init --recursive --force --remote --progress --depth=1 &> /dev/null - run git reset --hard - run cd .. 
- get_storage_used ./$REPO_NAME - get_bandwidth cloning_text - echo "memory usage: $mem" - echo "bandwidth usage (submodule excluded): $bw" - run rm cloning_text - run rm -rf $REPO_NAME -} - -test5(){ - section TEST5 - run mkdir $REPO_NAME - run echo "TEST 5 : sparse-checking only $FILES_TO_KEEP with depth=1" - #creating a git repo with sparse-checking settings - run cd $REPO_NAME - run git init -q - run git config core.sparsecheckout true - run echo $FILES_TO_KEEP >> .git/info/sparse-checkout - #pulling from the remote with sparse-checking enabled - run git remote add -f origin ../$1 &> /dev/null - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - echo "memory usage: $mem" - echo "bandwidth usage (submodule excluded): unknown" - run cd .. - run rm -rf $REPO_NAME -} - -#Tests on the updating of the repository -#classic fetching -test6(){ - section TEST6 - run echo 'TEST 6: after addition of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --remote &> /dev/null - run git fetch --progress --tags --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: +$mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. 
- run rm -rf performance_testing -} -test7(){ - section TEST7 - run echo 'TEST 7: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --remote &> /dev/null - run git fetch --progress --tags --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run git clean -df - run cd ../.. - run rm -rf performance_testing -} -test8(){ - section TEST8 - run echo 'TEST 8: after addition then removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --remote &> /dev/null - run git fetch --progress --tags --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. 
- run rm -rf performance_testing -} - -#fetching with --depth=1 -test9(){ - section TEST9 - run echo 'TEST 9: after addition of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: +$mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test10(){ - section TEST10 - run echo 'TEST 10: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - git reset --hard -q HEAD~1 - run cd ../.. 
- run rm -rf performance_testing -} -test11(){ - section TEST11 - run echo 'TEST 11: after addition then removal of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. - run rm -rf performance_testing -} - -# --depth=1 fetching with reflog and gc -test12(){ - section TEST12 - run echo 'TEST 12: after addition of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . 
- mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: +$mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test13(){ - section TEST13 - run echo 'TEST 13: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test14(){ - section TEST14 - run echo 'TEST 14: after addition then removal of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . 
- mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. - run rm -rf performance_testing -} - -#--depth=1 fetching with reset --hard -test15(){ - section TEST15 - run echo 'TEST 15: after addition of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git reset --hard origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: +$mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. 
- run rm -rf performance_testing -} -test16(){ - section TEST16 - run echo 'TEST 16: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git reset --hard origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test17(){ - section TEST17 - run echo 'TEST 17: after addition then removal of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git reset --hard origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. 
- run rm -rf performance_testing -} -#--depth=1 fetching with reset --hard and reflog and gc -test18(){ - section TEST18 - run echo 'TEST 18: after addition of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git reset --hard origin/main &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test19(){ - section TEST19 - run echo 'TEST 19: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git reset --hard origin/main &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . 
- mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test20(){ - section TEST20 - run echo 'TEST 20: after addition then removal of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git reset --hard origin/main &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. - run rm -rf performance_testing -} -#--depth=1 fetching after modification applied in submodule -test21(){ - section TEST21 - run echo 'TEST 21: after addition of a 1M file' - if [ "$WITH_SUBMODULE" = "true" ]; then - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote submodule - run cd .. 
- run cd $REPO_PATH/$SUB_NAME - create_random_file 'sub_sample1' '1M' - run git add sub_sample1 - run git commit --quiet -m"second 1M sample created" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REPO_PATH/$SUB_NAME - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing - else - echo "This test will not be performed because we are in no-submodule mode. Change boolean \$WITH_SUBMODULE to switch." - fi -} - -test22(){ - section TEST22 - run echo 'TEST 22: after removal of a 1M file' - if [ "$WITH_SUBMODULE" = "true" ]; then - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote submodule - run cd .. - run cd $REPO_PATH/$SUB_NAME - rm sub_sample0 - run git add sub_sample0 - run git commit --quiet -m"1M 'sub_sample0' deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REPO_PATH/$SUB_NAME - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing - else - echo "This test will not be performed because we are in no-submodule mode. Change boolean \$WITH_SUBMODULE to switch." 
- fi -} - -test23(){ - section TEST23 - run echo 'TEST 23: after addition then removal of a 1M file' - if [ "$WITH_SUBMODULE" = "true" ]; then - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote submodule - run cd .. - run cd $REPO_PATH/$SUB_NAME - create_random_file 'sub_sample1' '1M' - run git add sub_sample1 - run git commit --quiet -m"second 1M sample created" - rm sub_sample1 - run git add sub_sample1 - run git commit --quiet -m"1M 'sub_sample1' deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git checkout -f origin/main &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REPO_PATH/$SUB_NAME - run git reset --hard -q HEAD~2 - run cd ../.. - run rm -rf performance_testing - else - echo "This test will not be performed because we are in no-submodule mode. Change boolean \$WITH_SUBMODULE to switch." - fi -} - -# --depth=1 fetching+merging -X theirs with reflog and gc -test24(){ - section TEST24 - run echo 'TEST 24: after addition of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . 
- mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git merge -X theirs --allow-unrelated-histories &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: +$mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test25(){ - section TEST25 - run echo 'TEST 25: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git merge -X theirs --allow-unrelated-histories &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. 
- run rm -rf performance_testing -} -test26(){ - section TEST26 - run echo 'TEST 26: after addition then removal of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . - mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git merge -X theirs --allow-unrelated-histories &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. - run rm -rf performance_testing -} - -# --depth=1 fetching+merging -s ours with reflog and gc -test27(){ - section TEST27 - run echo 'TEST 27: after addition of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . 
- mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run cd ../../$REPO_NAME - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git switch -c temp origin/main &> /dev/null #creating a temporary branch identical to origin/main - run git merge -s ours --allow-unrelated-histories main &> /dev/null - run git checkout main &> /dev/null - run git merge --allow-unrelated-histories temp &> /dev/null - run git branch -D temp &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: +$mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test28(){ - section TEST28 - run echo 'TEST 28: after removal of a 1M file' - #initialization - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . 
- mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - run rm sample0 - run git add sample0 - run git commit --quiet -m"1M sample0 deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git switch -c temp origin/main &> /dev/null #creating a temporary branch identical to origin/main - run git merge -s ours --allow-unrelated-histories main &> /dev/null - run git checkout main &> /dev/null - run git merge --allow-unrelated-histories temp &> /dev/null - run git branch -D temp &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~1 - run cd ../.. - run rm -rf performance_testing -} -test29(){ - section TEST29 - run echo 'TEST 29: after addition then removal of a 1M file' - run git clone --recurse-submodules --remote-submodules --progress --depth=1 --no-local $1 &> /dev/null - run cd $REPO_NAME - get_storage_used . 
- mem_before=$mem - #modification of the remote repo - run cd ../$REMOTE - create_random_file 'sample5' '1M' #adding a 1M file - run git add sample5 - run git commit --quiet -m"fourth 1M sample created" - run rm sample5 - run git add sample5 - run git commit --quiet -m"1M "sample5" deleted" - run cd ../../$REPO_NAME - #fetching - run git submodule update --init --recursive --force --depth=1 --remote &> /dev/null - run git fetch --progress --tags --depth=1 --prune --prune-tags origin &> /dev/null - run git switch -c temp origin/main &> /dev/null #creating a temporary branch identical to origin/main - run git merge -s ours --allow-unrelated-histories main &> /dev/null - run git checkout main &> /dev/null - #run git merge --allow-unrelated-histories temp &> /dev/null - run git branch -D temp &> /dev/null - run git reflog expire --expire=now --all &> /dev/null - run git gc --aggressive --prune=now &> /dev/null - get_storage_used . - mem_after=$mem - mem=$(($mem_after-$mem_before)) - echo "memory usage: $mem" - run cd ../$REMOTE - run git reset --hard -q HEAD~2 - run cd ../.. - run rm -rf performance_testing -} - - -while getopts ":hn:a" option; do - case $option in - h) # display Help - Help - exit;; - n) - TEST_NUM=$OPTARG;; - a) - ALL_TESTS=true;; - \?) 
# Invalid option - echo "Error: Invalid option here" - exit;; - esac -done - -if [ "$ALL_TESTS" = "true" ]; then - test0 $REMOTE - test1 $REMOTE - test2 $REMOTE - test3 $REMOTE - test4 $REMOTE - test5 $REMOTE - echo $(section "Tests on the updating of the repository") - section "classic fetching+checking out" - test6 $REMOTE - test7 $REMOTE - test8 $REMOTE - echo $(section "fetching+checking out with --depth=1") - test9 $REMOTE - test10 $REMOTE - test11 $REMOTE - echo $(section "--depth=1 fetching+checking out reflog and gc") - test12 $REMOTE - test13 $REMOTE - test14 $REMOTE - echo $(section "--depth=1 fetching+ reset --hard") - test15 $REMOTE - test16 $REMOTE - test17 $REMOTE - echo $(section "--depth=1 fetching+ reset --hard and reflog and gc") - test18 $REMOTE - test19 $REMOTE - test20 $REMOTE - echo $(section "--depth=1 fetching+checking out after modification applied in submodule") - test21 $REMOTE - test22 $REMOTE - test23 $REMOTE - echo $(section "--depth=1 fetching+merging -X theirs with reflog and gc") - test24 $REMOTE - test25 $REMOTE - test26 $REMOTE - echo $(section "--depth=1 fetching+merging -s ours with reflog and gc") - test27 $REMOTE - test28 $REMOTE - test29 $REMOTE - - -elif [ -n "$TEST_NUM" ]; then - case $TEST_NUM in - 0) - test0 $REMOTE;; - 1) - test1 $REMOTE;; - 2) - test2 $REMOTE;; - 3) - test3 $REMOTE;; - 4) - test4 $REMOTE;; - 5) - test5 $REMOTE;; - 6) - test6 $REMOTE;; - 7) - test7 $REMOTE;; - 8) - test8 $REMOTE;; - 9) - test9 $REMOTE;; - 10) - test10 $REMOTE;; - 11) - test11 $REMOTE;; - 12) - test12 $REMOTE;; - 13) - test13 $REMOTE;; - 14) - test14 $REMOTE;; - 15) - test15 $REMOTE;; - 16) - test16 $REMOTE;; - 17) - test17 $REMOTE;; - 18) - test18 $REMOTE;; - 19) - test19 $REMOTE;; - 20) - test20 $REMOTE;; - 21) - test21 $REMOTE;; - 22) - test22 $REMOTE;; - 23) - test23 $REMOTE;; - 24) - test24 $REMOTE;; - 25) - test25 $REMOTE;; - 26) - test26 $REMOTE;; - 26) - test26 $REMOTE;; - 27) - test27 $REMOTE;; - 28) - test28 $REMOTE;; - 29) - 
test29 $REMOTE;; - *) - echo "Error: Invalid test number" - die;; - esac -else - Help -fi - -#add the submodules management to the cloning -#add run \ No newline at end of file diff --git a/readme b/readme index ec12859..01a5144 100644 --- a/readme +++ b/readme @@ -1,13 +1,40 @@ # Overview -git_update.sh is a bash script performing a punctual synchronisation of a git repository. - +git_update.sh is a bash script performing a one-off synchronization of a git repository. +The script will replace the indicated destination with the wanted commit of a git repository. The history is not preserved but tags are. +The git commands have been chosen so as to minimize memory and bandwidth usage. # Download Download the git_update.sh file. Git has to be installed on your computer. # Use +Extract of the help: +NAME + git_update.sh +SYNOPSIS + git_update.sh [-h] [-r ref] [-d dest] [-H] [-N] [-a] repository +OPTIONS + -h prints the help. + -r specifies the reference to the commit to be synchronized. It can be a tag or a branch. By default, it is the last commit of branch main. + -d specifies the destination of the clone or update. If it is non-empty and you want to keep the non-conflicting files, -N must be supplied. Otherwise all files will be deleted, even if gitignored. + If the repository to be cloned is local, and its path is passed as a relative path, the path should start from the destination. + To avoid mistakes, absolute paths are advised. + -H allows the $HOME directory to be used by git_update.sh. By default, git_update.sh cannot access $HOME to prevent default behavior. + If you need the global .gitconfig located in your $HOME to be used, you should supply the -H option. + -N indicates that the destination is non-empty. The files that do not conflict and, if it is an update, are not committed, will be kept. + By default, all files which are not in the imported commit are deleted, including those listed in the .gitignore. 
+ -a specifies that the aggressive option of git clean must be used. git clean is called when -N is not supplied. + +# Examples +A) I want to clone the last commit of a branch into a non-existing directory. +./path/to/git_update.sh -r myBranch -d myDirectory https://git.mydomain/myname/myrepository.git +Result: a directory myDirectory has been created in the current working directory. It is filled with the files of the last commit of branch myBranch as well as the .git directory. The history only shows the last commit; however, the tags are accessible. + +B) I want to clone a certain tag into a preexisting directory. +./path/to/git_update.sh -r myTag -d myDirectory -N https://git.mydomain/myname/myrepository.git +Result: all the files of myDirectory have been kept, except those whose names were identical to files of myTag. Files of myTag have been imported. + # Development process git_update.sh has been written by the French association Jean-Cloud, in the process of developing Shlagernetes, a new orchestration tool. Shlagernetes allows storing services on fallible second-hand servers and tries to consume the less possible resources. diff --git a/src/git_update.sh b/src/git_update.sh index 7781165..2056059 100755 --- a/src/git_update.sh +++ b/src/git_update.sh @@ -1,61 +1,54 @@ #!/bin/bash +Help(){ + echo " +NAME + git_update.sh +SYNOPSIS + git_update.sh [-h] [-r ref] [-d dest] [-H] [-a] repository +OPTIONS + -h prints the help. + -r specifies the reference to the commit to be synchronized. It can be a tag or a branch. By default, it is the last commit of branch main. + -d specifies the destination of the clone or update. Directory must be empty if a new clone is to be made. + If the repository to be cloned is local, and its path is passed as a relative path, the path should start from the destination. + To avoid mistakes, absolute paths are advised. + -H allows the $HOME directory to be used by git_update.sh. 
By default, git_update.sh cannot access $HOME to prevent default behavior. + If you need the global .gitconfig located in your $HOME to be used, you should supply the -H option. + -a specifies that the aggressive option of the git garbage collection must be used. Only advised when changes happen in many different objects. Will slow down the execution. +DESCRIPTION + This script will replace the destination with the wanted commit of a git repository. The history is not preserved but tags are. Untracked files remain. + The git commands have been chosen so as to minimize the memory and bandwidth usages." +} -# Clone un dépôt git au bon endroit -# Stocker un minum de données (et donc nettoyer) -# Télécharger un minimum de données -# En cas de conflit donner raison au remote (on écrase les versions locales) - - -declare -A usage -declare -A varia - +#variables summary="$0 [options] " - -usage[b]="Branch of git repo" -varia[b]=branch -branch=master - -usage[t]="Tog of git repo" -varia[t]=tag -tag= - -usage[d]="Destination of clone" -varia[d]=dst +ref=main dst='.' - -usage[i]="privkey used to ssh pull" -varia[i]=privkey -privkey='' - -usage[N]="Clone to a Non-empty target. Existing files will be overwriten" -varia[N]=nonempty_target -nonempty_target=false - -usage[K]="Remote host key file (known_hosts) for ssh connections" -varia[K]=hostkeyfile -hostkeyfile='' - -usage[H]="Use real home dir" -varia[H]=use_home use_home=false +be_aggressive="false" - -. driglibash-args - - -# Some SSH options -ssh_opt='ssh' -if [ -n "$privkey" ] ; then - ssh_opt="$ssh_opt -i $privkey" -fi - -if [ -n "$hostkeyfile" ] ; then - ssh_opt="$ssh_opt -o 'UserKnownHostsFile $hostkeyfile'" -fi +while getopts ":hr:d:Ha" option; do # 'a' must be listed here, otherwise the a) arm below is unreachable + case $option in + h) # display Help + Help + exit;; + r) # desired branch or tag + ref="$OPTARG";; + d) # destination of clone + dst="$OPTARG";; + H) # use real home dir + use_home="true";; + a) #use -a in git gc call + be_aggressive="true";; + \?) 
# invalid option: report on stderr and fail + echo "Error: Invalid option here" >&2 + exit 1;; + esac +done +shift $((OPTIND-1)) repo="$1" if [ -z "$repo" ] ; then - die "$0: Empty repo given\n$summary" + echo "$0: Empty repo given" >&2; echo "$summary" >&2; exit 1 # exit only accepts a numeric status, so print the message first fi if [ ! $use_home ] ; then @@ -64,44 +57,24 @@ if [ ! $use_home ] ; then set +a fi -run mkdir -p "$dst" -run cd "$dst" - +mkdir -p "$dst" || exit 1 +cd "$dst" || exit 1 # abort rather than run git reset --hard in the caller's directory if [ -d .git ] ; then - - # Compute git branch and tag - tagref= - if [ -n "$tag" ] ; then - tagref="tags/$tag" - fi - - run git fetch origin "$branch" --tags -f - run git checkout --force $tagref "origin/$branch" - run git reset --hard # TODO we can keep some files? - # Preserve existing files in some cases - if ! "$nonempty_target" ; then - git clean -qffdx + echo "updating..." + git fetch --tags --depth=1 --prune --prune-tags --force origin "$ref" + git reset --hard --recurse-submodules FETCH_HEAD + git submodule update --init --recursive --force --depth=1 --remote + #garbage collection of anything unreachable at the moment + git reflog expire --expire=now --all + if "$be_aggressive" ; then + git gc --prune=now --aggressive + else + git gc --prune=now fi - run git submodule update --init --recursive --force --recommend-shallow - run git submodule foreach git fetch - run git submodule foreach git checkout --force HEAD - run git submodule foreach git reset --hard - run git submodule foreach git clean -fdx else +echo "cloning..." clone_dst='.' - - # To override an existing dir, we need to clone elsewhere first - if "$nonempty_target" ; then - clone_dst="$(mktemp -d)" - fi - - run git clone -b "$branch" --single-branch --recurse-submodules --shallow-submodules --depth 1 --config core.sshCommand="$ssh_opt" "$repo" "$clone_dst" - - # To override an existing dir, we then move everything to that dir - if "$nonempty_target" ; then - run mv "$clone_dst/"{*,.*} . 
- run rmdir "$clone_dst" - fi + git clone -b "$ref" --recurse-submodules --shallow-submodules --depth 1 "$repo" "$clone_dst" fi diff --git a/src/pre-push b/src/pre-push deleted file mode 100755 index de70fc2..0000000 --- a/src/pre-push +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# Get package version -# TODO Add some well-known version provider mechanisms -version="" -[ -f package.json ] && version=$(npm pkg get version | tr -d '\"') -[ -f version ] && version=$(cat version) -[ -f VERSION ] && version=$(cat VERSION) - -# Create semver tags -git tag -f v${version%.*.*} -git tag -f v${version%.*} -git tag -f v$version - -# Push the tags -git push --no-verify --tags -f diff --git a/creation_repo.sh b/test/creation_repo.sh similarity index 100% rename from creation_repo.sh rename to test/creation_repo.sh diff --git a/test_git_update.sh b/test/functional_tests/test_git_update.sh similarity index 100% rename from test_git_update.sh rename to test/functional_tests/test_git_update.sh diff --git a/readme.md b/test/performance_tests/readme similarity index 100% rename from readme.md rename to test/performance_tests/readme diff --git a/tests-manuels/pourTesterGitUpdate b/tests-manuels/pourTesterGitUpdate deleted file mode 160000 index 0767da6..0000000 --- a/tests-manuels/pourTesterGitUpdate +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0767da6236479ef7a3e43ec6d5d74f0663901205