1
0
Fork 0
mirror of https://gitlab.rlp.net/pgp/pgp1-python-einfuehrung synced 2024-11-16 13:48:11 +00:00

Variante mit csv um encoding-Typ erweitert; Dies ist nicht noetig fuer die numpy-Variante

This commit is contained in:
Matthias Hoek 2019-10-14 16:41:48 +02:00
parent 4bb9da9c59
commit d5df6ecdd6

View file

@ -39,7 +39,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 15,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-03T10:04:09.176914Z", "end_time": "2019-10-03T10:04:09.176914Z",
@ -53,14 +53,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 26,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-03T10:05:45.212389Z", "end_time": "2019-10-03T10:05:45.212389Z",
"start_time": "2019-10-03T10:05:45.000440Z" "start_time": "2019-10-03T10:05:45.000440Z"
} }
}, },
"outputs": [], "outputs": [
{
"ename": "ValueError",
"evalue": "Some errors were detected !\n Line #2 (got 1 columns instead of 11)\n Line #4 (got 6 columns instead of 11)\n Line #5 (got 6 columns instead of 11)\n Line #6 (got 6 columns instead of 11)\n Line #7 (got 6 columns instead of 11)\n Line #8 (got 6 columns instead of 11)\n Line #9 (got 6 columns instead of 11)\n Line #10 (got 6 columns instead of 11)\n Line #11 (got 6 columns instead of 11)\n Line #12 (got 6 columns instead of 11)\n Line #13 (got 6 columns instead of 11)\n Line #14 (got 6 columns instead of 11)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-26-a36749af93d0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# Notebook befindet reicht es den Dateinamen anzuegebn. Ansonsten\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# müsst ihr den gesamten Pfad angeben.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgenfromtxt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpfad\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# <-- Einlesen der txt-Datei\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\numpy\\lib\\npyio.py\u001b[0m in \u001b[0;36mgenfromtxt\u001b[1;34m(fname, dtype, comments, delimiter, skip_header, skip_footer, converters, missing_values, filling_values, usecols, names, excludelist, deletechars, replace_space, autostrip, case_sensitive, defaultfmt, unpack, usemask, loose, invalid_raise, max_rows, encoding)\u001b[0m\n\u001b[0;32m 2073\u001b[0m \u001b[1;31m# Raise an exception ?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2074\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0minvalid_raise\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2075\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0merrmsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2076\u001b[0m \u001b[1;31m# Issue a warning ?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2077\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: Some errors were detected !\n Line #2 (got 1 columns instead of 11)\n Line #4 (got 6 columns instead of 11)\n Line #5 (got 6 columns instead of 11)\n Line #6 (got 6 columns instead of 11)\n Line #7 (got 6 columns instead of 11)\n Line #8 (got 6 columns instead of 11)\n Line #9 (got 6 columns instead of 11)\n Line #10 (got 6 columns instead of 11)\n Line #11 (got 6 columns instead of 11)\n Line #12 (got 6 columns instead of 11)\n Line #13 (got 6 columns instead of 11)\n Line #14 (got 6 columns instead of 11)"
]
}
],
"source": [ "source": [
"pfad = 'BeispielDatenPGP2.txt' # <-- Pfad zur Datei, sofern sich die Datei am selben Ort wie euer\n", "pfad = 'BeispielDatenPGP2.txt' # <-- Pfad zur Datei, sofern sich die Datei am selben Ort wie euer\n",
" # Notebook befindet, reicht es, den Dateinamen anzugeben. Ansonsten\n", " # Notebook befindet, reicht es, den Dateinamen anzugeben. Ansonsten\n",
@ -77,14 +90,35 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 27,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-03T10:28:02.193780Z", "end_time": "2019-10-03T10:28:02.193780Z",
"start_time": "2019-10-03T10:28:02.162533Z" "start_time": "2019-10-03T10:28:02.162533Z"
} }
}, },
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"array([[ 1., nan, nan, -53., nan, nan],\n",
" [ 2., nan, nan, -40., nan, nan],\n",
" [ 3., nan, nan, -29., nan, nan],\n",
" [ 4., nan, nan, -21., nan, nan],\n",
" [ 5., nan, nan, -11., nan, nan],\n",
" [ 6., nan, nan, -1., nan, nan],\n",
" [ 7., nan, nan, 10., nan, nan],\n",
" [ 8., nan, nan, 22., nan, nan],\n",
" [ 9., nan, nan, 32., nan, nan],\n",
" [ 10., nan, nan, 42., nan, nan],\n",
" [ 11., nan, nan, 50., nan, nan]])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"data = np.genfromtxt(pfad, \n", "data = np.genfromtxt(pfad, \n",
" skip_header=3 # <-- Überspringt die ersten 3 Zeilen unserer Files\n", " skip_header=3 # <-- Überspringt die ersten 3 Zeilen unserer Files\n",
@ -129,14 +163,35 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 25,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-03T10:39:26.259305Z", "end_time": "2019-10-03T10:39:26.259305Z",
"start_time": "2019-10-03T10:39:26.223312Z" "start_time": "2019-10-03T10:39:26.223312Z"
} }
}, },
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"array([[ 1. , -9.9, nan, -53. , nan, nan],\n",
" [ 2. , -6.6, nan, -40. , nan, nan],\n",
" [ 3. , -4.7, nan, -29. , nan, nan],\n",
" [ 4. , -3.4, nan, -21. , nan, nan],\n",
" [ 5. , -1.8, nan, -11. , nan, nan],\n",
" [ 6. , -0.2, nan, -1. , nan, nan],\n",
" [ 7. , 0.9, nan, 10. , nan, nan],\n",
" [ 8. , 2.2, nan, 22. , nan, nan],\n",
" [ 9. , 3.3, nan, 32. , nan, nan],\n",
" [ 10. , 4.3, nan, 42. , nan, nan],\n",
" [ 11. , 6.3, nan, 50. , nan, nan]])"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"np.genfromtxt(pfad, \n", "np.genfromtxt(pfad, \n",
" skip_header=3, \n", " skip_header=3, \n",
@ -153,14 +208,46 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 21,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-03T10:59:09.375845Z", "end_time": "2019-10-03T10:59:09.375845Z",
"start_time": "2019-10-03T10:59:09.314951Z" "start_time": "2019-10-03T10:59:09.314951Z"
} }
}, },
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"array([[ 1.000e+00, -9.900e+00, -2.730e+00, -5.300e+01, 2.400e+01,\n",
" -6.864e-03],\n",
" [ 2.000e+00, -6.600e+00, -2.030e+00, -4.000e+01, 2.400e+01,\n",
" -6.927e-03],\n",
" [ 3.000e+00, -4.700e+00, -1.510e+00, -2.900e+01, 2.410e+01,\n",
" -6.867e-03],\n",
" [ 4.000e+00, -3.400e+00, -1.090e+00, -2.100e+01, 2.410e+01,\n",
" -6.842e-03],\n",
" [ 5.000e+00, -1.800e+00, -5.700e-01, -1.100e+01, 2.380e+01,\n",
" -6.892e-03],\n",
" [ 6.000e+00, -2.000e-01, -4.000e-02, -1.000e+00, 2.410e+01,\n",
" -6.860e-03],\n",
" [ 7.000e+00, 9.000e-01, 5.200e-01, 1.000e+01, 2.400e+01,\n",
" -6.849e-03],\n",
" [ 8.000e+00, 2.200e+00, 1.090e+00, 2.200e+01, 2.400e+01,\n",
" -6.892e-03],\n",
" [ 9.000e+00, 3.300e+00, 1.620e+00, 3.200e+01, 2.410e+01,\n",
" -6.869e-03],\n",
" [ 1.000e+01, 4.300e+00, 2.150e+00, 4.200e+01, 2.420e+01,\n",
" -6.851e-03],\n",
" [ 1.100e+01, 6.300e+00, 2.550e+00, 5.000e+01, 2.430e+01,\n",
" -6.860e-03]])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"data = np.genfromtxt(pfad,\n", "data = np.genfromtxt(pfad,\n",
" skip_header=3, \n", " skip_header=3, \n",
@ -181,7 +268,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 22,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-03T11:08:26.231136Z", "end_time": "2019-10-03T11:08:26.231136Z",
@ -248,15 +335,14 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Leider gibt uns auch dieser Ansatz erstmal eine Fehlermeldung, da wir wieder unseren Header überspringen müssen. Hierzu können wir das Konzept eines `if`-Statements verwenden. Ein `if`-Statement evaluiert, ob eine Aussage *Wahr* oder *Falsch* ist, und führt basierend auf dem Ergebnis eine Aktion durch. \n", "Leider gibt uns auch dieser Ansatz erstmal eine Fehlermeldung, da wir wieder unseren Header überspringen müssen. Dazu wird eine weitere for-Schleife eingesetzt, die nur die Header-Zeilen liest, ohne sie weiterzuverarbeiten.\n",
"\n",
"\n", "\n",
"Um hieraus Nutzen ziehen zu können, müssen wir den obigen Code wie folgt modifizieren:" "Um hieraus Nutzen ziehen zu können, müssen wir den obigen Code wie folgt modifizieren:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 14,
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2019-10-06T09:14:47.437191Z", "end_time": "2019-10-06T09:14:47.437191Z",
@ -271,7 +357,7 @@
"\n", "\n",
"pfad = 'BeispielDatenPGP2.txt'\n", "pfad = 'BeispielDatenPGP2.txt'\n",
"\n", "\n",
"with open(pfad) as csvfile:\n", "with open(pfad, encoding=\"utf8\") as csvfile:\n",
" readCSV = csv.reader(csvfile, delimiter='\\t')\n", " readCSV = csv.reader(csvfile, delimiter='\\t')\n",
" \n", " \n",
" n = 3 # Anzahl der Headerzeilen, die übersprungen werden müssen\n", " n = 3 # Anzahl der Headerzeilen, die übersprungen werden müssen\n",