Untitled
unknown
plain_text
a year ago
3.5 kB
4
Indexable
from fpdf import FPDF
# Build the document object, enable automatic page breaks with a bottom
# margin of 15, and open the first page.
pdf = FPDF()
pdf.set_auto_page_break(True, margin=15)
pdf.add_page()

# Document heading: bold, size 16, centered across the full line width,
# followed by a vertical gap of 10.
pdf.set_font("Arial", style='B', size=16)
pdf.cell(0, 10, 'Pandas Operations on Student_result.csv', align='C', ln=True)
pdf.ln(10)

# Switch to the regular (non-bold) face at size 12 for the body.
pdf.set_font("Arial", '', 12)
# Add content for each operation.
#
# Each entry is a (heading, snippet) pair. The snippets are plain strings that
# are only printed into the PDF; this script never executes them.
#
# Headings use plain ASCII apostrophes on purpose: the script renders with the
# built-in "Arial" font, which is limited to Latin-1, and typographic quotes
# (U+2018 / U+2019) are outside that range and make PDF generation fail.
content = [
    (
        "1. Read the 'Student_result.csv' to create a DataFrame and display Adm_No, Gender, and Percentage",
        "import pandas as pd\n"
        "df = pd.read_csv('Student_result.csv')\n"
        "result_columns = df[['Adm_No', 'Gender', 'Percentage']]\n"
        "print(result_columns)",
    ),
    (
        "2. Display the first 5 and last 5 records from 'Student_result.csv'",
        "print(\"First 5 records:\")\n"
        "print(df.head())\n"
        "print(\"Last 5 records:\")\n"
        "print(df.tail())",
    ),
    (
        "3. Display the Student_result file with new column names",
        "df.columns = ['Admission_Number', 'Name', 'Gender', 'Percentage']\n"
        "print(df)",
    ),
    (
        "4. Modify the Percentage of students below 40 with NaN in DataFrame",
        "df.loc[df['Percentage'] < 40, 'Percentage'] = pd.NA\n"
        "print(df)",
    ),
    (
        "5. Create a duplicate file for 'Student_result.csv' containing Adm_No, Name, and Percentage",
        "duplicate_df = df[['Adm_No', 'Name', 'Percentage']]\n"
        "duplicate_df.to_csv('duplicate_Student_result.csv', index=False)",
    ),
    (
        "6. Write the statement in Pandas to find the highest percentage and print the student's name and percentage",
        "highest_percentage = df.loc[df['Percentage'].idxmax()]\n"
        "print(f\"Highest Percentage: {highest_percentage['Percentage']}, Name: {highest_percentage['Name']}\")",
    ),
    (
        "7. Importing and exporting data between Pandas and MySQL database",
        "from sqlalchemy import create_engine\n"
        "engine = create_engine('mysql+pymysql://username:password@localhost/db_name')\n"
        "df.to_sql('student_results', con=engine, if_exists='replace', index=False)\n"
        "imported_df = pd.read_sql('SELECT * FROM student_results', con=engine)",
    ),
    (
        "8. Find the sum of each column or find the column with the lowest mean",
        "column_sums = df.sum()\n"
        "lowest_mean_column = df.mean().idxmin()",
    ),
    (
        "9. Locate the 3 largest values in a DataFrame",
        "largest_values = df.nlargest(3, 'Percentage')\n"
        "print(largest_values)",
    ),
    (
        "10. Subtract the mean of a row from each element of the row in a DataFrame",
        "df_mean_subtracted = df.sub(df.mean(axis=1), axis=0)\n"
        "print(df_mean_subtracted)",
    ),
    (
        "11. Replace all negative values in a DataFrame with 0",
        "df[df < 0] = 0\n"
        "print(df)",
    ),
    (
        "12. Replace all missing values in a DataFrame with 999",
        "df.fillna(999, inplace=True)\n"
        "print(df)",
    ),
    (
        "13. Given a Series, print all the elements that are above the 75th percentile",
        "percentage_series = df['Percentage']\n"
        "percentile_75 = percentage_series.quantile(0.75)\n"
        "above_75th = percentage_series[percentage_series > percentile_75]\n"
        "print(above_75th)",
    ),
]
# Add content to PDF: for every (heading, snippet) pair print the heading in
# bold, then the snippet in the regular face, then a small vertical gap.
for title, code in content:
    pdf.set_font("Arial", 'B', 12)
    # multi_cell rather than cell: cell prints a single line and lets long
    # headings run past the right margin, multi_cell wraps them.
    pdf.multi_cell(0, 10, title)
    # Return to the left margin explicitly; depending on the fpdf version the
    # cursor may be left at the right edge after multi_cell, which would leave
    # no width for the next full-width cell.
    pdf.set_x(pdf.l_margin)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, code)
    pdf.ln(5)
# Save the PDF to a file
pdf_file_path = '/mnt/data/Pandas_Student_Result_Operations.pdf'
pdf.output(pdf_file_path)
# Trailing bare expression: shows the path when run as the last line of a
# notebook/REPL cell; it has no effect when run as a plain script.
pdf_file_path
Leave a Comment