Loop through multiple columns in a CSV file, find peaks, and write them to an Excel worksheet

cqoc49vn asked on 2023-04-09

Long-time lurker and Python novice here. Thanks in advance for any help.
I have CSV files with 2+ columns; column 1 holds the X time-series data, and columns 2+ contain fluorescence values in which I want to find several peaks. With the code below (modified from here) I can find peaks in a single column, but I can't figure out how to loop over the columns. A CSV file may have anywhere from 2 to a few dozen columns, so I don't want to manually change which column gets read and analyzed.
Problem in a nutshell: I can't loop over an XYYYY+ CSV file so that each XY pair is written to a CSV or Excel file.
I've tried many iterations, but most recently I've been trying "for column in data.T:" in several places in the code below.

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 14:40:52 2023

@author: klinedd
"""

#import pyabf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
 
#import scipy
#from scipy import signal
from scipy.signal import find_peaks
from scipy.optimize import curve_fit
import csv

my_file = r'C:/David Documents/Python/_CaImaging_python/Python_test_GABARx.csv'

# Identify the number of columns (time-series ROIs) in the CSV file
f = open(my_file)
reader = csv.reader(f,delimiter=',')
ncol = len(next(reader)) # Read first line and count columns
print("The number of columns in this file is:", ncol)

# Recordings in CSV files. Load the first trace
data = np.loadtxt(my_file, delimiter=",")
time = data[:, 0]
f_signal = data[:, 1]
f_signal_all = data[:, 1:]

#-------------
# Trying this. https://stackoverflow.com/questions/10148818/numpy-how-to-iterate-over-columns-of-array
#-------------
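# A rough sketch of the pattern from that link (not yet wired into the analysis below;
# data.T[1:] would walk the Y columns, i.e. everything after the time column):
# for i, column in enumerate(data.T[1:], start=1):
#     peaks, peaks_dict = find_peaks(column, ...)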


# Determine interval and frequency
interval = np.diff(time).min()  #in sec
fs = 1/interval #in hertz
print('Images acquired every', interval, 'seconds or', fs, 'Hz')

# If sampling frequency is manually added, replace number
# fs = x  # Sampling frequency, change x if needed

# Plot the full trace
fig1 = plt.figure(figsize=(12, 12))
ax1 = fig1.add_subplot(211)
ax1.set_title("F data")
ax1.plot(time, f_signal_all, linewidth=0.5)
ax1.set_ylabel("Fluoro")
ax1.set_xlabel("Time (s)")
fig1.tight_layout()

# Set variables for the event analysis window
pretrigger_window = (2000 * fs)/1000  # Pre-event window: 2000 ms converted to samples
posttrigger_window = (15000 * fs)/1000  # Post-event window: 15000 ms converted to samples
event_no = 0  # Event viewer: 0 is the first event
window_length = posttrigger_window - pretrigger_window # Determine the minimal length of the analysis window
print('Analysis window length is', window_length, 'samples')

# Set parameters of the Find peaks function
thresh_min = 10
thresh_max = 1200
thresh_prominence = 16
thresh_min_width = 0.9 * (fs/1000)

peaks, peaks_dict = find_peaks(f_signal, 
           height=(thresh_min, thresh_max),  # Min and max thresholds to detect peaks.
           threshold=None,  # Min and max vertical distance to neighboring samples.
           distance=None,  # Min horizontal distance between peaks.
           prominence=thresh_prominence,  # Vertical distance between the peak and lowest contour line.
           width=thresh_min_width,  # Min required width (in bins). E.g. for 10 kHz, 10 bins = 1 ms.
           wlen=None,  # Window length to calculate prominence.
           rel_height=0.5,  # Relative height at which the peak width is measured.
           plateau_size=None)

for column in data.T:    
    
    
    # Create table with results
    table = pd.DataFrame(columns = ['event', 'peak_position', 
                                    'peak_position_s',
                                    'event_start', 'event_end',
                                    'Peak_Amp', 'Width_ms', 
                                    'inst_freq', 'isi_s', 
                                    'Area_pA/ms', 'log_decay', 
                                    'tau_exp'])
      
    table.event = np.arange(1, len(peaks) + 1)
    table.peak_position = peaks
    table.peak_position_s = peaks / fs  # Divided by fs to get s
    table.event_start = peaks_dict['left_ips'] - pretrigger_window
    table.event_end = peaks_dict['right_ips'] + posttrigger_window
    table.Peak_Amp = peaks_dict['peak_heights']  # height parameter is needed
    table.Width_ms = peaks_dict['widths']/(fs/1000) # Width (ms) at half-height 
      
    # Optional parameters (remember to add the column names to the table)
    # table.rise_half_amp_ms = (peaks - peaks_dict['left_ips'])/(sampling_rate/1000) 
    # table.decay_half_amp_ms = (peaks_dict['right_ips'] - peaks)/(sampling_rate/1000)
      
    # Calculations based on the parameters above
    table.inst_freq = np.append((1 / (np.array(table.peak_position[1:]) -
                                      np.array(peaks_dict['left_ips'][:-1])) * fs), np.nan)  # np.nan keeps the column numeric
    table.isi_s = np.diff(peaks, axis=0, prepend=peaks[0]) / fs
      
    # Area
    for i, event in table.iterrows():
        individual_event = f_signal[int(event.event_start) : int(event.event_end)]
        table.loc[i, 'Area_pA/ms'] = np.round(individual_event.sum(), 1)/(fs/1000) 
      
    # Decay tau from logistic regression
    for i, event in table.iterrows():
        decay_tau = abs(f_signal[int(event.peak_position) : int(event.event_end)])
        log_decay_tau = np.log(decay_tau)
        decay_width = int(len(decay_tau))
        decay_width_array = list(range(0, decay_width))
        slope, _ = np.polyfit(decay_width_array, log_decay_tau, 1)
        tau = -1 / slope
        table.loc[i, 'log_decay'] = tau/(fs/1000) 
     
    # Decay tau from monoexponential fitting
    for i, event in table.iterrows():
        decay_tau = f_signal[int(event.peak_position) : int(event.event_end)]
        decay_width = int(len(decay_tau))
        decay_width_array = list(range(0, decay_width))
        a_initial = 200
        b_initial = 0.1
        # popt: optimal values for the parameters, pcov: estimated covariance of popt
        popt, pcov = curve_fit(lambda t, a, b: a * np.exp(b * t), 
                               decay_width_array, decay_tau, 
                               p0=(a_initial, b_initial), 
                               maxfev=1000)  # maxfev: number of iterations
        a = popt[0]  
        b = popt[1]      
        table.loc[i, 'tau_exp'] = abs((1/b)/(fs/1000))
    
    
    # Plot the event detection in the trace
    fig2 = plt.figure(figsize=(12,4))
    gridspec = fig2.add_gridspec(ncols=2, nrows=1, width_ratios=[2, 1])
    ax1 = fig2.add_subplot(gridspec[0])
    ax1.set_title("Events detection")   
    ax1.plot(f_signal)
    ax1.plot(peaks, f_signal[peaks], "r.")
    for i, txt in enumerate(table.event):
        ax1.annotate(table.event[i], (peaks[i], f_signal[peaks][i]))
    ax1.set_xlabel("Sec")
    ax1.set_ylabel("Fluoro")
    # ax1.axes.set_xlim(4000, 10000)  #changes the x-axis to view
    
    #Plot a single event
    ax2 = fig2.add_subplot(gridspec[1])  # gridspec specifies the ratio between plots
    ax2.set_title("Event viewer")
    ax2.plot(f_signal, "gray")
    ax2.plot(peaks, f_signal[peaks], "r.")
    ax2.set_xlabel("Sec bin")
    ax2.set_ylabel("Fluoro")
    # Event time window
    ax2.set_xlim(table.event_start[event_no], table.event_end[event_no]) 
    # Labelling the event
    line, = ax2.plot(peaks, f_signal[peaks], "r.") 
    line.set_label(table.event[event_no]) 
    ax2.legend()
    
    
    # Show graph and table
    plt.show()
    table  # Displays the table in an interactive console; use print(table) in a script
    
    table.to_csv('C:/David Documents/Python/_CaImaging_python/CaStuffdata2.csv')
ruarlubt #1

This isn't really an answer, more of a starting point for a more complete solution.
The CSV file:

cat column_test.csv
x,y1,y2,y3
1,6,7,8
2,3,2,6
3,8,9,3

Simple Python code that iterates over the data and pairs the first column with each of the subsequent columns:

import csv

with open('column_test.csv', 'r', newline='') as csv_file:
    cReader = csv.reader(csv_file, delimiter=',')
    next(cReader)
    data_list = []
    for row in cReader:
        row_list = []
        for col in row[1:]: # Skip the first (x) column
            row_list.append([row[0], col])
        data_list.append(row_list)

data_list

[[['1', '6'], ['1', '7'], ['1', '8']],
 [['2', '3'], ['2', '2'], ['2', '6']],
 [['3', '8'], ['3', '9'], ['3', '3']]]
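
Building on that, here is a minimal sketch of the column loop the question is after, assuming the same X-then-Y column layout: run find_peaks on each Y column and write one sheet per column into a single Excel workbook via pandas.ExcelWriter. The output file name, sheet names and the reduced result table are placeholders; the fuller per-event analysis from the question would go inside the loop, and the thresholds are simply the ones from the question.

import numpy as np
import pandas as pd
from scipy.signal import find_peaks

# Peak-finding thresholds copied from the question; adjust for the real data
thresh_min, thresh_max, thresh_prominence = 10, 1200, 16

data = np.loadtxt('column_test.csv', delimiter=',', skiprows=1)  # skiprows=1 skips the header row
time = data[:, 0]

# One sheet per Y column in a single workbook ('peaks_per_column.xlsx' is a placeholder name)
with pd.ExcelWriter('peaks_per_column.xlsx') as writer:
    for i in range(1, data.shape[1]):  # columns 2+ hold the fluorescence traces
        f_signal = data[:, i]
        peaks, peaks_dict = find_peaks(f_signal,
                                       height=(thresh_min, thresh_max),
                                       prominence=thresh_prominence)
        table = pd.DataFrame({'peak_position': peaks,
                              'peak_time': time[peaks],
                              'Peak_Amp': peaks_dict['peak_heights']})
        table.to_excel(writer, sheet_name=f'column_{i}', index=False)

Writing .xlsx with pandas requires an Excel engine such as openpyxl to be installed; to get separate CSV files instead, drop the ExcelWriter block and call table.to_csv(f'peaks_column_{i}.csv') inside the loop.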
